Fundamentals 17 min read

How a Tiny Compiler Works: From Tokenizer to Code Generator

This article walks through the core concepts and implementation steps of a minimal JavaScript compiler, covering tokenization, parsing into an AST, traversing with a visitor, transforming the tree, and generating target code, while also explaining the visitor pattern and polyfills.

ELab Team
ELab Team
ELab Team
How a Tiny Compiler Works: From Tokenizer to Code Generator

Introduction

In modern front‑end development Babel is used to compile newer ES6+ syntax to widely supported ES5. The article uses the open‑source the‑super‑tiny‑compiler project to illustrate how a compiler works, converting Lisp‑style function calls into C‑style calls.

Compiler Workflow

The compilation process consists of three stages: parsing (lexical analysis and syntactic analysis), transformation, and code generation.

Tokenizer (Lexical Analysis)

function tokenizer(input) {
  let current = 0;
  let tokens = [];
  while (current < input.length) {
    let char = input[current];
    if (char === '(') {
      tokens.push({type: 'paren', value: '('});
      current++;
      continue;
    }
    if (char === ')') {
      tokens.push({type: 'paren', value: ')'});
      current++;
      continue;
    }
    let WHITESPACE = /\s/;
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }
    let NUMBERS = /[0-9]/;
    if (NUMBERS.test(char)) {
      let value = '';
      while (NUMBERS.test(char)) {
        value += char;
        char = input[++current];
      }
      tokens.push({type: 'number', value});
      continue;
    }
    if (char === '"') {
      let value = '';
      char = input[++current];
      while (char !== '"') {
        value += char;
        char = input[++current];
      }
      char = input[++current];
      tokens.push({type: 'string', value});
      continue;
    }
    let LETTERS = /[a-z]/i;
    if (LETTERS.test(char)) {
      let value = '';
      while (LETTERS.test(char)) {
        value += char;
        char = input[++current];
      }
      tokens.push({type: 'name', value});
      continue;
    }
    throw new TypeError(`I dont know what this character is: ${char}`);
  }
  return tokens;
}

Parser (Syntactic Analysis)

function parser(tokens) {
  let current = 0;
  function walk() {
    let token = tokens[current];
    if (token.type === 'number') {
      current++;
      return {type: 'NumberLiteral', value: token.value};
    }
    if (token.type === 'string') {
      current++;
      return {type: 'StringLiteral', value: token.value};
    }
    if (token.type === 'paren' && token.value === '(') {
      token = tokens[++current];
      let node = {type: 'CallExpression', name: token.value, params: []};
      token = tokens[++current];
      while (token.type !== 'paren' || token.value !== ')') {
        node.params.push(walk());
        token = tokens[current];
      }
      current++;
      return node;
    }
    throw new TypeError(token.type);
  }
  let ast = {type: 'Program', body: []};
  while (current < tokens.length) {
    ast.body.push(walk());
  }
  return ast;
}

Traverser

function traverser(ast, visitor) {
  function traverseArray(array, parent) {
    array.forEach(child => traverseNode(child, parent));
  }
  function traverseNode(node, parent) {
    let methods = visitor[node.type];
    if (methods && methods.enter) methods.enter(node, parent);
    switch (node.type) {
      case 'Program':
        traverseArray(node.body, node);
        break;
      case 'CallExpression':
        traverseArray(node.params, node);
        break;
      case 'NumberLiteral':
      case 'StringLiteral':
        break;
      default:
        throw new TypeError(node.type);
    }
    if (methods && methods.exit) methods.exit(node, parent);
  }
  traverseNode(ast, null);
}

Transformer

function transformer(ast) {
  let newAst = {type: 'Program', body: []};
  ast._context = newAst.body;
  traverser(ast, {
    NumberLiteral: {
      enter(node, parent) {
        parent._context.push({type: 'NumberLiteral', value: node.value});
      }
    },
    StringLiteral: {
      enter(node, parent) {
        parent._context.push({type: 'StringLiteral', value: node.value});
      }
    },
    CallExpression: {
      enter(node, parent) {
        let expression = {
          type: 'CallExpression',
          callee: {type: 'Identifier', name: node.name},
          arguments: []
        };
        node._context = expression.arguments;
        if (parent.type !== 'CallExpression') {
          expression = {type: 'ExpressionStatement', expression};
        }
        parent._context.push(expression);
      }
    }
  });
  return newAst;
}

Code Generator

function codeGenerator(node) {
  switch (node.type) {
    case 'Program':
      return node.body.map(codeGenerator).join('
');
    case 'ExpressionStatement':
      return `${codeGenerator(node.expression)};`;
    case 'CallExpression':
      return `${codeGenerator(node.callee)}(${node.arguments.map(codeGenerator).join(', ')})`;
    case 'Identifier':
      return node.name;
    case 'NumberLiteral':
      return node.value;
    case 'StringLiteral':
      return `"${node.value}"`;
    default:
      throw new TypeError(node.type);
  }
}

Visitor Pattern Overview

The visitor pattern separates operations from the object structure they work on. In the compiler, a visitor object provides enter and exit methods for each AST node type, allowing transformation logic to be added without modifying the node classes themselves.

Polyfill Example

(function (window) {
  if (window.incompatibleFeature) {
    return window.incompatibleFeature;
  } else {
    window.incompatibleFeature = function () {
      // compatibility code
    };
  }
})(window);
Compiler workflow diagram
Compiler workflow diagram
Visitor pattern diagram
Visitor pattern diagram
JavaScriptASTcompilerParservisitor-pattern
ELab Team
Written by

ELab Team

Sharing fresh technical insights

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.