We can't find the internet
Attempting to reconnect
构建一个简单的编译器, 将 List 格式的代码转换成 C 格式的
原始代码
(plus 3 (abstract 9 6))
编译器
一个编译器的前端模型
根据编译过程来解决这个问题
- 生成
tokens
- 根据
tokens
生成ast
- 转换
ast
到newAst
- 从
newAst
生成代码
Tokenizer
将源代码转换为token
流
const tokenizer = (input) => { let pos = 0; let tokens = []; while (pos < input.length) { let char = input[pos]; const PAREN_MATCH = /[\(\)]/; if (PAREN_MATCH.test(char)) { tokens.push({type: 'paren', value: char}); pos++; continue; } const NAME_MATCH = /[a-zA-Z_]/; const NAME_MATCH_ = /[a-zA-Z0-9_]/; if (NAME_MATCH.test(char)) { let verb = char; while (NAME_MATCH_.test(input[++pos])) { verb += input[pos]; } tokens.push({type: 'name', value: verb}); continue; } const NUM_MATCH = /[0-9]/; if (NUM_MATCH.test(char)) { let verb = char; while (NUM_MATCH.test(input[++pos])) { verb += input[pos]; } tokens.push({type: 'number', value: verb}); continue; } const WHITE_SPACE = /\s/; if (WHITE_SPACE.test(char)) { pos++; continue; } throw new Error(`Unexpect token at ${pos}`); } return tokens; }; module.exports = tokenizer;
Parser
将token
流转换为AST
const parser = (tokens) => { let current = 0; const walk = () => { let token = tokens[current]; if (token.type === 'number') { current++; return { type: 'NumberLiteral', value: token.value, }; } if (token.type === 'paren' && token.value === '(') { token = tokens[++current]; let node = { type: 'CallExpression', name: token.value, params: [], }; token = tokens[++current]; while ( token.type !== 'paren' || (token.type === 'paren' && token.value !== ')') ) { node.params.push(walk()); token = tokens[current]; } current++; return node; } throw new TypeError(token.type); }; let ast = { type: 'Program', body: [], }; while (current < tokens.length) { ast.body.push(walk()); } return ast; }; module.exports = parser;
Traverser and Transformer
Traverser
提供了遍历 AST 的方法
Transformer
通过 Traverser
遍历语法树来修改 AST
Traverser
const traverser = (ast, visitor) => { const traverseArray = (array, parent) => { array.forEach((child) => { traverseNode(child, parent); }); }; const traverseNode = (node, parent) => { let methods = visitor[node.type]; if (methods && methods.enter) { methods.enter(node, parent); } switch (node.type) { case 'Program': traverseArray(node.body, node); break; case 'CallExpression': traverseArray(node.params, node); break; case 'NumberLiteral': break; default: throw new TypeError(node.type); } if (methods && methods.exit) { methods.exit(node, parent); } }; traverseNode(ast, null); }; module.exports = traverser;
Transformer
const traverser = require('./traverser'); const transformer = (ast) => { let newAst = { type: 'Program', body: [], }; ast._context = newAst.body; traverser(ast, { NumberLiteral: { enter(node, parent) { parent._context.push({ type: 'NumberLiteral', value: node.value, }); }, }, CallExpression: { enter(node, parent) { let expression = { type: 'CallExpression', callee: { type: 'Identifier', name: node.name, }, arguments: [], }; node._context = expression.arguments; if (parent.type !== 'CallExpression') { expression = { type: 'ExpressionStatement', expression: expression, }; } parent._context.push(expression); }, }, }); return newAst; }; module.exports = transformer;
Code Generator
将AST
重新生成为代码
const codeGenerator = (node) => { switch (node.type) { case 'Program': return node.body.map(codeGenerator).join('\n'); case 'ExpressionStatement': return codeGenerator(node.expression) + ';'; case 'CallExpression': return ( codeGenerator(node.callee) + '(' + node.arguments.map(codeGenerator).join(', ') + ')' ); case 'Identifier': return node.name; case 'NumberLiteral': return node.value; default: throw new TypeError(node.type); } }; module.exports = codeGenerator;