Anyone who is starting out with compilers and jumps straight into building tools with LLVM, or who simply wants to understand its inner workings, has probably felt that the official documentation is quite tough to get into. I am writing this post to help you understand how to develop a compiler frontend on top of the LLVM backend.
The most important thing to settle before creating a compiler is the language that it will compile.
In this post we will develop a compiler frontend for an imperative language like C. We will be creating a multipass compiler front end that will support features like
- Loop handling
- Contextual Semantics
- Input/Output (only in the form of print)
- Conditional Statements
- Binary operations
- Commenting (both inline and paragraph-wise)
- JIT compilation
The project is available here https://github.com/SatyendraBanjare/C-LLVM-compiler .
Our compiler frontend will output LLVM IR, which can be further analyzed and optimized using LLVM.
[NOTE] I expect readers to be familiar, at least in theory, with compilers, lexing and parsing.
To learn about the tools used, I recommend going through these links beforehand.
References :
- http://aquamentus.com/tut_lexyacc.html
- http://dinosaur.compilertools.net/flex/flex_11.html
- https://www.univ-orleans.fr/lifo/Members/Mirian.Halfeld/Cours/TLComp/l3-0708-LexA.pdf
- https://www.ibm.com/support/knowledgecenter/en/ssw_aix_72/com.ibm.aix.genprogc/yaac_file_declarations.htm
- https://gist.github.com/serge-sans-paille/aa332fa22692fcdfdc51
CodeOut
Let’s begin by creating the lexical rules. Following the links mentioned above, this is how the final lexer file should look. Most of it is self-explanatory. A small trick is used to implement comments; how it works is explained in http://aquamentus.com/tut_lexyacc.html . We switch the lexer into the comment / comment_oneline state and do nothing until the comment is over.
%{
/*
 * Scanner for the C-like toy language.
 * Token codes and the yylval union come from the bison-generated C_syntax.hpp.
 * `string`, `cout` and `endl` are used unqualified; AST_tree.h pulls in namespace std.
 */
#include <string>
#include <iostream>
#include "AST_tree.h"
#include "C_syntax.hpp"

/* Store the token code in yylval.token and evaluate to it, so `return TOKEN(X);` does both. */
#define TOKEN(t) (yylval.token = t)

extern int lineNumber;
%}

%option noyywrap

%x comment
%x comment_oneline

%%

"/*"                        BEGIN(comment);
<comment>[^*\n]*            /* eat anything that's not a '*' */
<comment>"*"+[^*/\n]*       /* eat up '*'s not followed by '/'s */
<comment>\n                 ++lineNumber;
<comment>"*"+"/"            BEGIN(INITIAL);

"//"                        BEGIN(comment_oneline);
<comment_oneline>[^\n]+     /* discard the comment text; without this rule flex would echo it */
<comment_oneline>\n         { ++lineNumber; BEGIN(INITIAL); }

"extern"                    return TOKEN(EXTERN);
"return"                    return TOKEN(RETURN);

"int"                       { yylval.string = new string(yytext, yyleng); return INT; }
"double"                    { yylval.string = new string(yytext, yyleng); return DOUBLE; }
"char"                      { yylval.string = new string(yytext, yyleng); return CHAR; }
"void"                      { yylval.string = new string(yytext, yyleng); return VOID; }
"const"                     { yylval.string = new string(yytext, yyleng); return CONST; }

"if"                        return TOKEN(IF);
"else"                      return TOKEN(ELSE);
"for"                       return TOKEN(FOR);
"while"                     return TOKEN(WHILE);
"break"                     return TOKEN(BREAK);
"continue"                  return TOKEN(CONTINUE);

\"(\\.|[^"\\\n])*\"         {
                                /* Stop at the first unescaped closing quote and drop both quotes. */
                                yylval.string = new string(yytext + 1, yyleng - 2);
                                return CSTR;
                            }
[_A-Za-z][_0-9A-Za-z]*      { yylval.string = new string(yytext, yyleng); return VAR; }
[0-9]+                      { yylval.string = new string(yytext, yyleng); return CINT; }
[0-9]+\.[0-9]*              { yylval.string = new string(yytext, yyleng); return CDOUBLE; }
['].[']                     {
                                /* Keep only the character between the quotes. */
                                yylval.string = new string(1, yytext[1]);
                                return CCHAR;
                            }

"("                         return TOKEN(LPAREN);
")"                         return TOKEN(RPAREN);
"["                         return TOKEN(LBRACK);
"]"                         return TOKEN(RBRACK);
"{"                         return TOKEN(LBRACE);
"}"                         return TOKEN(RBRACE);

"="                         return TOKEN(EQUAL);
"=="                        return TOKEN(EQ);
"!="                        return TOKEN(NE);
">"                         return TOKEN(GR);
">="                        return TOKEN(GE);
"<"                         return TOKEN(LW);
"<="                        return TOKEN(LE);
"&&"                        return TOKEN(AND);
"||"                        return TOKEN(OR);

"+"                         return TOKEN(ADD);
"-"                         return TOKEN(SUB);
"*"                         return TOKEN(MUL);
"/"                         return TOKEN(DIV);
"%"                         return TOKEN(MODULO);

"&"                         return TOKEN(BIT_AND);
"|"                         return TOKEN(BIT_OR);
"^"                         return TOKEN(BIT_XOR);
">>"                        return TOKEN(BIT_SHIFT_RIGHT);
"<<"                        return TOKEN(BIT_SHIFT_LEFT);

"++"                        return TOKEN(INCREMENT_OP);
"--"                        return TOKEN(DECREMENT_OP);

"+="                        return TOKEN(SADD);
"-="                        return TOKEN(SSUB);
"*="                        return TOKEN(SMUL);
"/="                        return TOKEN(SDIV);

"."                         return TOKEN(DOT);
","                         return TOKEN(COMMA);
":"                         return TOKEN(COLON);
";"                         return TOKEN(SEMICOLON);

[ \t\r]+                    ;
"\n"                        lineNumber += 1;
^"#include ".+              ;

.                           { cout << "Unknown token! " << yytext << endl; yyterminate(); }

%%
At this point we have developed the vocabulary of our language. Let us now develop the reasoning and grammar.
%{
/* Parser prologue: globals and forward declarations shared by the grammar actions below. */
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include "AST_tree.h"

/* Root of the parsed program; assigned by the `program` rule. */
BlockExprNode *root;

/*
 * Bison passes string literals here, so the parameter must be const-qualified.
 * The generic message is discarded; missing semicolons are reported by
 * noSemicolonError() from the `error` productions.
 */
void yyerror(const char *) {}
int yylex(void);

/* Current input line, advanced by the scanner and quoted in diagnostics. */
int lineNumber = 1;

/* Symbol table: identifier name -> type recorded by addNewVar(). */
map<string, E_TYPE> varTable;

void addNewVar(string name, E_TYPE type);
string typeStr(E_TYPE type);
void setVarType(VariableExprNode *);
E_TYPE checkExprType(ExprNode *lhs, ExprNode *rhs);
void noSemicolonError();
%}
/* Semantic value carried by every grammar symbol. */
%union {
    int token;
    std::string *string;    /* qualified so the member name does not shadow the type it uses */
    VariableExprNode *var;
    ExprNode *expr;
    vector<VarDecStatementNode*> *vars;
    vector<ExprNode*> *exprs;
    BlockExprNode *block;
    StatementNode *statement;
    VarDecStatementNode *var_dec;
}

%token <string> INT DOUBLE CHAR VOID CONST                              /* Basic Type Names */
%token <string> CSTR CINT CDOUBLE CCHAR                                 /* Const Literal */
%token <string> VAR                                                     /* Variable Names */
%token <token> IF ELSE FOR WHILE BREAK CONTINUE                         /* Flow Controllers */
%token <token> LPAREN RPAREN LBRACK RBRACK LBRACE RBRACE                /* Enclosures */
%token <token> EQ NE GR GE LW LE AND OR EQUAL ADD SUB MUL DIV MODULO    /* Binary Operators */
%token <token> BIT_AND BIT_OR BIT_SHIFT_RIGHT BIT_SHIFT_LEFT BIT_XOR    /* Logical Operators */
%token <token> SADD SSUB SMUL SDIV                                      /* Self Operators */
%token <token> DOT COMMA COLON SEMICOLON                                /* Endings */
%token <token> EXTERN RETURN                                            /* Others */
%token <token> INCREMENT_OP DECREMENT_OP

%type <var> variable type
%type <vars> function_args
%type <expr> expr const logic_expr
%type <exprs> invoke_args
%type <block> program block global_block local_block
%type <statement> global_statement local_statement
%type <statement> variable_declaration
%type <statement> array_declaration function_declaration extern_function_declaration
%type <statement> condition loop
%type <token> compare

/*
 * Operator precedence, lowest first.
 * Compound assignments sit with `=` at the lowest level so that the whole
 * right-hand side is parsed first: `a += b + c` means `a += (b + c)`.
 */
%right EQUAL SADD SSUB SMUL SDIV
%left EQ NE GR GE LW LE
%left AND OR
%left ADD SUB
%left MUL DIV MODULO
%left BIT_AND BIT_OR BIT_SHIFT_RIGHT BIT_SHIFT_LEFT BIT_COMP BIT_XOR
/* Dangling else: an `if` without `else` binds weaker than the ELSE token, so ELSE is shifted. */
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE
This is the first part, where we have declared the token values, the type values and the associativity rules. We have created a union that holds the node types (such as block expressions) described later. Finally, we have created a map from each variable name to its type.
%%

/* Start symbol: the whole translation unit; its block becomes the AST root. */
program: global_block { root = $1; };
/* One or more top-level statements collected into a single block node. */
global_block: global_statement { $$ = new BlockExprNode(); $$->statements->push_back($<statement>1); }
    /* Append to the existing block and hand it on explicitly instead of relying on an unassigned $$. */
    | global_block global_statement { $1->statements->push_back($<statement>2); $$ = $1; }
    ;
/* A statement allowed at file scope: function definition, extern prototype, or expression. */
global_statement: function_declaration { $$ = $1; }
    | extern_function_declaration SEMICOLON { $$ = $1; }
    /* Error recovery: report the missing ';' and keep the declaration. */
    | extern_function_declaration error { noSemicolonError(); $$ = $1; }
    | expr SEMICOLON { $$ = new ExprStatementNode($1); }
    ;
/* Function definition: return type, name, parameter list and body. */
function_declaration: type variable LPAREN function_args RPAREN block { $$ = new FuncDecStatementNode($1, $2, $4, $6); };

/* Prototype of an externally defined function (no body). */
extern_function_declaration: EXTERN type variable LPAREN function_args RPAREN { $$ = new ExternFuncDecStatementNode($2, $3, $5); };
/* Type name: a VariableExprNode carrying the keyword text and its E_TYPE tag. */
type: INT { $$ = new VariableExprNode(*$1, E_INT); delete $1; }
    | DOUBLE { $$ = new VariableExprNode(*$1, E_DOUBLE); delete $1; }
    | CHAR { $$ = new VariableExprNode(*$1, E_CHAR); delete $1; }
    | VOID { $$ = new VariableExprNode(*$1, E_VOID); delete $1; }
    /* NOTE(review): the inner `type` node ($2) is neither used nor freed here. */
    | CONST type { $$ = new VariableExprNode(*$1, E_CONST); delete $1; }
    ;
/* Identifier: wraps the name in a VariableExprNode and frees the scanner's string. */
variable: VAR { $$ = new VariableExprNode(*$1); delete $1; };
/* Formal parameter list: empty, or comma-separated variable declarations. */
function_args: /* NULL */ { $$ = new vector<VarDecStatementNode*>(); }
    | variable_declaration { $$ = new vector<VarDecStatementNode*>(); $$->push_back($<var_dec>1); }
    | function_args COMMA variable_declaration { $1->push_back($<var_dec>3); $$ = $1; }
    ;
/* Body of a function, branch or loop: a braced block or one bare statement. */
block: LBRACE local_block RBRACE { $$ = $2; }
    | local_statement { $$ = new BlockExprNode(); $$->statements->push_back($<statement>1); }
    ;
/* Scalar declaration, optionally initialised; the name is recorded in the symbol table. */
variable_declaration: type variable { $2->_type = $1->_type; $$ = new VarDecStatementNode($1, $2); addNewVar($2->name, $2->_type); }
    /* checkExprType is called for its diagnostics only; its result is not used. */
    | type variable EQUAL expr { $2->_type = $1->_type; $$ = new VarDecStatementNode($1, $2, $4); addNewVar($2->name, $2->_type); checkExprType($2, $4); }
    ;
/* One or more statements inside braces, collected into a single block node. */
local_block: local_statement { $$ = new BlockExprNode(); $$->statements->push_back($<statement>1); }
    /* Append to the existing block and hand it on explicitly instead of relying on an unassigned $$. */
    | local_block local_statement { $1->statements->push_back($<statement>2); $$ = $1; }
    ;
/* A statement allowed inside a function body. */
local_statement: variable_declaration SEMICOLON { $$ = $1; }
    | array_declaration SEMICOLON { $$ = $1; }
    | condition { $$ = $1; }
    | loop { $$ = $1; }
    | expr SEMICOLON { $$ = new ExprStatementNode($1); }
    | RETURN expr SEMICOLON { $$ = new ReturnStatementNode($2); }
    /*
     * Empty statement: yield a well-defined null instead of leaving $$ holding
     * the token value reinterpreted as a pointer.
     * NOTE(review): confirm that consumers of `statements` skip null entries.
     */
    | SEMICOLON { $$ = NULL; }
    /* Error recovery: report the missing ';' and keep the statement. */
    | variable_declaration error { noSemicolonError(); $$ = $1; }
    | array_declaration error { noSemicolonError(); $$ = $1; }
    | expr error { noSemicolonError(); $$ = new ExprStatementNode($1); }
    ;
/* Expressions. Every action builds the AST node and records its static type in _type. */
expr: variable { $<var>$ = $1; }
    | const { $$ = $1; }
    /* Binary arithmetic and bitwise operators: result type comes from checkExprType. */
    | expr ADD expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr SUB expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr MUL expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr DIV expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr MODULO expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr BIT_AND expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr BIT_OR expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr BIT_SHIFT_RIGHT expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr BIT_SHIFT_LEFT expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    | expr BIT_XOR expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = checkExprType($1, $3); }
    /* Compound assignment `v op= e`: build `v op e`, then assign the result back to v. */
    | variable SADD expr { $$ = new OperatorExprNode($1, $2, $3); $$ = new AssignExprNode($1, $$); setVarType($1); $$->_type = checkExprType($1, $3); }
    | variable SSUB expr { $$ = new OperatorExprNode($1, $2, $3); $$ = new AssignExprNode($1, $$); setVarType($1); $$->_type = checkExprType($1, $3); }
    | variable SMUL expr { $$ = new OperatorExprNode($1, $2, $3); $$ = new AssignExprNode($1, $$); setVarType($1); $$->_type = checkExprType($1, $3); }
    | variable SDIV expr { $$ = new OperatorExprNode($1, $2, $3); $$ = new AssignExprNode($1, $$); setVarType($1); $$->_type = checkExprType($1, $3); }
    /* Comparisons always have type int. */
    | expr compare expr { $$ = new OperatorExprNode($1, $2, $3); $$->_type = E_INT; }
    | variable EQUAL expr { $$ = new AssignExprNode($1, $3); setVarType($1); checkExprType($1, $3); $$->_type = $1->_type; }
    /* Call: the callee name is recorded in the symbol table as E_FUNC. */
    | variable LPAREN invoke_args RPAREN { $$ = new FuncExprNode($1, $3); addNewVar($1->name, E_FUNC); setVarType($1); $$->_type = $1->_type; }
    /* Array element read and write. */
    | variable LBRACK expr RBRACK { $$ = new IndexExprNode($1, $3); $$->_type = $1->_type; }
    | variable LBRACK expr RBRACK EQUAL expr { $$ = new IndexExprNode($1, $3, $6); checkExprType($1, $6); $$->_type = $1->_type; }
    /* Compound assignment to an element: read `v[i]`, apply the operator, store back to `v[i]`. */
    | variable LBRACK expr RBRACK SADD expr { $$ = new IndexExprNode($1, $3); $$ = new OperatorExprNode($$, $5, $6); $$ = new IndexExprNode($1, $3, $$); checkExprType($1, $6); $$->_type = $1->_type; }
    | variable LBRACK expr RBRACK SSUB expr { $$ = new IndexExprNode($1, $3); $$ = new OperatorExprNode($$, $5, $6); $$ = new IndexExprNode($1, $3, $$); checkExprType($1, $6); $$->_type = $1->_type; }
    | variable LBRACK expr RBRACK SMUL expr { $$ = new IndexExprNode($1, $3); $$ = new OperatorExprNode($$, $5, $6); $$ = new IndexExprNode($1, $3, $$); checkExprType($1, $6); $$->_type = $1->_type; }
    | variable LBRACK expr RBRACK SDIV expr { $$ = new IndexExprNode($1, $3); $$ = new OperatorExprNode($$, $5, $6); $$ = new IndexExprNode($1, $3, $$); checkExprType($1, $6); $$->_type = $1->_type; }
    | LPAREN expr RPAREN { $$ = $2; }
    /* C-style cast: the result takes the type named in the parentheses. */
    | LPAREN type RPAREN expr { $$ = new CastExprNode($2, $4); $$->_type = $2->_type; }
    ;
/* Array declaration: fixed size, string initialiser, or brace-enclosed initialiser list. */
array_declaration: type variable LBRACK CINT RBRACK { $2->_type = $1->_type; $$ = new ArrayDecStatementNode($1, $2, atol($4->c_str())); delete $4; }
    | type variable LBRACK RBRACK EQUAL CSTR { $2->_type = $1->_type; $$ = new ArrayDecStatementNode($1, $2, *$6); }
    | type variable LBRACK RBRACK EQUAL LBRACE invoke_args RBRACE { $2->_type = $1->_type; $$ = new ArrayDecStatementNode($1, $2, $7); }
    ;
/* if / if-else. %prec LOWER_THAN_ELSE makes a following ELSE attach to the nearest `if`. */
condition: IF LPAREN logic_expr RPAREN block %prec LOWER_THAN_ELSE { $$ = new IfStatementNode($3, $5); }
    | IF LPAREN logic_expr RPAREN block ELSE block { $$ = new IfStatementNode($3, $5, $7); }
    ;
/* Loops: for (init; condition; step) body, and while (condition) body. */
loop: FOR LPAREN expr SEMICOLON logic_expr SEMICOLON expr RPAREN block { $$ = new ForStatementNode($3, $5, $7, $9); }
    | WHILE LPAREN logic_expr RPAREN block { $$ = new WhileStatementNode($3, $5); }
    ;
/* Literal constants, including negated numeric literals. */
const: CINT { $$ = new IntExprNode(atoi($1->c_str())); delete $1; }
    /* atof, not atoi: the fractional part must survive. */
    | CDOUBLE { $$ = new DoubleExprNode(atof($1->c_str())); delete $1; }
    | CCHAR { $$ = new CharExprNode($1->front()); delete $1; }
    | SUB CINT { $$ = new IntExprNode(-atol($2->c_str())); delete $2; }
    /* A negated double stays a double rather than being truncated into an int node. */
    | SUB CDOUBLE { $$ = new DoubleExprNode(-atof($2->c_str())); delete $2; }
    ;
/* Comparison operator: passes the token code through for OperatorExprNode. */
compare: EQ { $$ = $1; }
    | NE { $$ = $1; }
    | GR { $$ = $1; }
    | GE { $$ = $1; }
    | LW { $$ = $1; }
    | LE { $$ = $1; }
    ;
/* Actual argument list (also used for array initialiser lists): empty, or comma-separated expressions. */
invoke_args: /* NULL */ { $$ = new vector<ExprNode*>(); }
    | expr { $$ = new vector<ExprNode*>(); $$->push_back($1); }
    | invoke_args COMMA expr { $1->push_back($3); $$ = $1; }
    ;
/* Boolean combinations used as `if`, `for` and `while` conditions. */
logic_expr: logic_expr OR logic_expr { $$ = new OperatorExprNode($1, $2, $3); }
    | logic_expr AND logic_expr { $$ = new OperatorExprNode($1, $2, $3); }
    | expr { $$ = $1; }
    ;

%%
This is the second part, where we describe all the expressions. We have created the blocks that will be used later when writing the implementation.
/**
 * Record `name` in the global symbol table with type `type`.
 *
 * The entry is always written. If the name is already present and the new
 * type is not E_FUNC, a redefinition diagnostic quoting the old and new
 * types is printed first.
 */
void addNewVar(string name, E_TYPE type) {
    map<string, E_TYPE>::iterator existing = varTable.find(name);
    if (existing != varTable.end() && type != E_FUNC) {
        cout << "line " << lineNumber << ": redefinition of variable " << name << " from (" << typeStr(existing->second) << ") to (" << typeStr(type) << ")." << endl;
    }
    varTable[name] = type;
}
/**
 * Human-readable name of an E_TYPE value, used in diagnostics.
 * Values without a dedicated case (including E_UNKNOWN) map to "unknown".
 */
string typeStr(E_TYPE type) {
    switch (type) {
        case E_VOID:   return "void";
        case E_CONST:  return "const";
        case E_INT:    return "int";
        case E_CHAR:   return "char";
        case E_DOUBLE: return "double";
        case E_PTR:    return "pointer";
        case E_FUNC:   return "function part";
        default:       return "unknown";
    }
}
/**
 * Work out the result type of combining two expressions.
 *
 * Returns E_UNKNOWN (after printing a diagnostic) if either side is
 * E_UNKNOWN. Otherwise the operand types are ordered by their E_TYPE
 * enumerator value: if the larger one is E_FUNC the smaller one is returned,
 * else the larger one is returned, with an implicit-conversion notice when
 * the two differ.
 */
E_TYPE checkExprType(ExprNode *lhs, ExprNode *rhs) {
    if (lhs->_type == E_UNKNOWN) {
        cout << "line " << lineNumber << ": unknown expression type on lhs" << endl;
        return E_UNKNOWN;
    }
    if (rhs->_type == E_UNKNOWN) {
        cout << "line " << lineNumber << ": unknown expression type on rhs" << endl;
        return E_UNKNOWN;
    }

    // Order the two operand types by enumerator value.
    E_TYPE narrower = lhs->_type;
    E_TYPE wider = rhs->_type;
    if (wider < narrower) {
        narrower = rhs->_type;
        wider = lhs->_type;
    }

    // When one side is E_FUNC, the other operand's type decides the result.
    if (wider == E_FUNC) {
        return narrower;
    }
    if (narrower != wider) {
        cout << "line " << lineNumber << ": implicitly convert type " << typeStr(narrower) << " to " << typeStr(wider) << "." << endl;
    }
    return wider;
}
/** Print a missing-semicolon diagnostic for the current input line. */
void noSemicolonError() {
    cout << "line " << lineNumber << ": missed Semicolon." << endl;
}
/**
 * Set `var->_type` from the symbol table entry for `var->name`,
 * or to E_UNKNOWN when the name has not been recorded.
 */
void setVarType(VariableExprNode *var) {
    map<string, E_TYPE>::iterator found = varTable.find(var->name);
    var->_type = (found == varTable.end()) ? E_UNKNOWN : found->second;
}
void printVarTable() { | |
std::map<std::string, E_TYPE>::iterator it; | |
for (it = varTable.begin(); it != varTable.end(); it++) { | |
cout << (*it).first << " : " << typeStr((*it).second) << std::endl; | |
} | |
} |
These helper functions support semantic checking: they maintain the variable table, report redefinitions and implicit type conversions, and look up the type of each variable.
Here is the complete file. https://gist.github.com/SatyendraBanjare/a9f12d927be4c3fc0537a41ea2573b4d
Now that we have developed our grammar, let us implement it.
-
We will create a wrapper header for the methods implemented.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters#ifndef AST_TREE_H #define AST_TREE_H #include <iostream> #include <sstream> #include <fstream> #include <vector> #include <stack> #include <set> #include <string> #include <algorithm> #include <typeinfo> #include <llvm/IR/Value.h> #include <llvm/IR/Module.h> #include <llvm/IR/Function.h> #include <llvm/IR/Type.h> #include <llvm/IR/Verifier.h> #include <llvm/IR/DerivedTypes.h> #include <llvm/IR/LLVMContext.h> #include <llvm/IR/Instructions.h> #include <llvm/IR/CallingConv.h> #include <llvm/IR/IRPrintingPasses.h> #include <llvm/IR/IRBuilder.h> #include <llvm/IR/PassManager.h> #include <llvm/IR/BasicBlock.h> #include <llvm/IR/Constants.h> #include <llvm/IR/GlobalVariable.h> #include <llvm/IR/InlineAsm.h> #include <llvm/Bitcode/ReaderWriter.h> #include <llvm/Support/TargetSelect.h> #include <llvm/Support/FormattedStream.h> #include <llvm/Support/MathExtras.h> #include <llvm/Support/raw_ostream.h> #include <llvm/Support/FileSystem.h> #include <llvm/ExecutionEngine/GenericValue.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/MCJIT.h> #include <llvm/ADT/SmallVector.h> #include <llvm/Pass.h> using namespace llvm; using namespace std; enum E_TYPE { E_UNKNOWN = -1, E_VOID = 0, E_CONST, E_CHAR, E_INT, E_DOUBLE, E_PTR, E_FUNC, }; class GenBlock { public: BasicBlock *block; map<string, Value*> locals; }; class BlockExprNode; class GenContext { public: stack<GenBlock *> blocks; map<string, Value*> globalVariables; Function *mainFunction; Function *tempFunction; string code; bool funcDeclaring; Module *module; public: GenContext() { funcDeclaring = false; module = new Module("main", getGlobalContext()); } void CodeGen(BlockExprNode& root); void OutputCode(ostream &out); GenericValue run(); 
map<string, Value*>& locals() { return blocks.top()->locals; } map<string, Value*>& globals() { return globalVariables; } BasicBlock *context() { return blocks.top()->block; } Function* currentFunction() { return tempFunction; } void currentFunction(Function *function) { tempFunction = function; } void ret(BasicBlock* block) { blocks.top()->block = block; } void push(BasicBlock *block, bool copy_locals = true) { GenBlock* new_block = new GenBlock(); new_block->block = block; if(copy_locals) { map<string, Value*> prev_locals = map<string, Value*>(blocks.top()->locals); new_block->locals = prev_locals; } blocks.push(new_block); } void pop() { GenBlock *top = blocks.top(); blocks.pop(); delete top; } }; class ASTNode { public: ASTNode() {}; virtual ~ASTNode(){}; virtual Value *CodeGen(GenContext&) = 0; }; class ExprNode: public ASTNode { public: E_TYPE _type; }; class StatementNode: public ASTNode {}; class IntExprNode: public ExprNode { public: long long val; public: IntExprNode(long long val): val(val) { _type = E_INT; } virtual Value *CodeGen(GenContext&); }; class CharExprNode: public ExprNode { public: char val; public: CharExprNode(char val): val(val) { _type = E_CHAR; } virtual Value *CodeGen(GenContext&); }; class DoubleExprNode: public ExprNode { double val; public: DoubleExprNode(double val): val(val) { _type = E_DOUBLE; } virtual Value *CodeGen(GenContext&); }; class VariableExprNode: public ExprNode { public: string name; public: VariableExprNode(const string &name, E_TYPE type = E_UNKNOWN): name(name) { _type = type; } virtual Value *CodeGen(GenContext&); }; class OperatorExprNode: public ExprNode { public: int op; ExprNode *left, *right; public: OperatorExprNode(ExprNode *left, int op, ExprNode *right): left(left), right(right), op(op) {} virtual Value *CodeGen(GenContext&); }; class BlockExprNode: public ExprNode { public: vector<StatementNode*> *statements; public: BlockExprNode(): statements(new vector<StatementNode*>()) {} virtual Value 
*CodeGen(GenContext&); }; class AssignExprNode: public ExprNode { public: VariableExprNode *left; ExprNode *right; public: AssignExprNode(VariableExprNode *left, ExprNode *right): left(left), right(right) {} virtual Value *CodeGen(GenContext&); }; class FuncExprNode: public ExprNode { public: VariableExprNode *functor; vector<ExprNode*> *args; public: FuncExprNode(VariableExprNode *functor): functor(functor), args(new vector<ExprNode*>()) {} FuncExprNode(VariableExprNode *functor, vector<ExprNode*> *args): functor(functor), args(args) {} virtual Value *CodeGen(GenContext&); }; class CastExprNode: public ExprNode { public: VariableExprNode *type; ExprNode *expr; public: CastExprNode(VariableExprNode *type, ExprNode *expr): type(type), expr(expr) {} virtual Value *CodeGen(GenContext&); }; class ExprStatementNode : public StatementNode { public: ExprNode *expr; public: ExprStatementNode(ExprNode *expr): expr(expr) {} virtual Value *CodeGen(GenContext&); }; class ReturnStatementNode : public StatementNode { public: ExprNode *expr; public: ReturnStatementNode(ExprNode *expr): expr(expr) {} virtual Value *CodeGen(GenContext&); }; class VarDecStatementNode : public StatementNode { public: VariableExprNode *type; VariableExprNode *name; ExprNode *expr; public: VarDecStatementNode(VariableExprNode *type, VariableExprNode *name): type(type), name(name), expr(NULL) {} VarDecStatementNode(VariableExprNode *type, VariableExprNode *name, ExprNode *expr): type(type), name(name), expr(expr) {} virtual Value *CodeGen(GenContext&); }; class ArrayDecStatementNode : public StatementNode { public: VariableExprNode *type; VariableExprNode *name; vector<ExprNode*> *init; long long size; bool isString; public: ArrayDecStatementNode(VariableExprNode *type, VariableExprNode *name, long long size): type(type), name(name), init(new vector<ExprNode*>()), size(size), isString(false) {} ArrayDecStatementNode(VariableExprNode *type, VariableExprNode *name, vector<ExprNode*> *init): type(type), 
name(name), init(init), size(init->size()), isString(false) {} ArrayDecStatementNode(VariableExprNode *type, VariableExprNode *name, const string &str): type(type), name(name), init(new vector<ExprNode*>()), isString(true) { for(auto it = str.begin(); it != str.end(); it++) init->push_back((ExprNode*)(new CharExprNode(*it))); init->push_back((ExprNode*)(new CharExprNode(0))); size = init->size() + 1; } virtual Value *CodeGen(GenContext&); }; class IndexExprNode : public ExprNode { public: VariableExprNode *name; ExprNode *expr; ExprNode *assign; public: IndexExprNode(VariableExprNode *name, ExprNode *expr): name(name), expr(expr), assign(NULL) {} IndexExprNode(VariableExprNode *name, ExprNode *expr, ExprNode *assign): name(name), expr(expr), assign(assign) {} virtual Value *CodeGen(GenContext&); }; class FuncDecStatementNode : public StatementNode { public: VariableExprNode *type; VariableExprNode *name; vector<VarDecStatementNode*> *args; BlockExprNode *block; public: FuncDecStatementNode(VariableExprNode *type, VariableExprNode *name, vector<VarDecStatementNode*> *args, BlockExprNode *block): type(type), name(name), args(args), block(block) {} virtual Value *CodeGen(GenContext&); }; class ExternFuncDecStatementNode : public StatementNode { public: VariableExprNode *type; VariableExprNode *name; vector<VarDecStatementNode*> *args; public: ExternFuncDecStatementNode(VariableExprNode *type, VariableExprNode *name, vector<VarDecStatementNode*> *_args): type(type), name(name), args(_args) { vector<VarDecStatementNode*>::const_iterator it; } virtual Value *CodeGen(GenContext&); }; class IfStatementNode : public StatementNode { public: ExprNode *condExpr; BlockExprNode *trueBlock; BlockExprNode *falseBlock; public: IfStatementNode(ExprNode *condExpr, BlockExprNode *trueBlock): condExpr(condExpr), trueBlock(trueBlock), falseBlock(new BlockExprNode()) {} IfStatementNode(ExprNode *condExpr, BlockExprNode *trueBlock, BlockExprNode *falseBlock): condExpr(condExpr), 
trueBlock(trueBlock), falseBlock(falseBlock) {} virtual Value *CodeGen(GenContext&); }; class ForStatementNode : public StatementNode { public: ExprNode *initExpr; ExprNode *condExpr; ExprNode *loopExpr; BlockExprNode *block; public: ForStatementNode(ExprNode *initExpr, ExprNode *condExpr, ExprNode *loopExpr, BlockExprNode *block): initExpr(initExpr), condExpr(condExpr), loopExpr(loopExpr), block(block) {} virtual Value *CodeGen(GenContext&); }; class WhileStatementNode : public StatementNode { public: ExprNode *whileExpr; BlockExprNode *block; public: WhileStatementNode(ExprNode *whileExpr, BlockExprNode *block): whileExpr(whileExpr), block(block) {} virtual Value *CodeGen(GenContext&); }; #endif -
Define the functions to be used.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters#include "AST_tree.h" #include "C_syntax.hpp" extern int yyparse(); extern BlockExprNode* root; bool error = false; static Value* cast(Value* value, Type* type, GenContext& context); Function* printfFunction(GenContext& context) { vector<Type*> printfArgs; printfArgs.push_back(Type::getInt8PtrTy(getGlobalContext())); FunctionType* printfType = FunctionType::get(Type::getInt32Ty(getGlobalContext()), printfArgs, true); Function *printfFunc = Function::Create(printfType, Function::ExternalLinkage, Twine("printf"), context.module); printfFunc->setCallingConv(CallingConv::C); return printfFunc; } Function* strlenFunction(GenContext& context) { vector<Type*> strlenArgs; strlenArgs.push_back(Type::getInt8PtrTy(getGlobalContext())); FunctionType* strlenType = FunctionType::get(Type::getInt64Ty(getGlobalContext()), strlenArgs, false); Function *strlenFunc = Function::Create(strlenType, Function::ExternalLinkage, Twine("strlen"), context.module); strlenFunc->setCallingConv(CallingConv::C); return strlenFunc; } Function* isdigitFunction(GenContext& context) { vector<Type*> isdigitArgs; isdigitArgs.push_back(Type::getInt8Ty(getGlobalContext())); FunctionType* isdigitType = FunctionType::get(Type::getInt1Ty(getGlobalContext()), isdigitArgs, false); Function *isdigitFunc = Function::Create(isdigitType, Function::ExternalLinkage, Twine("isdigit"), context.module); isdigitFunc->setCallingConv(CallingConv::C); return isdigitFunc; } Function* atoiFunction(GenContext& context) { vector<Type*> atoiArgs; atoiArgs.push_back(Type::getInt8PtrTy(getGlobalContext())); FunctionType* atoiType =FunctionType::get(Type::getInt64Ty(getGlobalContext()), atoiArgs, false); Function *atoiFunc = Function::Create(atoiType, Function::ExternalLinkage, 
Twine("atoi"), context.module); atoiFunc->setCallingConv(CallingConv::C); return atoiFunc; } void linkExternalFunctions(GenContext& context) { printfFunction(context); strlenFunction(context); isdigitFunction(context); atoiFunction(context); } void GenContext::CodeGen(BlockExprNode& root) { vector<Type*> arg_types; FunctionType *ftype = FunctionType::get(Type::getVoidTy(getGlobalContext()), makeArrayRef(arg_types), false); SmallVector<AttributeSet, 4> attrs; AttrBuilder builder; builder.addAttribute(Attribute::NoUnwind); attrs.push_back(AttributeSet::get(getGlobalContext(), ~0U, builder)); AttributeSet funcFunc = AttributeSet::get(getGlobalContext(), attrs); root.CodeGen(*this); PassManager<Module> pm; raw_string_ostream *out = new raw_string_ostream(code); pm.addPass(PrintModulePass(*out)); pm.run(*module); } void GenContext::OutputCode(ostream &out) { out << code; } GenericValue GenContext::run() { ExecutionEngine *ee = EngineBuilder(unique_ptr<Module>(module)).create(); vector<GenericValue> noargs; GenericValue v; ee->finalizeObject(); mainFunction = module->getFunction("main"); v = ee->runFunction(mainFunction, noargs); return v; } static Type *typeOf(VariableExprNode *var) { Type *type; if (var->name == "int") type = Type::getInt64Ty(getGlobalContext()); else if (var->name == "char") type = Type::getInt8Ty(getGlobalContext()); else if (var->name == "double") type = Type::getDoubleTy(getGlobalContext()); else if (var->name == "void") type = Type::getVoidTy(getGlobalContext()); return type; } Value* VariableExprNode::CodeGen(GenContext& context) { Value* val; if (!context.funcDeclaring || context.locals().find(name) == context.locals().end()) { if (context.globals().find(name) == context.globals().end()) { cerr << "Undeclared Variable " << name << endl; return NULL; } else val = context.globals()[name]; } else val = context.locals()[name]; if (((AllocaInst*)val)->getAllocatedType()->isArrayTy()) { ConstantInt* constInt = ConstantInt::get(getGlobalContext(), 
APInt(32, StringRef("0"), 10)); vector<Value*> args; args.push_back(constInt); args.push_back(constInt); Type* type; type = ((AllocaInst*)val)->getAllocatedType(); val = GetElementPtrInst::Create(type, val, args, "", context.context()); return val; } else return new LoadInst(val, "", false, context.context()); } Value* CharExprNode::CodeGen(GenContext& context) { return ConstantInt::get(Type::getInt8Ty(getGlobalContext()), val, true); } Value* IntExprNode::CodeGen(GenContext& context) { return ConstantInt::get(Type::getInt64Ty(getGlobalContext()), val, true); } Value* DoubleExprNode::CodeGen(GenContext& context) { return ConstantFP::get(Type::getDoubleTy(getGlobalContext()), val); } Value* BlockExprNode::CodeGen(GenContext& context) { Value *returnValue = NULL; for (auto it = statements->begin(); it != statements->end(); it++) returnValue = (*it)->CodeGen(context); return returnValue; } Value* OperatorExprNode::CodeGen(GenContext& context) { Instruction::BinaryOps instr; ICmpInst::Predicate pred; bool floatOp = false; bool mathOP = false; Value* leftVal = left->CodeGen(context); Value* rightVal = right->CodeGen(context); if (leftVal->getType()->isDoubleTy() || rightVal->getType()->isDoubleTy()) { leftVal = cast(leftVal, Type::getDoubleTy(getGlobalContext()), context); rightVal = cast(rightVal, Type::getDoubleTy(getGlobalContext()), context); floatOp = true; } else if (leftVal->getType() == rightVal->getType()) { } else { leftVal = cast(leftVal, Type::getInt64Ty(getGlobalContext()), context); rightVal = cast(rightVal, Type::getInt64Ty(getGlobalContext()), context); } if (!floatOp) { switch (op) { case EQ: pred = ICmpInst::ICMP_EQ; break; case NE: pred = ICmpInst::ICMP_NE; break; case GR: pred = ICmpInst::ICMP_SGT; break; case LW: pred = ICmpInst::ICMP_SLT; break; case GE: pred = ICmpInst::ICMP_SGE; break; case LE: pred = ICmpInst::ICMP_SLE; break; case ADD: case SADD: instr = Instruction::Add; mathOP=true; break; case SUB: case SSUB: instr = Instruction::Sub; 
mathOP=true; break; case MUL: case SMUL: instr = Instruction::Mul; mathOP=true; break; case DIV: case SDIV: instr = Instruction::SDiv; mathOP=true; break; case OR: instr = Instruction::Or; mathOP=true; break; case AND: instr = Instruction::And; mathOP=true; break; case MODULO: instr = Instruction::SRem; mathOP=true; break; case BIT_AND: instr = Instruction::And; mathOP=true; break; case BIT_OR: instr = Instruction::Or; mathOP=true; break; case BIT_SHIFT_RIGHT: instr = Instruction::LShr; mathOP=true; break; case BIT_SHIFT_LEFT: instr = Instruction::Shl; mathOP=true; break; case BIT_XOR: instr = Instruction::Xor; mathOP=true; break; } } else { switch (op) { case EQ: pred = ICmpInst::FCMP_OEQ; break; case NE: pred = ICmpInst::FCMP_ONE; break; case GR: pred = ICmpInst::FCMP_OGT; break; case LW: pred = ICmpInst::FCMP_OLT; break; case GE: pred = ICmpInst::FCMP_OGE; break; case LE: pred = ICmpInst::FCMP_OLE; break; case ADD: case SADD: instr = Instruction::FAdd; mathOP=true; break; case SUB: case SSUB: instr = Instruction::FSub; mathOP=true; break; case MUL: case SMUL: instr = Instruction::FMul; mathOP=true; break; case DIV: case SDIV: instr = Instruction::FDiv; mathOP=true; break; } } if (mathOP) return BinaryOperator::Create(instr, leftVal, rightVal, "", context.context()); else return new ICmpInst(*context.context(), pred, leftVal, rightVal, ""); } Value* AssignExprNode::CodeGen(GenContext& context) { Value* rightVal; Value* leftVal; if (!context.funcDeclaring || context.locals().find(left->name) == context.locals().end()) { if (context.globals().find(left->name) == context.globals().end()) return NULL; else leftVal = context.globals()[left->name]; } else leftVal = context.locals()[left->name]; rightVal = right->CodeGen(context); if (leftVal->getType() == Type::getInt64PtrTy(getGlobalContext())) rightVal = cast(rightVal, Type::getInt64Ty(getGlobalContext()), context); else if (leftVal->getType() == Type::getDoublePtrTy(getGlobalContext())) rightVal = cast(rightVal, 
Type::getDoubleTy(getGlobalContext()), context); else if (leftVal->getType() == Type::getInt8PtrTy(getGlobalContext())) rightVal = cast(rightVal, Type::getInt8Ty(getGlobalContext()), context); return new StoreInst(rightVal, leftVal, false, context.context()); } Value* FuncExprNode::CodeGen(GenContext& context) { // Get functor Function *function = context.module->getFunction(functor->name.c_str()); if (function == NULL) cerr << "No such function " << functor->name << endl; vector<Value*> argsRef; for (auto it = args->begin(); it != args->end(); it++) argsRef.push_back((*it)->CodeGen(context)); CallInst *call = CallInst::Create(function, makeArrayRef(argsRef), "", context.context()); return call; } Value* CastExprNode::CodeGen(GenContext& context) { Value* value = expr->CodeGen(context); Type* castType = typeOf(type); value = cast(value, castType, context); return value; } Value* IndexExprNode::CodeGen(GenContext& context) { Value* array = name->CodeGen(context); Value* num = cast(expr->CodeGen(context), Type::getInt64Ty(getGlobalContext()), context); num = new TruncInst(num, Type::getInt32Ty(getGlobalContext()), "", context.context()); Type* arrayType = cast<PointerType>(array->getType()->getScalarType())->getElementType(); Instruction* instr; Value* retInst; instr = GetElementPtrInst::Create(arrayType, array, num, "", context.context()); // whether read or write if (assign == NULL) retInst = new LoadInst(instr, "", false, context.context()); else retInst = new StoreInst(assign->CodeGen(context), instr, false, context.context()); return retInst; } Value* ExprStatementNode::CodeGen(GenContext& context) { return expr->CodeGen(context); } Value* VarDecStatementNode::CodeGen(GenContext& context) { Value* newVar; newVar = new AllocaInst(typeOf(type), name->name.c_str(), context.context()); context.locals()[name->name] = newVar; if (expr != NULL) { AssignExprNode assign(name, expr); assign.CodeGen(context); } return newVar; } Value* 
ArrayDecStatementNode::CodeGen(GenContext& context) { ArrayType* arrayType = ArrayType::get(typeOf(type), size); AllocaInst *alloc = new AllocaInst(arrayType, name->name.c_str(), context.context()); context.locals()[name->name] = alloc; if (init->size() != 0) { for (auto it = init->begin(); it != init->end(); ++it) { ExprNode* num = new IntExprNode(it - init->begin()); IndexExprNode a(name, num, (*it)); a.CodeGen(context); delete num; } } return alloc; } Value* ReturnStatementNode::CodeGen(GenContext& context) { return expr->CodeGen(context); } Value* FuncDecStatementNode::CodeGen(GenContext& context) { // Function type vector<Type*> argTypeRef; for (auto it = args->begin(); it != args->end(); it++) argTypeRef.push_back(typeOf((*it)->type)); FunctionType *funcType = FunctionType::get(typeOf(type), ArrayRef<Type*>(argTypeRef), false); Function *function = Function::Create(funcType, GlobalValue::ExternalLinkage, name->name.c_str(), context.module); function->setCallingConv(CallingConv::C); SmallVector<AttributeSet, 4> Attrs; AttrBuilder Builder; Builder.addAttribute(Attribute::NoUnwind); Attrs.push_back(AttributeSet::get(getGlobalContext(), ~0U, Builder)); AttributeSet funcFuncAttrSet = AttributeSet::get(getGlobalContext(), Attrs); function->setAttributes(funcFuncAttrSet); context.currentFunction(function); context.funcDeclaring = true; // Start function BasicBlock *funcBlock = BasicBlock::Create(getGlobalContext(), "", function, 0); context.push(funcBlock, false); Function::arg_iterator argsValues = function->arg_begin(); for (auto it = args->begin(); it != args->end(); it++, argsValues++) { (*it)->CodeGen(context); Value *argumentValue = &(*argsValues); argumentValue->setName((*it)->name->name.c_str()); StoreInst *inst = new StoreInst(argumentValue, context.locals()[(*it)->name->name], false, funcBlock); } // Get return value Value* returnValue = block->CodeGen(context); context.funcDeclaring = false; // Return BasicBlock *returnBlock = 
BasicBlock::Create(getGlobalContext(), "", function, 0); BranchInst::Create(returnBlock, context.context()); ReturnInst::Create(getGlobalContext(), returnValue, returnBlock); context.pop(); return function; } Value* ExternFuncDecStatementNode::CodeGen(GenContext& context) { vector<Type*> argTypes; FunctionType *ftype; Function *function; for (auto it = args->begin(); it != args->end(); it++) argTypes.push_back(typeOf((*it)->type)); ftype = FunctionType::get(Type::getVoidTy(getGlobalContext()), makeArrayRef(argTypes), false); function = Function::Create(ftype, GlobalValue::ExternalLinkage, name->name.c_str(), context.module); return function; } Value* IfStatementNode::CodeGen(GenContext& context) { BasicBlock* ifTrue = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BasicBlock* ifFalse = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BasicBlock* ifEnd = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BranchInst::Create(ifTrue, ifFalse, condExpr->CodeGen(context), context.context()); // Entering IF context.push(ifTrue); trueBlock->CodeGen(context); // JMP to END BranchInst::Create(ifEnd, context.context()); context.pop(); // Entering ELSE context.push(ifFalse); falseBlock->CodeGen(context); // JMP to END BranchInst::Create(ifEnd, context.context()); context.pop(); // Return END context.ret(ifEnd); return ifEnd; } Value* ForStatementNode::CodeGen(GenContext& context) { // Initialize initExpr->CodeGen(context); BasicBlock* forIter = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BasicBlock* forEnd = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BasicBlock* forCheck = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); // Check condition satisfaction BranchInst::Create(forCheck, context.context()); context.push(forCheck); // Whether break the loop BranchInst::Create(forIter, forEnd, condExpr->CodeGen(context), 
forCheck); context.pop(); // Entering loop block context.push(forIter); block->CodeGen(context); // Iteration loopExpr->CodeGen(context); // Jump back to condition checking BranchInst::Create(forCheck, context.context()); context.pop(); // Return END context.ret(forEnd); return forEnd; } Value* WhileStatementNode::CodeGen(GenContext& context) { BasicBlock* whileIter = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BasicBlock* whileEnd = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); BasicBlock* whileCheck = BasicBlock::Create(getGlobalContext(), "", context.currentFunction(), 0); // Check condition satisfaction BranchInst::Create(whileCheck, context.context()); context.push(whileCheck); // Whether break the loop BranchInst::Create(whileIter, whileEnd, whileExpr->CodeGen(context), context.context()); context.pop(); // Entering loop block context.push(whileIter); block->CodeGen(context); // Jump back to condition checking BranchInst::Create(whileCheck, context.context()); context.pop(); // Return END context.ret(whileEnd); return whileEnd; } static Value* cast(Value* value, Type* type, GenContext& context) { if (type == value->getType()) return value; if (type == Type::getDoubleTy(getGlobalContext())) { if (value->getType() == Type::getInt64Ty(getGlobalContext()) || value->getType() == Type::getInt8Ty(getGlobalContext())) value = new SIToFPInst(value, type, "", context.context()); else cout << "Cannot cast this value.\n"; } else if (type == Type::getInt64Ty(getGlobalContext())) { if (value->getType() == Type::getDoubleTy(getGlobalContext())) value = new FPToSIInst(value, type, "", context.context()); else if (value->getType() == Type::getInt8Ty(getGlobalContext())) value = new SExtInst(value, type, "", context.context()); else if (value->getType() == Type::getInt32Ty(getGlobalContext())) value = new ZExtInst(value, type, "", context.context()); else if (value->getType() == Type::getInt8PtrTy(getGlobalContext())) 
value = new PtrToIntInst(value, type, "", context.context()); else if (value->getType() == Type::getInt64PtrTy(getGlobalContext())) value = new PtrToIntInst(value, type, "", context.context()); else cout << "Cannot cast this value.\n"; } else if (type == Type::getInt8Ty(getGlobalContext())) { if (value->getType() == Type::getDoubleTy(getGlobalContext())) value = new FPToSIInst(value, type, "", context.context()); else if (value->getType() == Type::getInt64Ty(getGlobalContext())) value = new TruncInst(value, type, "", context.context()); else cout << "Cannot cast this value.\n"; } else cout << "Cannot cast this value.\n"; return value; } -
And the final wrapper.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters#include <iostream> #include <fstream> #include <unistd.h> #include "AST_tree.h" using namespace std; using namespace llvm; extern FILE* yyin; extern BlockExprNode* root; extern int yyparse(); extern void linkExternalFunctions(GenContext &context); void usage() { cout << "\nusage: ./compiler <filename.c>\n" << endl; } int main(int argc, char **argv) { char *filename; // check args if (argc == 2) { filename = argv[1]; } else { usage(); return 0; } // check filename int len = strlen(filename); if (filename[len - 1] != 'c' || filename[len - 2] != '.') { usage(); return 0; } yyin = fopen(filename, "r"); if (!yyin) { perror("File opening failed"); return EXIT_FAILURE; } if (yyparse()){ cout << "ERROR!" << endl; return EXIT_FAILURE; } GenContext context; InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); linkExternalFunctions(context); cout << "Generating LLVM code" << endl; cout << "--------------------" << endl; context.CodeGen(*root); cout << endl; cout << "--------------------" << endl; cout << "Finished" << endl; filename[len-1] = 'l'; filename[len] = 'l'; filename[len+1] = '\0'; ofstream outfile; outfile.open(filename, ios::out); context.OutputCode(outfile); outfile.close(); cout << "Run LLVM code" << endl; cout << "-------------" << endl; context.run(); cout << endl; cout << "-------------" << endl; cout << "Rnd LLVM code ends" << endl; cout << "Finished" << endl; fclose(yyin); return 0; }
Compile & Run
This is how the Makefile is written. Basically, it is a series of operations: lexical analysis, parsing, and final code generation.
To build, simply run `make`.
# Builds the `compiler` binary: bison generates the parser, flex generates the
# lexer, every .cpp is compiled, and the objects are linked against LLVM.
# Recipe lines must start with a TAB character.
.PHONY: all clean

all: compiler

OBJS = C_syntax.o \
       AST_tree.o \
       main.o \
       C_lexer.o

# llvm-config has to be *run* to obtain the flags; $(shell ...) does that once
# per variable thanks to the := assignments.
LLVMCONFIG = llvm-config
CPPFLAGS := $(shell $(LLVMCONFIG) --cppflags) -std=c++11 -g
LDFLAGS := $(shell $(LLVMCONFIG) --ldflags) -lpthread -ldl -lz -lncurses -rdynamic
LIBS := $(shell $(LLVMCONFIG) --libs)
SYSTEMLIBS := $(shell $(LLVMCONFIG) --system-libs)

# bison -d also writes C_syntax.hpp next to C_syntax.cpp.
C_syntax.cpp: C_syntax.yacc
	bison -d --no-lines -o $@ $^

C_syntax.hpp: C_syntax.cpp

# Only the .lex file ($<) is passed to flex; C_syntax.hpp is listed as a
# prerequisite purely so the lexer is regenerated when the tokens change.
C_lexer.cpp: C_lexer.lex C_syntax.hpp
	flex -L -o $@ $<

%.o: %.cpp
	g++ -c $(CPPFLAGS) -o $@ $< -fpermissive

compiler: $(OBJS)
	clang++ -o $@ $(OBJS) $(LIBS) $(SYSTEMLIBS) $(LDFLAGS)

clean:
	$(RM) -rf C_syntax.cpp C_syntax.hpp compiler y.output c_code/*.ll C_lexer.cpp $(OBJS)
Testing
To test that our compiler works, just do
./compiler test.c