hyc

Hydrogen Compiler written in C
git clone git://git.kocotian.pl/hyc.git
Log | Files | Refs | README | LICENSE

ast.c (12250B)


      1 /*
      2    hyc - Hydrogen Compiler written in C
      3    Copyright (C) 2021  Kacper Kocot <kocotian@kocotian.pl>
      4 
      5    This program is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3 of the License, or
      8    (at your option) any later version.
      9 
     10    This program is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    You should have received a copy of the GNU General Public License
     16    along with this program; if not, write to the Free Software Foundation,
     17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
     18 
     19 */
     20 
     21 #include <ast.h>
     22 #include <err.h>
     23 #include <stdlib.h>
     24 #include <util.h>
     25 
     26 typedef struct {
     27 	Token *data;
     28 	size_t len;
     29 	ssize_t pos;
     30 } Tokenizer;
     31 
     32 #define new(PTR) (*((PTR) = malloc(sizeof *(PTR))))
     33 
     34 static Token *prevToken(Tokenizer *t);
     35 
     36 static Token *nextToken(Tokenizer *t);
     37 static Token *nextTokenType(Tokenizer *t, TokenType type);
     38 static Token *enextToken(Tokenizer *t);
     39 static Token *enextTokenType(Tokenizer *t, TokenType type);
     40 
     41 static ASTExpression tokenstoASTExpressionLiteral(Tokenizer *t);
     42 static ASTExpression tokenstoASTExpressionFunctionArgumentList(Tokenizer *t);
     43 static ASTExpression tokenstoASTExpression(Tokenizer *t);
     44 
     45 static ASTStatement tokenstoASTStatementCompound(Tokenizer *t);
     46 static ASTStatement tokenstoASTStatementConditional(Tokenizer *t);
     47 static ASTStatement tokenstoASTStatementReturn(Tokenizer *t);
     48 static ASTStatement tokenstoASTStatementExpression(Tokenizer *t);
     49 static ASTStatement tokenstoASTStatementInlineAssembly(Tokenizer *t);
     50 static ASTStatement tokenstoASTStatementVariableDeclaration(Tokenizer *t, int seek);
     51 static ASTStatement tokenstoASTStatement(Tokenizer *t);
     52 
     53 static ASTGlobal tokenstoASTGlobalFunction(Tokenizer *t);
     54 static ASTGlobal tokenstoASTGlobalExport(Tokenizer *t);
     55 static ASTGlobal tokenstoASTGlobal(Tokenizer *t);
     56 
     57 static Token *
     58 prevToken(Tokenizer *t)
     59 {
     60 	return (--t->pos >= 0) ? &(t->data[t->pos]) : NULL;
     61 }
     62 
     63 static Token *
     64 nextToken(Tokenizer *t)
     65 {
     66 	return ((unsigned)(++t->pos) < t->len) ? &(t->data[t->pos]) : NULL;
     67 }
     68 
     69 static Token *
     70 nextTokenType(Tokenizer *t, TokenType type)
     71 {
     72 	Token *tok;
     73 	if ((tok = nextToken(t)) != NULL) {
     74 		if (tok->type != type)
     75 			error(tok, "unexpected '%s' (expected '%s')",
     76 					strTokenType(tok->type), strTokenType(type));
     77 	}
     78 	return tok;
     79 }
     80 
     81 static Token *
     82 enextToken(Tokenizer *t)
     83 {
     84 	Token *tok;
     85 	if ((tok = nextToken(t)) == NULL)
     86 		error(tok, "unexpected end of input");
     87 	return tok;
     88 }
     89 
     90 static Token *
     91 enextTokenType(Tokenizer *t, TokenType type)
     92 {
     93 	Token *tok;
     94 	tok = enextToken(t);
     95 	if (tok->type != type)
     96 		error(tok, "unexpected '%s' (expected '%s')",
     97 				strTokenType(tok->type), strTokenType(type));
     98 	return tok;
     99 }
    100 
    101 /*****************************************************************************/
    102 
    103 /* Expressions */
    104 
    105 static ASTExpression
    106 tokenstoASTExpressionLiteral(Tokenizer *t)
    107 {
    108 	ASTExpression expr;
    109 	Token *tok;
    110 	tok = enextToken(t);
    111 
    112 	expr.Any.any.inittoken = tok;
    113 
    114 	if (tok->type == TokenIdentifier) {
    115 		expr.type = ASTExpressionLiteralIdentifier_T;
    116 		expr.Literal.value = Strdup(tok->str).data;
    117 	} else if (tok->type == TokenInteger) {
    118 		expr.type = ASTExpressionLiteralInteger_T;
    119 		expr.Literal.value = Strdup(tok->str).data;
    120 	} else if (tok->type == TokenString) {
    121 		expr.type = ASTExpressionLiteralString_T;
    122 		expr.Literal.value = Strdup((String){tok->str.data + 1, tok->str.len - 2}).data;
    123 	}
    124 
    125 	return expr;
    126 }
    127 
    128 static ASTExpression
    129 tokenstoASTExpressionFunctionArgumentList(Tokenizer *t)
    130 {
    131 	ASTExpression expr;
    132 	Token *tok;
    133 
    134 	expr.type = ASTExpressionFunctionArgumentList_T;
    135 	newVector(expr.FunctionArgumentList);
    136 	tok = enextTokenType(t, TokenOpeningParenthesis);
    137 
    138 	expr.Any.any.inittoken = tok;
    139 
    140 	if ((tok = enextToken(t))->type != TokenClosingParenthesis) {
    141 		prevToken(t);
    142 		do {
    143 			pushVector(expr.FunctionArgumentList, tokenstoASTExpression(t));
    144 		} while ((tok = enextToken(t))->type == TokenComma);
    145 		prevToken(t);
    146 		enextTokenType(t, TokenClosingParenthesis);
    147 	}
    148 
    149 	return expr;
    150 }
    151 
    152 static ASTExpression
    153 tokenstoASTExpression(Tokenizer *t)
    154 {
    155 	ASTExpression expr;
    156 	Token *tok;
    157 	tok = enextToken(t);
    158 
    159 	expr.Any.any.inittoken = tok;
    160 
    161 	if (0) {
    162 	/* Literals: */
    163 	} else if (tok->type == TokenIdentifier) {
    164 		prevToken(t);
    165 		expr = tokenstoASTExpressionLiteral(t);
    166 	} else if (tok->type == TokenInteger) {
    167 		prevToken(t);
    168 		expr = tokenstoASTExpressionLiteral(t);
    169 	} else if (tok->type == TokenString) {
    170 		prevToken(t);
    171 		expr = tokenstoASTExpressionLiteral(t);
    172 	} else if (tok->type == TokenOpeningParenthesis) {
    173 		expr = tokenstoASTExpression(t);
    174 		enextTokenType(t, TokenClosingParenthesis);
    175 	} else if (tok->type == TokenExclamationMark) {
    176 		expr.type = ASTExpressionUnaryLogicalNot_T;
    177 		new(expr.Unary.expr) = tokenstoASTExpression(t);
    178 	} else if (tok->type == TokenMinus) {
    179 		expr.type = ASTExpressionUnarySignChange_T;
    180 		new(expr.Unary.expr) = tokenstoASTExpression(t);
    181 	} else if (tok->type == TokenAmperstand) {
    182 		expr.type = ASTExpressionUnaryAddressof_T;
    183 		new(expr.Unary.expr) = tokenstoASTExpression(t);
    184 	} else if (tok->type == TokenAsterisk) {
    185 		expr.type = ASTExpressionUnaryValuefrom_T;
    186 		new(expr.Unary.expr) = tokenstoASTExpression(t);
    187 	} else if (tok->type == TokenPlusPlus) {
    188 		expr.type = ASTExpressionUnaryPreincrement_T;
    189 		new(expr.Unary.expr) = tokenstoASTExpression(t);
    190 	} else if (tok->type == TokenMinusMinus) {
    191 		expr.type = ASTExpressionUnaryPredecrement_T;
    192 		new(expr.Unary.expr) = tokenstoASTExpression(t);
    193 	} else {
    194 		error(tok, "unexpected token: '%s'", strTokenType(tok->type));
    195 	}
    196 
    197 	tok = enextToken(t);
    198 	if (tok->type == TokenOpeningParenthesis) { /* function call */
    199 		ASTExpression callexpr = expr;
    200 
    201 		prevToken(t);
    202 		expr.type = ASTExpressionFunctionCall_T;
    203 		new(expr.FunctionCall.callexpr) = callexpr;
    204 		new(expr.FunctionCall.argv) =
    205 			tokenstoASTExpressionFunctionArgumentList(t);
    206 	} else if (tok->type == TokenAssignment) { /* assignment */
    207 		ASTExpression left = expr;
    208 		ASTExpression right = tokenstoASTExpression(t);
    209 		expr.type = ASTExpressionBinaryAssignment_T;
    210 		new(expr.BinaryAssignment.left) = left;
    211 		new(expr.BinaryAssignment.right) = right;
    212 	} else {
    213 		prevToken(t);
    214 	}
    215 
    216 	return expr;
    217 }
    218 
    219 /* Statements */
    220 
    221 static ASTStatement
    222 tokenstoASTStatementCompound(Tokenizer *t)
    223 {
    224 	ASTStatement stat;
    225 	Token *tok;
    226 
    227 	stat.type = ASTStatementCompound_T;
    228 	tok = enextTokenType(t, TokenOpeningBrace);
    229 	stat.Any.any.inittoken = tok;
    230 	newVector(stat.Compound);
    231 
    232 	while ((tok = enextToken(t))->type != TokenClosingBrace) {
    233 		prevToken(t);
    234 		pushVector(stat.Compound, tokenstoASTStatement(t));
    235 	}
    236 
    237 	return stat;
    238 }
    239 
    240 static ASTStatement
    241 tokenstoASTStatementConditional(Tokenizer *t)
    242 {
    243 	/* if <expr> <statement> [else <statement>]; */
    244 	ASTStatement stat;
    245 	Token *tok;
    246 
    247 	stat.type = ASTStatementConditional_T;
    248 	tok = enextTokenType(t, TokenIdentifier);
    249 	stat.Any.any.inittoken = tok;
    250 	if (Strccmp(tok->str, "if"))
    251 		error(tok, "expected 'if' keyword");
    252 
    253 	new(stat.Conditional.condition) = tokenstoASTExpression(t);
    254 	new(stat.Conditional.body) = tokenstoASTStatement(t);
    255 
    256 	tok = enextToken(t);
    257 	if (tok->type == TokenIdentifier && !Strccmp(tok->str, "else"))
    258 		new(stat.Conditional.elsebody) = tokenstoASTStatement(t);
    259 	else {
    260 		prevToken(t); stat.Conditional.elsebody = NULL;
    261 	}
    262 
    263 	return stat;
    264 }
    265 
    266 static ASTStatement
    267 tokenstoASTStatementReturn(Tokenizer *t)
    268 {
    269 	/* return <expr>; */
    270 	ASTStatement stat;
    271 	Token *tok;
    272 
    273 	stat.type = ASTStatementReturn_T;
    274 	tok = enextTokenType(t, TokenIdentifier);
    275 	stat.Any.any.inittoken = tok;
    276 	if (Strccmp(tok->str, "return"))
    277 		error(tok, "expected 'return' keyword");
    278 
    279 	*(stat.Return.expr = malloc(sizeof *stat.Return.expr)) = tokenstoASTExpression(t);
    280 
    281 	return stat;
    282 }
    283 
    284 static ASTStatement
    285 tokenstoASTStatementExpression(Tokenizer *t)
    286 {
    287 	/* <expr>; */
    288 	ASTStatement stat;
    289 
    290 	stat.type = ASTStatementExpression_T;
    291 	*(stat.Expression.expr = malloc(sizeof *stat.Expression.expr))
    292 		= tokenstoASTExpression(t);
    293 	stat.Any.any.inittoken = stat.Expression.expr->Any.any.inittoken;
    294 
    295 	return stat;
    296 }
    297 
    298 static ASTStatement
    299 tokenstoASTStatementInlineAssembly(Tokenizer *t)
    300 {
    301 	/* asm <string literal>; */
    302 	ASTStatement stat;
    303 	Token *tok;
    304 
    305 	stat.type = ASTStatementInlineAssembly_T;
    306 	tok = enextTokenType(t, TokenIdentifier);
    307 	stat.Any.any.inittoken = tok;
    308 	if (Strccmp(tok->str, "asm"))
    309 		error(tok, "expected 'asm' keyword");
    310 
    311 	stat.InlineAssembly.expr = tokenstoASTExpressionLiteral(t).Literal;
    312 
    313 	return stat;
    314 }
    315 
    316 static ASTStatement
    317 tokenstoASTStatementVariableDeclaration(Tokenizer *t, int seek)
    318 {
    319 	/* var <identifier literal>; */
    320 	ASTStatement stat;
    321 	Token *tok;
    322 
    323 	stat.type = ASTStatementVariableDeclaration_T;
    324 	if (!seek) {
    325 		tok = enextTokenType(t, TokenIdentifier);
    326 		stat.Any.any.inittoken = tok;
    327 		if (Strccmp(tok->str, "var"))
    328 			error(tok, "expected 'var' keyword");
    329 	} else {
    330 		tok = enextToken(t);
    331 		stat.Any.any.inittoken = tok;
    332 		prevToken(t);
    333 	}
    334 
    335 	stat.VariableDeclaration.name = tokenstoASTExpressionLiteral(t).Literal;
    336 
    337 	return stat;
    338 }
    339 
    340 static ASTStatement
    341 tokenstoASTStatement(Tokenizer *t)
    342 {
    343 	ASTStatement stat;
    344 	Token *tok;
    345 
    346 	tok = enextToken(t);
    347 	stat.Any.any.inittoken = tok;
    348 
    349 	if (0) {
    350 	} else if (tok->type == TokenSemicolon) {
    351 		stat.type = ASTStatementNoOp_T;
    352 	} else if (tok->type == TokenOpeningBrace) {
    353 		prevToken(t);
    354 		stat = tokenstoASTStatementCompound(t);
    355 	} else if (tok->type == TokenIdentifier && !Strccmp(tok->str, "if")) {
    356 		prevToken(t);
    357 		stat = tokenstoASTStatementConditional(t);
    358 	} else if (tok->type == TokenIdentifier && !Strccmp(tok->str, "return")) {
    359 		prevToken(t);
    360 		stat = tokenstoASTStatementReturn(t);
    361 		tok = enextTokenType(t, TokenSemicolon);
    362 	} else if (tok->type == TokenIdentifier && !Strccmp(tok->str, "asm")) {
    363 		prevToken(t);
    364 		stat = tokenstoASTStatementInlineAssembly(t);
    365 		tok = enextTokenType(t, TokenSemicolon);
    366 	} else if (tok->type == TokenIdentifier && !Strccmp(tok->str, "var")) {
    367 		stat = tokenstoASTStatementVariableDeclaration(t, 1);
    368 		tok = enextTokenType(t, TokenSemicolon);
    369 	} else {
    370 		prevToken(t);
    371 		stat = tokenstoASTStatementExpression(t);
    372 		tok = enextTokenType(t, TokenSemicolon);
    373 	}
    374 
    375 	return stat;
    376 }
    377 
    378 /* Globals */
    379 
    380 static ASTGlobal
    381 tokenstoASTGlobalFunction(Tokenizer *t)
    382 {
    383 	ASTGlobal global;
    384 	Token *tok;
    385 
    386 	global.type = ASTGlobalFunction_T;
    387 	newVector(global.Function.parameters);
    388 	tok = enextTokenType(t, TokenIdentifier);
    389 	global.Any.any.inittoken = tok;
    390 	if (Strccmp(tok->str, "function"))
    391 		error(tok, "expected 'function' keyword");
    392 
    393 	global.Function.name = tokenstoASTExpressionLiteral(t).Literal;
    394 
    395 	tok = enextTokenType(t, TokenOpeningParenthesis);
    396 	if ((tok = enextToken(t))->type != TokenClosingParenthesis) {
    397 		prevToken(t);
    398 		do {
    399 			pushVector(global.Function.parameters,
    400 					tokenstoASTStatementVariableDeclaration(t, 1).VariableDeclaration);
    401 		} while ((tok = enextToken(t))->type == TokenComma);
    402 		prevToken(t);
    403 		enextTokenType(t, TokenClosingParenthesis);
    404 	}
    405 
    406 	new(global.Function.body) = tokenstoASTStatement(t);
    407 
    408 	return global;
    409 }
    410 
    411 static ASTGlobal
    412 tokenstoASTGlobalExport(Tokenizer *t)
    413 {
    414 	ASTGlobal global;
    415 	Token *tok;
    416 
    417 	global.type = ASTGlobalExport_T;
    418 	tok = enextTokenType(t, TokenIdentifier);
    419 	global.Any.any.inittoken = tok;
    420 	if (Strccmp(tok->str, "export"))
    421 		error(tok, "expected 'export' keyword");
    422 
    423 	global.Export.name = tokenstoASTExpressionLiteral(t).Literal;
    424 
    425 	return global;
    426 }
    427 
    428 static ASTGlobal
    429 tokenstoASTGlobal(Tokenizer *t)
    430 {
    431 	ASTGlobal global;
    432 	Token *tok;
    433 
    434 	tok = enextToken(t);
    435 	global.Any.any.inittoken = tok;
    436 
    437 	if (0) {
    438 	} else if (tok->type == TokenIdentifier && !Strccmp(tok->str, "function")) {
    439 		prevToken(t);
    440 		global = tokenstoASTGlobalFunction(t);
    441 	} else if (tok->type == TokenIdentifier && !Strccmp(tok->str, "export")) {
    442 		prevToken(t);
    443 		global = tokenstoASTGlobalExport(t);
    444 	} else {
    445 		error(tok, "unexpected token");
    446 	}
    447 
    448 	return global;
    449 }
    450 
    451 ASTModule
    452 tokenstoASTModule(Token *tdata, size_t tlen)
    453 {
    454 	ASTModule module;
    455 	Token *tok;
    456 	Tokenizer t = {tdata, tlen, -1};
    457 
    458 	newVector(module);
    459 
    460 	while ((tok = nextToken(&t)) != NULL) {
    461 		prevToken(&t);
    462 		pushVector(module, tokenstoASTGlobal(&t));
    463 	}
    464 
    465 	return module;
    466 }