ls

A simple syscall-based programming language, written from scratch
git clone git://git.kocotian.pl/ls.git
Log | Files | Refs | README | LICENSE

grammar.c (9013B)


      1 /* See AUTHORS file for copyright details
      2    and LICENSE file for license details. */
      3 
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 
      8 #include "grammar.h"
      9 #include "lsc.h"
     10 
     11 #define ASMCONCAT(...) { \
     12 	output = realloc(output, outsiz += \
     13 			snprintf(buffer, BUFSIZ, __VA_ARGS__)); \
     14 	strncat(output, buffer, outsiz); \
     15 }
     16 
     17 extern char    *contents;
     18 extern char    *output;
     19 extern size_t   outsiz;
     20 extern int      sciter;
     21 
     22 static char *
     23 g_typetostr(TokenType type)
     24 {
     25 	switch (type) {
     26 #include "tokentype.c"
     27 	default:
     28 		return "<unknown>"; break;
     29 	}
     30 }
     31 
     32 static int
     33 g_expecttype(Token token, TokenType type)
     34 {
     35 	if (token.type != type)
     36 		errwarn("expected \033[1m%s\033[0m, got \033[1m%s\033[0m", 1,
     37 				token, g_typetostr(type), g_typetostr(token.type));
     38 	return 0;
     39 }
     40 
     41 size_t
     42 g_expression(Token *tokens, size_t toksize)
     43 {
     44 	size_t i;
     45 	char buffer[BUFSIZ], *val;
     46 	i = 0;
     47 
     48 	if (tokens[i].type == TokenNumber) { /* number literal */
     49 		val = malloc(tokens[i].len + 1);
     50 		strncpy(val, contents + tokens[i].off, tokens[i].len);
     51 		val[tokens[i].len] = '\0';
     52 		ASMCONCAT("\tmov rax, %s\n", val)
     53 		free(val);
     54 		++i;
     55 	} else if (tokens[i].type == TokenString) { /* string literal */
     56 		val = malloc(tokens[i].len + 1);
     57 		strncpy(val, contents + tokens[i].off, tokens[i].len);
     58 		val[tokens[i].len] = '\0';
     59 		ASMCONCAT("section .rodata\n"
     60 			"\t.STR%d: db %s, 0\n"
     61 			"section .text\n"
     62 			"\tmov rax, .STR%d\n",
     63 			sciter, val, sciter);
     64 		++sciter;
     65 		free(val);
     66 		++i;
     67 	} else if (tokens[i].type == TokenIdentifier) { /* identifier literal */
     68 		char *pfnname = malloc(tokens[i].len + 1);
     69 		int syscallrax = -1;
     70 
     71 		strncpy(pfnname, contents + tokens[i].off,
     72 				tokens[i].len);
     73 		pfnname[tokens[i].len] = '\0';
     74 
     75 		if ((syscallrax = getsyscallbyname(pfnname)) < 0) {
     76 			val = malloc(tokens[i].len + 1);
     77 			strncpy(val, contents + tokens[i].off, tokens[i].len);
     78 			val[tokens[i].len] = '\0';
     79 			ASMCONCAT("\tmov rax, %s\n", val);
     80 			free(val);
     81 		}
     82 		++i;
     83 	} else if (tokens[i].type == TokenOpeningParenthesis) { /* (expression) */
     84 		++i;
     85 		i += g_expression(tokens + i, toksize - i);
     86 		g_expecttype(tokens[i++], TokenClosingParenthesis);
     87 	} else if (tokens[i].type == TokenMinusSign) { /* -sign-change */
     88 		++i;
     89 		i += g_expression(tokens + i, toksize - i);
     90 		ASMCONCAT("\tneg rax\n");
     91 	} else if (tokens[i].type == TokenExclamationMark) { /* !negation */
     92 		++i;
     93 		i += g_expression(tokens + i, toksize - i);
     94 	} else if (tokens[i].type == TokenIncrement) { /* ++pre-incrementation */
     95 		++i;
     96 		i += g_expression(tokens + i, toksize - i);
     97 	} else if (tokens[i].type == TokenDecrement) { /* --pre-decrementation */
     98 		++i;
     99 		i += g_expression(tokens + i, toksize - i);
    100 	}
    101 
    102 	/* expressions that starts with another expressions; TODO */
    103 	{
    104 		/* i += g_expression(tokens + i, toksize - i); */
    105 		if (tokens[i].type == TokenQuestionMark) { /* ternary expression */
    106 			++i;
    107 			i += g_expression(tokens + i, toksize - i);
    108 			g_expecttype(tokens[i++], TokenColon);
    109 			i += g_expression(tokens + i, toksize - i);
    110 		} else if (tokens[i].type == TokenIncrement) { /* post-incrementation++ */
    111 			++i;
    112 		} else if (tokens[i].type == TokenDecrement) { /* post-decrementation-- */
    113 			++i;
    114 		} else if (tokens[i].type == TokenOpeningBracket) { /* indexing[expr] */
    115 			++i;
    116 			i += g_expression(tokens + i, toksize - i);
    117 			g_expecttype(tokens[i++], TokenClosingBracket);
    118 		} else if (tokens[i].type == TokenOpeningParenthesis) { /* function(expr) */
    119 			char *pfnname = malloc(tokens[i - 1].len + 1);
    120 			int syscallrax = -1;
    121 			int ai = -1; /* arg iterator */
    122 
    123 			strncpy(pfnname, contents + tokens[i - 1].off,
    124 					tokens[i - 1].len);
    125 			pfnname[tokens[i - 1].len] = '\0';
    126 			if ((syscallrax = getsyscallbyname(pfnname)) < 0)
    127 				ASMCONCAT("\tmov r15, rax\n")
    128 			free(pfnname);
    129 
    130 			do {
    131 				++ai; ++i;
    132 				i += g_expression(tokens + i, toksize - i);
    133 				val = malloc(tokens[i].len + 1);
    134 				strncpy(val, contents + tokens[i].off, tokens[i].len);
    135 				val[tokens[i].len] = '\0';
    136 				ASMCONCAT("\tpush %s\n\tmov %s, rax\n",
    137 					ai == 0 ? "rdi" : ai == 1 ? "rsi" :
    138 					ai == 2 ? "rdx" : ai == 3 ? "r10" :
    139 					ai == 4 ? "r8" : ai == 5 ? "r9" : "rax",
    140 					ai == 0 ? "rdi" : ai == 1 ? "rsi" :
    141 					ai == 2 ? "rdx" : ai == 3 ? "r10" :
    142 					ai == 4 ? "r8" : ai == 5 ? "r9" : "rax");
    143 				free(val);
    144 			} while (tokens[i].type == TokenComma);
    145 			g_expecttype(tokens[i++], TokenClosingParenthesis);
    146 			if (syscallrax < 0)
    147 				ASMCONCAT("\tmov rax, r15\n\tcall rax\n")
    148 			else
    149 				ASMCONCAT("\tmov rax, %d\n\tsyscall\n", syscallrax)
    150 			while (ai >= 0) {
    151 				ASMCONCAT("\tpop %s\n",
    152 					ai == 0 ? "rdi" : ai == 1 ? "rsi" :
    153 					ai == 2 ? "rdx" : ai == 3 ? "r10" :
    154 					ai == 4 ? "r8" : ai == 5 ? "r9" : "rax");
    155 				--ai;
    156 			};
    157 		} else {
    158 			if (tokens[i].type == TokenAssignmentSign) { /* expr = expr */
    159 				++i;
    160 			} else if (tokens[i].type == TokenPlusSign) { /* expr + expr */
    161 				ASMCONCAT("\tpush rax\n")
    162 				++i;
    163 				i += g_expression(tokens + i, toksize - i);
    164 				ASMCONCAT("\tmov rbx, rax\n\tpop rax\n\tadd rax, rbx\n")
    165 			} else if (tokens[i].type == TokenMinusSign) { /* expr - expr */
    166 				ASMCONCAT("\tpush rax\n")
    167 				++i;
    168 				i += g_expression(tokens + i, toksize - i);
    169 				ASMCONCAT("\tmov rbx, rax\n\tpop rax\n\tsub rax, rbx\n")
    170 			} else if (tokens[i].type == TokenPlusEqualSign) { /* expr += expr */
    171 				++i;
    172 			} else if (tokens[i].type == TokenMinusEqualSign) { /* expr -= expr */
    173 				++i;
    174 			} else if (tokens[i].type == TokenLogicalOrSign) { /* expr || expr */
    175 				++i;
    176 			} else if (tokens[i].type == TokenLogicalAndSign) { /* expr && expr */
    177 				++i;
    178 			} else if (tokens[i].type == TokenLogicalEquSign) { /* expr == expr */
    179 				++i;
    180 			} else if (tokens[i].type == TokenLogicalNotEquSign) { /* expr != expr */
    181 				++i;
    182 			/* } else if (tokens[i].type == TokenComma) { /1* expr, expr *1/ */
    183 			/* 	++i; */
    184 			} else {
    185 				return i;
    186 			}
    187 			i += g_expression(tokens + i, toksize - i);
    188 		}
    189 	}
    190 
    191 	return i;
    192 }
    193 
    194 size_t
    195 g_statement(Token *tokens, size_t toksize)
    196 {
    197 	size_t i;
    198 	char buffer[BUFSIZ], *val;
    199 	i = 0;
    200 
    201 	if (tokens[i].type == TokenOpeningBrace) { /* compound */
    202 		++i;
    203 		while (tokens[i].type != TokenClosingBrace && i < toksize) {
    204 			i += g_statement(tokens + i, toksize - i); ++i;
    205 		}
    206 	} else if (tokens[i].type == TokenKeyword) {
    207 		if        (!strncmp(contents + tokens[i].off, "if",     2)) { /* conditional */
    208 			g_expecttype(tokens[++i], TokenOpeningParenthesis);
    209 			++i;
    210 			i += g_expression(tokens + i, toksize - i);
    211 			g_expecttype(tokens[i++], TokenClosingParenthesis);
    212 			i += g_statement(tokens + i, toksize - i);
    213 		} else if (!strncmp(contents + tokens[i].off, "while",  5)) { /* while loop */
    214 			g_expecttype(tokens[++i], TokenOpeningParenthesis);
    215 			++i;
    216 			i += g_expression(tokens + i, toksize - i);
    217 			g_expecttype(tokens[i++], TokenClosingParenthesis);
    218 			i += g_statement(tokens + i, toksize - i);
    219 		} else if (!strncmp(contents + tokens[i].off, "return", 6)) { /* return */
    220 			++i;
    221 			i += g_expression(tokens + i, toksize - i);
    222 			g_expecttype(tokens[i], TokenSemicolon);
    223 			ASMCONCAT("\tpop rbp\n\tret\n")
    224 		} else if (!strncmp(contents + tokens[i].off, "var",    3)) { /* variable */
    225 			do {
    226 				++i;
    227 				g_expecttype(tokens[i++], TokenIdentifier);
    228 				if (tokens[i].type == TokenAssignmentSign) {
    229 					++i;
    230 					i += g_expression(tokens + i, toksize - i);
    231 				}
    232 			} while (tokens[i].type == TokenComma);
    233 			g_expecttype(tokens[i], TokenSemicolon);
    234 		} else if (!strncmp(contents + tokens[i].off, "const",  5)) { /* constant */
    235 			do {
    236 				++i;
    237 				g_expecttype(tokens[i++], TokenIdentifier);
    238 				if (tokens[i].type == TokenAssignmentSign) {
    239 					++i;
    240 					i += g_expression(tokens + i, toksize - i);
    241 				}
    242 			} while (tokens[i].type == TokenComma);
    243 			g_expecttype(tokens[i], TokenSemicolon);
    244 		} else {
    245 			char buf[128];
    246 			snprintf(buf, tokens[i].len + 1, "%s", contents + tokens[i].off);
    247 			errwarn("unexpected keyword: \033[1m%s\033[0m", 1,
    248 					tokens[i], buf);
    249 		}
    250 	} else if (!strncmp(contents + tokens[i].off, "void", 4)) { /* expression */
    251 	} else if (tokens[i].type == TokenSemicolon) { /* noop */
    252 		++i;
    253 	} else {
    254 		i += g_expression(tokens + i, toksize - i);
    255 		g_expecttype(tokens[i], TokenSemicolon);
    256 	}
    257 
    258 	return i;
    259 }
    260 
    261 size_t
    262 g_function(Token *tokens, size_t toksize)
    263 {
    264 	char *func_name;
    265 	char buffer[BUFSIZ];
    266 
    267 	size_t i, rb;
    268 	i = 0;
    269 	g_expecttype(tokens[i++], TokenIdentifier);
    270 	func_name = strndup(contents + tokens[i - 1].off, tokens[i - 1].len);
    271 	g_expecttype(tokens[i], TokenOpeningParenthesis);
    272 	if (tokens[i + 1].type == TokenKeyword
    273 	&& !strncmp(contents + tokens[i + 1].off, "void", 4))
    274 		i += 2;
    275 	else do {
    276 		++i;
    277 		g_expecttype(tokens[i++], TokenIdentifier);
    278 	} while (tokens[i].type == TokenComma);
    279 	g_expecttype(tokens[i++], TokenClosingParenthesis);
    280 
    281 	ASMCONCAT("\n%s:\n\tpush rbp\n\tmov rbp, rsp\n",
    282 			func_name);
    283 	free(func_name);
    284 
    285 	i += g_statement(tokens + i, toksize - i);
    286 
    287 	ASMCONCAT("\tpop rbp\n\tret\n");
    288 
    289 	return ++i;
    290 }
    291 
    292 size_t
    293 g_main(Token *tokens, size_t toksize)
    294 {
    295 	size_t i = 0;
    296 	while (i < toksize)
    297 		i += g_function(tokens + i, toksize - i);
    298 
    299 	return i;
    300 }