ls

simple syscall based programming language from scratch
git clone git://git.kocotian.pl/ls.git
Log | Files | Refs | README | LICENSE

lsc.c (7301B)


      1 /* See AUTHORS file for copyright details
      2  * and LICENSE file for license details.
      3  *
      4  * LinuxScript compiler is a simple, small compiler
      5  * for the LinuxScript programming language, written in
      6  * a suckless way.
      7  *
      8  * Consult README for more information about the language
      9  */
     10 
     11 #include <stdlib.h>
     12 #include <string.h>
     13 #include <unistd.h>
     14 
     15 #include "arg.h"
     16 #include "syscalls.h"
     17 #include "util.h"
     18 
     19 #define BUFSIZ 8192
     20 
     21 #include "tokenmacros.h"
     22 
     23 #include "lsc.h"
     24 #include "grammar.h"
     25 
     26 static ssize_t parseline(char *input, size_t ilen, size_t off, Token **tokens, size_t *toksiz, size_t *tokiter);
     27 static void usage(void);
     28 
     29 char *argv0;
     30 
     31 char *filename, *line;
     32 int fileline;
     33 
     34 char *contents;
     35 char *output;
     36 size_t outsiz;
     37 
     38 int sciter; /* string const iter */
     39 
     40 int
     41 getsyscallbyname(char *name)
     42 {
     43 	size_t i;
     44 	for (i = 0; i < (sizeof(syscalls) / sizeof(*syscalls)); ++i)
     45 		if (!strcmp(syscalls[i], name))
     46 			return (int)i;
     47 	return -1;
     48 }
     49 
     50 static ssize_t
     51 parseline(char *input, size_t ilen, size_t off, Token **tokens, size_t *toksiz, size_t *tokiter)
     52 {
     53 	TokenType type;
     54 	size_t i, j, li, valstart;
     55 	char ch;
     56 
     57 	line = input;
     58 
     59 	for (i = j = li = type = 0; i < ilen; ++i, ++j, ++li) {
     60 		if ((*tokiter >= (*toksiz - 1)))
     61 			*tokens = realloc(*tokens, sizeof(Token) * (*toksiz += 128));
     62 		ch = input[i];
     63 		if (!type) {
     64 			(*tokens)[*tokiter].file = filename;
     65 			(*tokens)[*tokiter].line = fileline;
     66 			(*tokens)[*tokiter].col = valstart - off + 1;
     67 			valstart = off + i;
     68 			if (ISNUM(ch))
     69 				type = TokenNumber;
     70 			else if (ISIDENSTARTCHAR(ch))
     71 				type = TokenIdentifier;
     72 			else if (ISQUOT(ch))
     73 				type = TokenString;
     74 			else if (ISEQUSIGN(ch)) { /* TODO: make this less bloated */
     75 				type = TokenAssignmentSign;
     76 				(*tokens)[*tokiter].off = valstart;
     77 				if (ISEQUSIGN(input[i + 1])) { /* == */
     78 					++i;
     79 					(*tokens)[*tokiter].len = ++j + 1;
     80 					(*tokens)[(*tokiter)++].type = TokenLogicalEquSign;
     81 				} else {
     82 					(*tokens)[*tokiter].len = j + 1;
     83 					(*tokens)[(*tokiter)++].type = TokenAssignmentSign;
     84 				}
     85 				type = TokenNull;
     86 				j = -1;
     87 			} else if (ISPLUSSIGN(ch)) {
     88 				type = TokenPlusSign;
     89 				(*tokens)[*tokiter].off = valstart;
     90 				if (ISEQUSIGN(input[i + 1])) { /* += */
     91 					++i;
     92 					(*tokens)[*tokiter].len = ++j + 1;
     93 					(*tokens)[(*tokiter)++].type = TokenPlusEqualSign;
     94 				} else if (ISPLUSSIGN(input[i + 1])) { /* ++ */
     95 					++i;
     96 					(*tokens)[*tokiter].len = ++j + 1;
     97 					(*tokens)[(*tokiter)++].type = TokenIncrement;
     98 				} else {
     99 					(*tokens)[*tokiter].len = j + 1;
    100 					(*tokens)[(*tokiter)++].type = TokenPlusSign;
    101 				}
    102 				type = TokenNull;
    103 				j = -1;
    104 			} else if (ISMINUSSIGN(ch)) {
    105 				type = TokenMinusSign;
    106 				(*tokens)[*tokiter].off = valstart;
    107 				if (ISEQUSIGN(input[i + 1])) { /* -= */
    108 					++i;
    109 					(*tokens)[*tokiter].len = ++j + 1;
    110 					(*tokens)[(*tokiter)++].type = TokenMinusEqualSign;
    111 				} else if (ISMINUSSIGN(input[i + 1])) { /* -- */
    112 					++i;
    113 					(*tokens)[*tokiter].len = ++j + 1;
    114 					(*tokens)[(*tokiter)++].type = TokenDecrement;
    115 				} else {
    116 					(*tokens)[*tokiter].len = j + 1;
    117 					(*tokens)[(*tokiter)++].type = TokenMinusSign;
    118 				}
    119 				type = TokenNull;
    120 				j = -1;
    121 			} else if (ISORSIGN(ch)) {
    122 				type = TokenOrSign;
    123 				(*tokens)[*tokiter].off = valstart;
    124 				if (ISORSIGN(input[i + 1])) { /* || */
    125 					++i;
    126 					(*tokens)[*tokiter].len = ++j + 1;
    127 					(*tokens)[(*tokiter)++].type = TokenLogicalOrSign;
    128 				} else {
    129 					(*tokens)[*tokiter].len = j + 1;
    130 					(*tokens)[(*tokiter)++].type = TokenOrSign;
    131 				}
    132 				type = TokenNull;
    133 				j = -1;
    134 			} else if (ISANDSIGN(ch)) {
    135 				type = TokenOrSign;
    136 				(*tokens)[*tokiter].off = valstart;
    137 				if (ISANDSIGN(input[i + 1])) { /* && */
    138 					++i;
    139 					(*tokens)[*tokiter].len = ++j + 1;
    140 					(*tokens)[(*tokiter)++].type = TokenLogicalAndSign;
    141 				} else {
    142 					(*tokens)[*tokiter].len = j + 1;
    143 					(*tokens)[(*tokiter)++].type = TokenAndSign;
    144 				}
    145 				type = TokenNull;
    146 				j = -1;
    147 			} else if (ISEXCLAMATIONSIGN(ch)) {
    148 				type = TokenExclamationMark;
    149 				(*tokens)[*tokiter].off = valstart;
    150 				if (ISEQUSIGN(input[i + 1])) { /* != */
    151 					++i;
    152 					(*tokens)[*tokiter].len = ++j + 1;
    153 					(*tokens)[(*tokiter)++].type = TokenLogicalNotEquSign;
    154 				} else {
    155 					(*tokens)[*tokiter].len = j + 1;
    156 					(*tokens)[(*tokiter)++].type = TokenExclamationMark;
    157 				}
    158 				type = TokenNull;
    159 				j = -1;
    160 			} else if (ISLINECOMMSTARTCHAR(ch)) {
    161 				break;
    162 			} else if (ISIGNORABLE(ch)) {
    163 				--j;
    164 				continue;
    165 			} else if (ISOPPAR(ch) || ISOPBRK(ch) || ISOPBRC(ch)
    166 					|| ISCLPAR(ch) || ISCLBRK(ch) || ISCLBRC(ch)
    167 					|| ISCOLON(ch) || ISSEMICOLON(ch)
    168 					|| ISCOMM(ch) || ISQUESTIONSIGN(ch)) {
    169 				(*tokens)[*tokiter].off = valstart;
    170 				(*tokens)[*tokiter].len = j + 1;
    171 				(*tokens)[(*tokiter)++].type =
    172 					ISQUESTIONSIGN(ch) ? TokenQuestionMark :
    173 					ISOPPAR(ch) ? TokenOpeningParenthesis :
    174 					ISOPBRK(ch) ? TokenOpeningBracket :
    175 					ISOPBRC(ch) ? TokenOpeningBrace :
    176 					ISCLPAR(ch) ? TokenClosingParenthesis :
    177 					ISCLBRK(ch) ? TokenClosingBracket :
    178 					ISCLBRC(ch) ? TokenClosingBrace :
    179 					ISCOLON(ch) ? TokenColon :
    180 					ISCOMM(ch) ? TokenComma :
    181 					TokenSemicolon;
    182 				type = TokenNull;
    183 				j = -1;
    184 			} else
    185 				errwarn("unexpected character: \033[1m%c \033[0m(\033[1m\\%o\033[0m)",
    186 						1, (*tokens)[*tokiter], ch, ch & 0xff);
    187 		} else if ((type == TokenNumber && !ISNUMCHAR(ch))
    188 		|| (type == TokenIdentifier && !ISIDENCHAR(ch))
    189 		|| (type == TokenString && ISQUOT(ch))) {
    190 			(*tokens)[*tokiter].file = filename;
    191 			(*tokens)[*tokiter].line = fileline;
    192 			(*tokens)[*tokiter].col = valstart - off + 1;
    193 			(*tokens)[*tokiter].off = valstart;
    194 			(*tokens)[*tokiter].len = j + (type == TokenString ? 1 : 0);
    195 			if (!strncmp(input + (valstart - off), "if", 2)
    196 			||  !strncmp(input + (valstart - off), "while", 5)
    197 			||  !strncmp(input + (valstart - off), "return", 6)
    198 			||  !strncmp(input + (valstart - off), "var", 3)
    199 			||  !strncmp(input + (valstart - off), "const", 5)
    200 			||  !strncmp(input + (valstart - off), "void", 4))
    201 				type = TokenKeyword;
    202 			(*tokens)[(*tokiter)++].type = type;
    203 			if (type != TokenString) --i;
    204 			type = TokenNull;
    205 			j = -1;
    206 		}
    207 	}
    208 
    209 	return i;
    210 }
    211 
    212 static void
    213 usage(void)
    214 {
    215 	die("usage: %s", argv0);
    216 }
    217 
    218 int
    219 main(int argc, char *argv[])
    220 {
    221 	char buffer[BUFSIZ];
    222 	ssize_t rb;
    223 	size_t csiz, toksiz, tokiter;
    224 	int lindex;
    225 	Token *tokens;
    226 
    227 	ARGBEGIN {
    228 	default:
    229 		usage();
    230 	} ARGEND
    231 
    232 	contents = malloc(csiz = 0);
    233 	tokens = malloc(sizeof(*tokens) * (toksiz = 128));
    234 
    235 	filename = "<stdin>";
    236 	sciter = 0;
    237 
    238 	for (rb = lindex = tokiter = 0; (rb = nextline(0, buffer, BUFSIZ)) > 0; ++lindex) {
    239 		contents = realloc(contents, csiz += rb);
    240 		memcpy(contents + (csiz - rb), buffer, rb);
    241 		fileline = lindex + 1;
    242 		parseline(contents + (csiz - rb), rb, (csiz - rb), &tokens, &toksiz, &tokiter);
    243 	}
    244 
    245 	{
    246 		const char space = ' ';
    247 		int j;
    248 		for (j = 0; j < tokiter; ++j) {
    249 			write(2, contents + tokens[j].off, tokens[j].len);
    250 			write(2, &space, 1);
    251 		}
    252 		write(2, "\n", 1);
    253 	}
    254 
    255 
    256 	output = malloc(outsiz = snprintf(buffer, BUFSIZ, "BITS 64\n"
    257 				"section .text\nglobal _start\n_start:\n"
    258 				"\tcall main\n\tmov rax, 60\n\tmov rdi, 0\n"
    259 				"\tsyscall\n\tret\n") + 1);
    260 
    261 	memcpy(output, buffer, outsiz);
    262 	memcpy(output + outsiz, "", 1);
    263 
    264 	g_main(tokens, tokiter);
    265 	write(1, output, outsiz - 1);
    266 
    267 	free(output);
    268 	free(tokens);
    269 	free(contents);
    270 }