ls

simple syscall-based programming language, written from scratch
git clone git://git.kocotian.pl/ls.git
Log | Files | Refs | README | LICENSE

commit 9967b339eeeb35683e08e637c14d78c8370a752a
parent eb123ac750ccfbd670963eafa08411d6593d3db5
Author: kocotian <kocotian@kocotian.pl>
Date:   Sun, 28 Feb 2021 13:54:55 +0100

finalized parsing of the remaining tokens; started on expressions in the grammar

Diffstat:
M grammar.c | 17 ++++++++++++++---
M grammar.h | 4 ++--
M lsc.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
A tokenmacros.h | 30 ++++++++++++++++++++++++++++++
M tokentypes | 32 ++++++++++++++++++++++++++------
5 files changed, 160 insertions(+), 37 deletions(-)

diff --git a/grammar.c b/grammar.c @@ -36,7 +36,18 @@ g_expression(Token *tokens, size_t toksize) i = 0; /* temporarily, expression can be a number only; TODO */ - g_expecttype(tokens[i++], TokenNumber); + if (tokens[i].type == TokenNumber) { /* number literal */ + ++i; + } else if (tokens[i].type == TokenString) { /* string literal */ + ++i; + } else if (tokens[i].type == TokenIdentifier) { /* identifier literal */ + ++i; + } else if (tokens[i].type == TokenOpeningParenthesis) { /* (expression) */ + i += g_expression(tokens + i, toksize - i); + g_expecttype(tokens[i++], TokenClosingParenthesis); + } else { /* expressions that starts with another expressions; TODO */ + i += g_expression(tokens + i, toksize - i); + } return i; } @@ -72,7 +83,7 @@ g_statement(Token *tokens, size_t toksize) do { ++i; g_expecttype(tokens[i++], TokenIdentifier); - if (tokens[i].type == TokenEqualSign) { + if (tokens[i].type == TokenAssignmentSign) { ++i; i += g_expression(tokens + i, toksize - i); } @@ -82,7 +93,7 @@ g_statement(Token *tokens, size_t toksize) do { ++i; g_expecttype(tokens[i++], TokenIdentifier); - if (tokens[i].type == TokenEqualSign) { + if (tokens[i].type == TokenAssignmentSign) { ++i; i += g_expression(tokens + i, toksize - i); } diff --git a/grammar.h b/grammar.h @@ -3,7 +3,7 @@ #include "lsc.h" -static int g_expecttype(Token token, TokenType type); - +size_t g_expression(Token *tokens, size_t toksize); +size_t g_statement(Token *tokens, size_t toksize); size_t g_function(Token *tokens, size_t toksize); size_t g_main(Token *tokens, size_t toksize); diff --git a/lsc.c b/lsc.c @@ -18,28 +18,7 @@ #define BUFSIZ 8192 -#define ISLOW(ch) ((ch) > 0x60 && (ch) < 0x7b) -#define ISUPP(ch) ((ch) > 0x40 && (ch) < 0x5b) -#define ISNUM(ch) ((ch) > 0x2f && (ch) < 0x3a) -#define ISUND(ch) ((ch) == 0x5f) -#define ISOPPAR(ch) ((ch) == 0x28) -#define ISOPBRK(ch) ((ch) == 0x5b) -#define ISOPBRC(ch) ((ch) == 0x7b) -#define ISCLPAR(ch) ((ch) == 0x29) -#define ISCLBRK(ch) ((ch) == 0x5d) 
-#define ISCLBRC(ch) ((ch) == 0x7d) -#define ISQUOT(ch) ((ch) == 0x22) -#define ISCOMM(ch) ((ch) == 0x2c) -#define ISEQUSIGN(ch) ((ch) == 0x3d) -#define ISSEMICOLON(ch) ((ch) == 0x3b) - -#define ISIGNORABLE(ch) ((ch) > 0x00 && (ch) < 0x21) -#define ISLINECOMMSTARTCHAR(ch) ((ch) == 0x23) - -#define ISIDENSTARTCHAR(ch) (ISUND(ch) || ISLOW(ch) || ISUPP(ch)) -#define ISIDENCHAR(ch) (ISIDENSTARTCHAR(ch) || ISNUM(ch)) -#define ISNUMCHAR(ch) (ISNUM(ch) || ((ch) > 0x60 && (ch) < 0x67) || \ - ((ch) > 0x40 && (ch) < 0x47)) +#include "tokenmacros.h" #include "lsc.h" #include "grammar.h" @@ -87,14 +66,98 @@ parseline(char *input, size_t ilen, size_t off, Token **tokens, size_t *toksiz, type = TokenIdentifier; else if (ISQUOT(ch)) type = TokenString; - else if (ISLINECOMMSTARTCHAR(ch)) { + else if (ISEQUSIGN(ch)) { /* TODO: make this less bloated */ + type = TokenAssignmentSign; + (*tokens)[*tokiter].off = valstart; + if (ISEQUSIGN(input[i + 1])) { /* == */ + ++i; + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenLogicalEquSign; + } else { + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = TokenAssignmentSign; + } + type = TokenNull; + j = -1; + } else if (ISPLUSSIGN(ch)) { + type = TokenPlusSign; + (*tokens)[*tokiter].off = valstart; + if (ISEQUSIGN(input[i + 1])) { /* += */ + ++i; + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenPlusEqualSign; + } else if (ISPLUSSIGN(input[i + 1])) { /* ++ */ + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenIncrement; + } else { + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = TokenPlusSign; + } + type = TokenNull; + j = -1; + } else if (ISMINUSSIGN(ch)) { + type = TokenMinusSign; + (*tokens)[*tokiter].off = valstart; + if (ISEQUSIGN(input[i + 1])) { /* -= */ + ++i; + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenMinusEqualSign; + } else if (ISMINUSSIGN(input[i + 1])) { /* -- */ + (*tokens)[*tokiter].len 
= ++j + 1; + (*tokens)[(*tokiter)++].type = TokenDecrement; + } else { + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = TokenMinusSign; + } + type = TokenNull; + j = -1; + } else if (ISORSIGN(ch)) { + type = TokenOrSign; + (*tokens)[*tokiter].off = valstart; + if (ISORSIGN(input[i + 1])) { /* || */ + ++i; + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenLogicalOrSign; + } else { + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = TokenOrSign; + } + type = TokenNull; + j = -1; + } else if (ISANDSIGN(ch)) { + type = TokenOrSign; + (*tokens)[*tokiter].off = valstart; + if (ISANDSIGN(input[i + 1])) { /* && */ + ++i; + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenLogicalAndSign; + } else { + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = TokenAndSign; + } + type = TokenNull; + j = -1; + } else if (ISEXCLAMATIONSIGN(ch)) { + type = TokenExclamationMark; + (*tokens)[*tokiter].off = valstart; + if (ISEQUSIGN(input[i + 1])) { /* != */ + ++i; + (*tokens)[*tokiter].len = ++j + 1; + (*tokens)[(*tokiter)++].type = TokenLogicalNotEquSign; + } else { + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = TokenExclamationMark; + } + type = TokenNull; + j = -1; + } else if (ISLINECOMMSTARTCHAR(ch)) { break; } else if (ISIGNORABLE(ch)) { --j; continue; } else if (ISOPPAR(ch) || ISOPBRK(ch) || ISOPBRC(ch) || ISCLPAR(ch) || ISCLBRK(ch) || ISCLBRC(ch) - || ISCOMM(ch) || ISSEMICOLON(ch) || ISEQUSIGN(ch)) { + || ISCOMM(ch) || ISSEMICOLON(ch)) { (*tokens)[*tokiter].off = valstart; (*tokens)[*tokiter].len = j + 1; (*tokens)[(*tokiter)++].type = @@ -105,8 +168,7 @@ parseline(char *input, size_t ilen, size_t off, Token **tokens, size_t *toksiz, ISCLBRK(ch) ? TokenClosingBracket : ISCLBRC(ch) ? TokenClosingBrace : ISCOMM(ch) ? TokenComma : - ISSEMICOLON(ch) ? 
TokenSemicolon : - TokenEqualSign; + TokenSemicolon; type = TokenNull; j = -1; } else diff --git a/tokenmacros.h b/tokenmacros.h @@ -0,0 +1,30 @@ +#define ISLOW(ch) ((ch) > 0x60 && (ch) < 0x7b) +#define ISUPP(ch) ((ch) > 0x40 && (ch) < 0x5b) +#define ISNUM(ch) ((ch) > 0x2f && (ch) < 0x3a) +#define ISUND(ch) ((ch) == 0x5f) +#define ISOPPAR(ch) ((ch) == 0x28) +#define ISOPBRK(ch) ((ch) == 0x5b) +#define ISOPBRC(ch) ((ch) == 0x7b) +#define ISCLPAR(ch) ((ch) == 0x29) +#define ISCLBRK(ch) ((ch) == 0x5d) +#define ISCLBRC(ch) ((ch) == 0x7d) +#define ISQUOT(ch) ((ch) == 0x22) +#define ISCOMM(ch) ((ch) == 0x2c) +#define ISCOLON(ch) ((ch) == 0x3a) +#define ISSEMICOLON(ch) ((ch) == 0x3b) + +#define ISEQUSIGN(ch) ((ch) == 0x3d) +#define ISPLUSSIGN(ch) ((ch) == 0x2b) +#define ISMINUSSIGN(ch) ((ch) == 0x2d) + +#define ISORSIGN(ch) ((ch) == 0x7c) +#define ISANDSIGN(ch) ((ch) == 0x26) +#define ISEXCLAMATIONSIGN(ch) ((ch) == 0x21) + +#define ISIGNORABLE(ch) ((ch) > 0x00 && (ch) < 0x21) +#define ISLINECOMMSTARTCHAR(ch) ((ch) == 0x23) + +#define ISIDENSTARTCHAR(ch) (ISUND(ch) || ISLOW(ch) || ISUPP(ch)) +#define ISIDENCHAR(ch) (ISIDENSTARTCHAR(ch) || ISNUM(ch)) +#define ISNUMCHAR(ch) (ISNUM(ch) || ((ch) > 0x60 && (ch) < 0x67) || \ + ((ch) > 0x40 && (ch) < 0x47)) diff --git a/tokentypes b/tokentypes @@ -10,10 +10,6 @@ TokenIdentifier TokenString TokenKeyword -# "punctuation" -TokenComma -TokenSemicolon - # any brackets TokenOpeningParenthesis TokenOpeningBracket @@ -22,5 +18,29 @@ TokenClosingParenthesis TokenClosingBracket TokenClosingBrace -# operators -TokenEqualSign +# "punctuation" +TokenComma +TokenSemicolon +TokenColon + +# binary operators +TokenAssignmentSign +TokenLogicalEquSign + +TokenPlusSign +TokenPlusEqualSign +TokenIncrement + +TokenMinusSign +TokenMinusEqualSign +TokenDecrement + +TokenOrSign +TokenLogicalOrSign +TokenAndSign +TokenLogicalAndSign + +TokenExclamationMark +TokenLogicalNotEquSign + +TokenQuestionMark