hyc

Hydrogen Compiler written in C
git clone git://git.kocotian.pl/hyc.git
Log | Files | Refs | README | LICENSE

commit 135b166a85ba2aaf255543f33b8a3c8e5a21976d
parent d14b327c8054091ef2b4d6b2baa3529664b88f41
Author: kocotian <kocotian@kocotian.pl>
Date:   Mon, 26 Jul 2021 08:34:38 +0000

AST, errors, Strdup()

Diffstat:
M Makefile | 2 +-
M ast.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A err.c | 43 +++++++++++++++++++++++++++++++++++++++++++
M hyc.c | 10 +++++++++-
M include/ast.h | 8 ++++++--
A include/err.h | 29 +++++++++++++++++++++++++++++
M include/str.h | 1 +
M include/tokenize.h | 1 +
M str.c | 9 +++++++++
M tokenize.c | 60 +++++++++++++++++++++++++++++++++++-------------------------
10 files changed, 275 insertions(+), 29 deletions(-)

diff --git a/Makefile b/Makefile @@ -19,7 +19,7 @@ include config.mk all: hyc -hyc: hyc.c util.c str.c tokenize.c ast.c +hyc: hyc.c util.c err.c str.c tokenize.c ast.c ${CC} -std=c99 -pedantic -Wall -Wextra -Wconversion -Iinclude -o $@ $^ install: hyc diff --git a/ast.c b/ast.c @@ -19,3 +19,144 @@ */ #include <ast.h> +#include <err.h> +#include <stdlib.h> +#include <util.h> + +typedef struct { + Token *data; + size_t len; + ssize_t pos; +} Tokenizer; + +static Token *nextToken(Tokenizer *t); +static Token *enextToken(Tokenizer *t); +static Token *enextTokenType(Tokenizer *t, TokenType type); + +static ASTStatement tokenstoASTStatement(Tokenizer *t); + +static ASTGlobal tokenstoASTGlobalFunction(Tokenizer *t); + +static Token * +nextToken(Tokenizer *t) +{ + return ((unsigned)(++t->pos) < t->len) ? &(t->data[t->pos]) : NULL; +} + +static Token * +nextTokenType(Tokenizer *t, TokenType type) +{ + Token *tok; + if ((tok = nextToken(t)) != NULL) { + if (tok->type != type) + error(tok, "token type mismatch (expected %s, got %s)", + strTokenType(type), strTokenType(tok->type)); + } + return tok; +} + +static Token * +enextToken(Tokenizer *t) +{ + Token *tok; + if ((tok = nextToken(t)) == NULL) + error(tok, "unexpected end of input"); + return tok; +} + +static Token * +enextTokenType(Tokenizer *t, TokenType type) +{ + Token *tok; + tok = enextToken(t); + if (tok->type != type) + error(tok, "token type mismatch (expected %s, got %s)", + strTokenType(type), strTokenType(tok->type)); + return tok; +} + +/*****************************************************************************/ + +/* Expressions */ + +static ASTExpression +tokenstoASTExpression(Tokenizer *t) +{ + ASTExpression expr; + Token *tok; + tok = enextToken(t); + + if (0) { + /* Literals: */ + } else if (tok->type == TokenIdentifier) { + expr.type = ASTExpressionLiteralIdentifier_T; + expr.Literal.value = Strdup(tok->str).data; + } else if (tok->type == TokenInteger) { + expr.type = ASTExpressionLiteralInteger_T; 
+ expr.Literal.value = Strdup(tok->str).data; + } else if (tok->type == TokenString) { + expr.type = ASTExpressionLiteralString_T; + expr.Literal.value = Strdup(tok->str).data; + } + + return expr; +} + +/* Statements */ + +static ASTStatement +tokenstoASTStatement(Tokenizer *t) +{ + ASTStatement stat; + Token *tok; + + tok = enextToken(t); + + if (0) { + } else if (tok->type == TokenSemicolon) { + stat.type = ASTStatementNoOp_T; + } + + return stat; +} + +/* Globals */ + +static ASTGlobal +tokenstoASTGlobalFunction(Tokenizer *t) +{ + ASTGlobal global; + Token *tok; + + global.type = ASTGlobalFunction_T; + global.Function.name = tokenstoASTExpression(t).Literal; + + tok = enextTokenType(t, TokenOpeningParenthesis); + while ((tok = enextToken(t)) != NULL) { + /* TODO: parameters */ + if (tok->type == TokenClosingParenthesis) + break; + } + + global.Function.body = tokenstoASTStatement(t); + + return global; +} + +ASTModule +tokenstoASTModule(Token *tdata, size_t tlen) +{ + ASTModule module; + Token *tok; + Tokenizer t = {tdata, tlen, -1}; + + newVector(module); + + while ((tok = nextTokenType(&t, TokenIdentifier)) != NULL) { + if (!Strccmp(tok->str, "function")) { + pushVector(module, tokenstoASTGlobalFunction(&t)); + } + } + + return module; +} diff --git a/err.c b/err.c @@ -0,0 +1,43 @@ +/* + hyc - Hydrogen Compiler written in C + Copyright (C) 2021 Kacper Kocot <kocotian@kocotian.pl> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> + +void +error(Token *t, const char *fmt, ...) +{ + va_list ap; + + if (t != NULL) + fprintf(stderr, "error:%ld:%ld: ", + t->line + 1, t->col + 1); + else + fprintf(stderr, "error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fputc('\n', stderr); + + exit(1); +} diff --git a/hyc.c b/hyc.c @@ -26,6 +26,7 @@ #include <unistd.h> #include <arg.h> +#include <ast.h> #include <str.h> #include <tokenize.h> #include <util.h> @@ -95,14 +96,17 @@ main(int argc, char *argv[]) for (i = 0; i < argc; ++i) { Array(Token) tokens; + ASTModule module; if ((signed)(s.len = (unsigned)mapfile(argv[i], &(s.data))) < 0) die("mapfile(%s):", argv[i]); + /* write(STDOUT_FILENO, s.data, s.len); */ + if ((signed)(tokens.len = (unsigned)tokenize(s, &tokens.data)) < 0) die("tokenize(%s):", argv[i]); - /* write(STDOUT_FILENO, s.data, s.len); */ + /* { size_t j; for (j = 0; j < tokens.len; ++j) { @@ -111,6 +115,10 @@ main(int argc, char *argv[]) (int)tokens.data[j].str.len, tokens.data[j].str.data); } } + */ + + module = tokenstoASTModule(tokens.data, tokens.len); + free(tokens.data); munmap(s.data, s.len); } diff --git a/include/ast.h b/include/ast.h @@ -22,6 +22,7 @@ #define _AST_H #include <str.h> +#include <tokenize.h> /* Expressions */ struct ASTExpressionAny; @@ -109,7 +110,7 @@ typedef struct ASTGlobalAny { typedef struct ASTGlobalFunction { enum ASTGlobalType type; - String name; + struct ASTExpressionLiteral name; union ASTStatement body; } ASTGlobalFunction; @@ -122,9 +123,12 @@ typedef union ASTGlobal { /**/ typedef struct ASTModule { - enum ASTGlobalType type; union ASTGlobal *data; size_t len; } ASTModule; +/**/ + +ASTModule tokenstoASTModule(Token *tdata, size_t tlen); + #endif diff 
--git a/include/err.h b/include/err.h @@ -0,0 +1,29 @@ +/* + hyc - Hydrogen Compiler written in C + Copyright (C) 2021 Kacper Kocot <kocotian@kocotian.pl> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +*/ + +#ifndef _ERR_H +#define _ERR_H + +#include <stdarg.h> +#include <tokenize.h> + +void error(Token *t, const char *fmt, ...); + +#endif diff --git a/include/str.h b/include/str.h @@ -30,6 +30,7 @@ typedef struct String { int Strcmp(String s1, String s2); int Strccmp(const String s, const char *cs); +String Strdup(const String s); #define Strevalf(S) (int)((S).len), (S).data diff --git a/include/tokenize.h b/include/tokenize.h @@ -37,6 +37,7 @@ typedef enum { typedef struct Token { TokenType type; String str; + size_t line, col; } Token; Token newToken(void); diff --git a/str.c b/str.c @@ -19,6 +19,7 @@ */ #include <str.h> +#include <stdlib.h> #include <string.h> int @@ -36,3 +37,11 @@ Strccmp(const String s, const char *cs) return (int)(s.len - strlen(cs)); return strncmp(s.data, cs, s.len); } + +String +Strdup(const String s) +{ + String r = { strncpy(malloc(s.len + 1), s.data, s.len), s.len }; + r.data[s.len] = 0; + return r; +} diff --git a/tokenize.c b/tokenize.c @@ -38,21 +38,30 @@ tokenize(String input, Token **output) #define CHARPLUS(N) (i + (N) < input.len ? 
input.data[i + (N)] : 0) #define CURTOK ((*output)[tokcount - 1]) #define TYPE(T) (CURTOK.type = Token##T) +#define SEEKCHAR (++i, ++col) { size_t tokcount; size_t i, initial_i; + size_t ln, col; *output = malloc(tokcount = 0); - i = 0; + i = ln = col = 0; while (NOT_OVERFLOW) { *output = realloc(*output, (++tokcount) * sizeof (**output)); CURTOK = newToken(); - while (isspace(CURCHAR) && NOT_OVERFLOW) - ++i; + while (isspace(CURCHAR) && NOT_OVERFLOW) { + if (CURCHAR == '\n') { + ++ln; col = 0; + ++i; + } else SEEKCHAR; + } CURTOK.str.data = input.data + i; + CURTOK.line = ln; + CURTOK.col = col; + initial_i = i; if (!NOT_OVERFLOW) { @@ -62,51 +71,51 @@ tokenize(String input, Token **output) || (CURCHAR >= 'A' && CURCHAR <= 'Z') || (CURCHAR == '_')) { TYPE(Identifier); - ++i; + SEEKCHAR; while (((CURCHAR >= 'a' && CURCHAR <= 'z') || (CURCHAR >= 'A' && CURCHAR <= 'Z') || (CURCHAR >= '0' && CURCHAR <= '9') || (CURCHAR == '_')) - && NOT_OVERFLOW) ++i; + && NOT_OVERFLOW) SEEKCHAR; /* Integer */ } else if (CURCHAR >= '0' && CURCHAR <= '9') { TYPE(Integer); - ++i; + SEEKCHAR; while ((CURCHAR >= '0' && CURCHAR <= '9') - && NOT_OVERFLOW) ++i; + && NOT_OVERFLOW) SEEKCHAR; /* String */ } else if (CURCHAR == '"') { TYPE(String); - ++i; - while (CURCHAR != '"' && NOT_OVERFLOW) ++i; - ++i; + SEEKCHAR; + while (CURCHAR != '"' && NOT_OVERFLOW) SEEKCHAR; + SEEKCHAR; /* Brackets */ - } else if (CURCHAR == '(') { TYPE(OpeningParenthesis); ++i; - } else if (CURCHAR == ')') { TYPE(ClosingParenthesis); ++i; - } else if (CURCHAR == '[') { TYPE(OpeningBracket); ++i; - } else if (CURCHAR == ']') { TYPE(ClosingBracket); ++i; - } else if (CURCHAR == '{') { TYPE(OpeningBrace); ++i; - } else if (CURCHAR == '}') { TYPE(ClosingBrace); ++i; + } else if (CURCHAR == '(') { TYPE(OpeningParenthesis); SEEKCHAR; + } else if (CURCHAR == ')') { TYPE(ClosingParenthesis); SEEKCHAR; + } else if (CURCHAR == '[') { TYPE(OpeningBracket); SEEKCHAR; + } else if (CURCHAR == ']') { TYPE(ClosingBracket); SEEKCHAR; 
+ } else if (CURCHAR == '{') { TYPE(OpeningBrace); SEEKCHAR; + } else if (CURCHAR == '}') { TYPE(ClosingBrace); SEEKCHAR; /* Other single-char operators */ - } else if (CURCHAR == '*') { TYPE(Asterisk); ++i; - } else if (CURCHAR == '&') { TYPE(Amperstand); ++i; - } else if (CURCHAR == ';') { TYPE(Semicolon); ++i; - } else if (CURCHAR == ',') { TYPE(Comma); ++i; - } else if (CURCHAR == '.') { TYPE(Dot); ++i; + } else if (CURCHAR == '*') { TYPE(Asterisk); SEEKCHAR; + } else if (CURCHAR == '&') { TYPE(Amperstand); SEEKCHAR; + } else if (CURCHAR == ';') { TYPE(Semicolon); SEEKCHAR; + } else if (CURCHAR == ',') { TYPE(Comma); SEEKCHAR; + } else if (CURCHAR == '.') { TYPE(Dot); SEEKCHAR; } else if (CURCHAR == ':') { TYPE(Colon); if (NEXTCHAR == ':') { - TYPE(DoubleColon); ++i; + TYPE(DoubleColon); SEEKCHAR; } - ++i; + SEEKCHAR; } else { - ++i; + SEEKCHAR; } CURTOK.str.len = i - initial_i; } - return (signed)tokcount; + return (signed)--tokcount; } #undef NOT_OVERFLOW #undef CURCHAR @@ -114,6 +123,7 @@ tokenize(String input, Token **output) #undef CHARPLUS #undef CURTOK #undef TYPE +#undef SEEKCHAR char * strTokenType(TokenType type)