ls

simple syscall based programming language from scratch
git clone git://git.kocotian.pl/ls.git
Log | Files | Refs | README

commit 6820ec8440f6beee5ed2fc224df922b9f94bf39e
parent 1c968edc792e4ecfeddf806cd592ddebb092113c
Author: kocotian <kocotian@kocotian.pl>
Date:   Fri, 26 Feb 2021 18:00:27 +0100

grammar, many changes in parsing

Diffstat:
M Makefile | 16 +++++++++++++---
M config.mk | 2 +-
A gentokentypes | 19 +++++++++++++++++++
A grammar.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A grammar.h | 6 ++++++
M lsc.c | 124 ++++++++++++++++++++++++++++++++++++-------------------------------------------
A lsc.h | 21 +++++++++++++++++++++
A tokentypes | 18 ++++++++++++++++++
M util.c | 20 ++++++++++++++++++++
M util.h | 1 +
10 files changed, 220 insertions(+), 71 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,6 +1,6 @@ include config.mk -SRC = lsc.c util.c +SRC = lsc.c util.c grammar.c OBJ = ${SRC:.c=.o} all: options lsc @@ -14,12 +14,22 @@ options: .c.o: ${CC} -c -ggdb ${CFLAGS} ${CPPFLAGS} $< -${OBJ}: config.mk +${OBJ}: config.mk tokentype.h tokentype.c -lsc: ${OBJ} +lsc: ${OBJ} lsc.h ${CC} -o $@ ${OBJ} clean: rm -f lsc ${OBJ} +grammar.c: tokentype.c + +lsc.h: tokentype.h + +tokentype.h: tokentypes + ./gentokentypes + +tokentype.c: tokentypes + ./gentokentypes + .PHONY: all options clean diff --git a/config.mk b/config.mk @@ -3,7 +3,7 @@ PREFIX = /usr/local MANPREFIX = ${PREFIX}/share/man # flags -CFLAGS = -std=c99 -Wall -Wextra -Os +CFLAGS = -std=c99 # -Wall -Wextra -Os CPPFLAGS = -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 # compiler and linker diff --git a/gentokentypes b/gentokentypes @@ -0,0 +1,19 @@ +#!/bin/sh +tokens="$(cat tokentypes | + sed -E 's/#.*//g;/^\s*$/d')" + +( +echo -n "typedef enum { " +echo "$tokens" | while read token +do + echo -n "$token, " +done +echo -n "} TokenType;" +) > tokentype.h + +( +echo "$tokens" | while read token +do + echo " case $token: return \"${token##Token}\"; break;" +done +) > tokentype.c diff --git a/grammar.c b/grammar.c @@ -0,0 +1,64 @@ +#include <stdio.h> + +#include "grammar.h" + +static char * +g_typetostr(TokenType type) +{ + switch (type) { +#include "tokentype.c" + default: + return "<unknown>"; break; + } +} + +static int +g_expecttype(Token token, TokenType type) +{ + if (token.type != type) + errwarn("expected \033[1m%s\033[0m, got \033[1m%s\033[0m", 1, + g_typetostr(type), g_typetostr(token.type)); + return 0; +} + +size_t +g_statement(Token *tokens, size_t toksize) +{ + size_t i; + i = 0; + if (tokens[i].type == TokenBrace) { + ++i; + while (tokens[i].type != TokenBrace && i < toksize); + g_statement(tokens + i, toksize - i); ++i; + } +} + +size_t +g_function(Token *tokens, size_t toksize) +{ + size_t i; + i = 0; + fputs("identifier\n", stderr); + g_expecttype(tokens[i++], 
TokenIdentifier); + fputs("parenthesis\n", stderr); + g_expecttype(tokens[i], TokenParenthesis); + fputs("loop:\n", stderr); + do { + ++i; + fputs("\tidentifier\n", stderr); + g_expecttype(tokens[i++], TokenIdentifier); + } while (tokens[i].type == TokenComma); + fputs("parenthesis\n", stderr); + g_expecttype(tokens[i++], TokenParenthesis); + g_statement(tokens + i, toksize - i); + return i; +} + +size_t +g_main(Token *tokens, size_t toksize) +{ + size_t i; + for (i = 0; i < toksize; ++i) { + i += g_function(tokens + i, toksize - i); + } +} diff --git a/grammar.h b/grammar.h @@ -0,0 +1,6 @@ +#include "lsc.h" + +static int g_expecttype(Token token, TokenType type); + +size_t g_function(Token *tokens, size_t toksize); +size_t g_main(Token *tokens, size_t toksize); diff --git a/lsc.c b/lsc.c @@ -1,5 +1,4 @@ #include <stdarg.h> -#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -28,30 +27,14 @@ #define ISNUMCHAR(ch) (ISNUM(ch) || ((ch) > 0x60 && (ch) < 0x67) || \ ((ch) > 0x40 && (ch) < 0x47)) -typedef enum { - TokenNull, - TokenNumber, TokenIdentifier, TokenString, - TokenComma, - TokenParenthesis, TokenBracket, TokenBrace, - TokenEqualSign -} TokenType; - -typedef struct { - char *val; - size_t len; - TokenType type; -} Token; - -static int getsyscallbyname(char *name); -static ssize_t parseline(char *input, size_t ilen, char *output, size_t olen, - int lnum, char *filename); -static void usage(void); - -static void errwarn(const char *fmt, int iserror, const char *filename, - const char *line, int fileline, int filecol, ...); +#include "lsc.h" +#include "grammar.h" char *argv0; +char *filename, *line; +int fileline; + static int getsyscallbyname(char *name) { @@ -63,18 +46,20 @@ getsyscallbyname(char *name) } static ssize_t -parseline(char *input, size_t ilen, char *output, size_t olen, int lnum, char *filename) +parseline(char *input, size_t ilen, size_t off, Token **tokens, size_t *toksiz, size_t *tokiter) { TokenType type; - size_t 
i, j, li, tokiter; - char ch, *valstart; - *output = '\0'; - Token *tokens = malloc(sizeof(Token) * 128); + size_t i, j, li, valstart; + char ch; + + line = input; - for (tokiter = i = j = li = type = 0; i < ilen; ++i, ++j, ++li) { + for (i = j = li = type = 0; i < ilen; ++i, ++j, ++li) { + if ((*tokiter >= (*toksiz - 1))) + *tokens = realloc(*tokens, sizeof(Token) * (*toksiz += 128)); ch = input[i]; if (!type) { - valstart = input + i; + valstart = off + i; if (ISNUM(ch)) type = TokenNumber; else if (ISIDENSTARTCHAR(ch)) @@ -87,9 +72,12 @@ parseline(char *input, size_t ilen, char *output, size_t olen, int lnum, char *f --j; continue; } else if (ISPAR(ch) || ISBRK(ch) || ISBRC(ch) || ISCOMM(ch) || ISEQUSIGN(ch)) { - tokens[tokiter].val = valstart; - tokens[tokiter].len = j + 1; - tokens[tokiter++].type = + (*tokens)[*tokiter].file = filename; + (*tokens)[*tokiter].line = fileline; + (*tokens)[*tokiter].col = valstart - off + 1; + (*tokens)[*tokiter].off = valstart; + (*tokens)[*tokiter].len = j + 1; + (*tokens)[(*tokiter)++].type = ISPAR(ch) ? TokenParenthesis : ISBRK(ch) ? TokenBracket : ISBRC(ch) ? TokenBrace : @@ -99,27 +87,22 @@ parseline(char *input, size_t ilen, char *output, size_t olen, int lnum, char *f j = -1; } else errwarn("unexpected character: \033[1m%c \033[0m(\033[1m\\%o\033[0m)", - 1, filename, input, lnum, i + 1, ch, ch & 0xff); + 1, ch, ch & 0xff); } else if ((type == TokenNumber && !ISNUMCHAR(ch)) || (type == TokenIdentifier && !ISIDENCHAR(ch)) || (type == TokenString && ISQUOT(ch))) { - tokens[tokiter].val = valstart; - tokens[tokiter].len = j + (type == TokenString ? 1 : 0); - tokens[tokiter++].type = type; + (*tokens)[*tokiter].file = filename; + (*tokens)[*tokiter].line = fileline; + (*tokens)[*tokiter].col = valstart - off + 1; + (*tokens)[*tokiter].off = valstart; + (*tokens)[*tokiter].len = j + (type == TokenString ? 
1 : 0); + (*tokens)[(*tokiter)++].type = type; if (type != TokenString) --i; type = TokenNull; j = -1; } } - const char space = ' '; - for (j = 0; j < tokiter; ++j) { - write(1, tokens[j].val, tokens[j].len); - write(1, &space, 1); - } - puts(""); - - free(tokens); return i; } @@ -129,40 +112,47 @@ usage(void) die("usage: %s", argv0); } -static void -errwarn(const char *fmt, int iserror, const char *filename, const char *line, - int fileline, int filecol, ...) -{ - va_list ap; - fprintf(stderr, "\033[0;1m%s:%d:%d: \033[1;3%s: \033[0m", - filename, fileline, filecol, iserror ? "1merror" : "3mwarning"); - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - - fprintf(stderr, "\n% 5d | %s%c", fileline, line, - line[strlen(line) - 1] != '\n' ? '\n' : 0); - - if (iserror) exit(1); -} - int main(int argc, char *argv[]) { - char buffer[BUFSIZ], tmpbuf[BUFSIZ]; + char buffer[BUFSIZ], *contents; ssize_t rb; + size_t csiz, toksiz, tokiter; int lindex; + Token *tokens; - void *data, *bss, *text; + /* void *data, *bss, *text; */ ARGBEGIN { default: usage(); } ARGEND - for (rb = lindex = 0; (rb = nextline(0, buffer, BUFSIZ)) > 0; ++lindex) { - parseline(buffer, rb, tmpbuf, BUFSIZ, - lindex + 1, "<stdin>"); + contents = malloc(csiz = 0); + tokens = malloc(sizeof(*tokens) * (toksiz = 128)); + + filename = "<stdin>"; + + for (rb = lindex = tokiter = 0; (rb = nextline(0, buffer, BUFSIZ)) > 0; ++lindex) { + contents = realloc(contents, csiz += rb); + memcpy(contents + (csiz - rb), buffer, rb); + fileline = lindex + 1; + parseline(contents + (csiz - rb), rb, (csiz - rb), &tokens, &toksiz, &tokiter); } + + { + const char space = ' '; + int j; + for (j = 0; j < tokiter; ++j) { + write(1, contents + tokens[j].off, tokens[j].len); + write(1, &space, 1); + } + write(1, "\n", 1); + } + + printf("tokiter: %d\n", tokiter); + g_main(tokens, tokiter); + + free(tokens); + free(contents); } diff --git a/lsc.h b/lsc.h @@ -0,0 +1,21 @@ +#ifndef _LSC_H +#define _LSC_H + +#include 
"tokentype.h" +#include "util.h" + +typedef struct { + short line, col; + char *file; + size_t off, len; + TokenType type; +} Token; + +static int +getsyscallbyname(char *name); +static ssize_t +parseline(char *input, size_t ilen, size_t off, Token **tokens, size_t *toksiz, size_t *tokiter); +static void +usage(void); + +#endif diff --git a/tokentypes b/tokentypes @@ -0,0 +1,18 @@ +# first of all, null token +TokenNull + +# basic tokens +TokenNumber +TokenIdentifier +TokenString + +# "punctuation" +TokenComma + +# any brackets +TokenParenthesis +TokenBracket +TokenBrace + +# operators +TokenEqualSign diff --git a/util.c b/util.c @@ -60,3 +60,23 @@ nextline(int fd, char *buf, size_t size) buf[count] = '\0'; return (ssize_t)count; } + +void +errwarn(const char *fmt, int iserror, ...) +{ + extern const char *filename, *line; + extern int fileline; + + va_list ap; + fprintf(stderr, "\033[0;1m%s:%d: \033[1;3%s: \033[0m", + filename, fileline, iserror ? "1merror" : "3mwarning"); + + va_start(ap, iserror); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n% 5d | %s%c", fileline, line, + line[strlen(line) - 1] != '\n' ? '\n' : 0); + + if (iserror) exit(1); +} diff --git a/util.h b/util.h @@ -8,3 +8,4 @@ void die(const char *fmt, ...); void *ecalloc(size_t nmemb, size_t size); ssize_t nextline(int fd, char *buf, size_t size); +void errwarn(const char *fmt, int iserror, ...);