Compare commits

27 Commits

Author SHA1 Message Date
485f7f9770 FUNCTION PARSER FINALLY IMPLEMENTED 2025-11-17 14:15:49 +03:00
b7b1eab99f added blocks, dont know if it works 2025-11-17 13:45:52 +03:00
188acbfe88 added funcparser for debug 2025-11-17 13:31:13 +03:00
097ef85507 closer to getting func def working 2025-11-17 12:09:09 +03:00
2e485c9eab func parser kinda works, need to parse body and uhardcode int 2025-11-16 22:47:43 +03:00
5d5040168c parser improved 2025-11-12 14:25:54 +03:00
6aea9c60f2 parser improved 2025-11-12 14:20:11 +03:00
0a2a255993 parser improved 2025-11-12 14:15:12 +03:00
bf9d827ae1 parser improved 2025-11-12 14:13:45 +03:00
047f1a672e parser improved 2025-11-12 14:06:19 +03:00
a16f5af280 parser improved 2025-11-12 13:27:44 +03:00
6892ac26d7 parser improved 2025-11-12 13:24:30 +03:00
5406f8c368 fixed print repr 2025-11-06 19:08:07 +03:00
1b5b4e53fb func parser base ready 2025-11-06 19:06:43 +03:00
5b476f08a3 added flag lib 2025-11-06 10:06:12 +03:00
aa026676a4 updated readme 2025-11-06 10:02:36 +03:00
a89b982db5 simple bytecode compiler 2025-11-06 10:01:45 +03:00
4a754d6b16 simple bytecode compiler 2025-11-06 10:00:37 +03:00
5ea5cd6f24 static vm 2025-11-05 23:34:31 +03:00
f1849c906b fixed paths 2025-11-05 23:31:36 +03:00
1f2cb4be8b finished base 2025-11-05 23:23:49 +03:00
dcd33c9578 better structure etc 2025-11-05 23:21:29 +03:00
595fdbe653 basic VM working 2025-11-05 23:14:59 +03:00
7c1d431f28 starting RPN 2025-11-05 16:03:14 +03:00
e5b3d5e1e7 adding strlits 2025-11-05 15:29:29 +03:00
db4eb9bf6f adding strlits 2025-11-05 15:27:57 +03:00
a8c8870c33 adding strlits 2025-11-05 14:28:45 +03:00
13 changed files with 1181 additions and 505 deletions

View File

@@ -2,9 +2,9 @@
> Unfinished. Not meant to be used by others. I don't know if it works on Windows.
----------
## Simple Interpreter implemented in C.
## Simple programming language VM implemented in C. (look at examples folder)
Usage:
```
cc -o builder nob.c
./builder
cc -o builder builder.c
./builder <source file>
```

View File

@@ -7,13 +7,14 @@ int main(int argc, char **argv){
nb_arr cmd;
nb_append(&cmd, "gcc");
nb_append(&cmd, "-Wall -Wextra");
nb_append(&cmd, "lexer.c");
nb_append(&cmd, "-o lex");
// nb_append(&cmd, "-Wall -Wextra");
nb_append(&cmd, "-static");
nb_append(&cmd, "./src/hlc.c");
nb_append(&cmd, "-o hlc");
nb_cmd(&cmd);
nb_append(&cmd, "./lex");
nb_append(&cmd, "./hlc");
for (int i=1; i<argc; ++i){
nb_append(&cmd, argv[i]);
}

4
examples/main.hl Normal file
View File

@@ -0,0 +1,4 @@
print(((1+29.9)*3)/5)
print("hello world")

32
funcparser.c Normal file
View File

@@ -0,0 +1,32 @@
#include "src/parser.h"
// Debug harness for the function-definition parser: tokenizes a hard-coded
// source string, prints every token, then feeds the token stream to
// parse_func_def until a non-`fn` token is reached.
// NOTE(review): argc/argv are unused while the argument check below stays commented out.
int main(int argc, char** argv){
// if (argc < 2){
// fprintf(stderr, "No file provided. %s <file>\n", argv[0]);
// return 1;
// }
Token tk = tokenize_all("fn add(x: int, y: int) int {\n let z = x+y; return z;\n }");
size_t i=0;
Block final = {0};
block_init(&final, 9191);
SymbolTable tb = {0};
symbol_table_init(&tb, 1212);
// Dump the token stream (index, type name, text) for inspection.
printf("===================\n");
for (size_t j = 0; j < tk.size; ++j) {
printf("[%zu] type=%s text='%s'\n", j, token_type_to_string(tk.type[j]), tk.text[j]);
}
printf("===================\n");
// Parse consecutive function definitions; stop at the first token that is not `fn`.
while (i<tk.size && tk.type[i] != TOKEN_EOF){
skip_space(&tk, &i);
if (tk.type[i] == TOKEN_FN){
// NOTE(review): the returned pointer is dereferenced unchecked, and the copy
// overwrites whatever block_init set up in `final` -- confirm parse_func_def
// never returns NULL and that nothing is leaked here.
final = *parse_func_def(&tk, &i, &tb);
} else break;
}
// x86 breakpoint instruction; presumably here so a debugger stops with `final`
// in scope -- NOTE(review): x86-only, confirm this is not meant to ship.
asm("int3");
return 0;
}

205
lexer.h
View File

@@ -1,205 +0,0 @@
#include <assert.h>
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
typedef enum {
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_INTEGER,
TOKEN_FLOAT,
TOKEN_SPACE,
TOKEN_STRING,
TOKEN_MUL,
TOKEN_DIV,
TOKEN_UNKNOWN,
TOKEN_EOF,
TOKEN_NEWLINE,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_COMMA
} symbols;
typedef enum {
BHV_STACK,
BHV_UNDEFINED,
BHV_NUMBER,
BHV_STRING,
BHV_FLOAT,
} symbol_bhv;
// Token list stored as parallel arrays: index i in every array describes token i.
typedef struct {
symbols *type; // token kind
char **text; // heap copy of the token text (strdup'd by token_push)
size_t *text_len; // strlen of text[i]
symbol_bhv *behaviour; // behaviour class passed to token_push
unsigned int *cursor_skip; // cursor_skip value passed to token_push
symbols *previous_token; // type of token i-1; TOKEN_UNKNOWN for the first token
float *fval; // allocated by token_init/token_grow; token_push does not write it
int *ival; // allocated by token_init/token_grow; token_push does not write it
size_t capacity; // number of slots allocated in each array
size_t size; // number of slots in use
} Token;
/*
 * Allocate every parallel array of `tok` with room for `capacity` tokens and
 * reset the token count to zero.
 *
 * Out-of-memory policy: aborts through assert if any allocation fails. The
 * check covers all eight arrays, including ival and fval.
 */
void token_init(Token *tok, size_t capacity) {
    tok->capacity = capacity;
    tok->size = 0;

    tok->type = malloc(sizeof *tok->type * capacity);
    tok->text = malloc(sizeof *tok->text * capacity);
    tok->text_len = malloc(sizeof *tok->text_len * capacity);
    tok->behaviour = malloc(sizeof *tok->behaviour * capacity);
    tok->cursor_skip = malloc(sizeof *tok->cursor_skip * capacity);
    tok->ival = malloc(sizeof *tok->ival * capacity);
    tok->fval = malloc(sizeof *tok->fval * capacity);
    tok->previous_token = malloc(sizeof *tok->previous_token * capacity);

    assert(tok->type && tok->text && tok->text_len &&
           tok->behaviour && tok->cursor_skip && tok->previous_token &&
           tok->ival && tok->fval);
}
/*
 * Double the capacity of every parallel array in `tok` (or start at 8 slots
 * when the capacity is zero).
 *
 * Out-of-memory policy: aborts through assert if any reallocation fails. The
 * check covers all eight arrays, including ival and fval.
 */
void token_grow(Token *tok) {
    size_t new_capacity = (tok->capacity == 0 ? 8 : tok->capacity * 2);

    tok->type = realloc(tok->type, new_capacity * sizeof *tok->type);
    tok->text = realloc(tok->text, new_capacity * sizeof *tok->text);
    tok->text_len = realloc(tok->text_len, new_capacity * sizeof *tok->text_len);
    tok->behaviour = realloc(tok->behaviour, new_capacity * sizeof *tok->behaviour);
    tok->cursor_skip = realloc(tok->cursor_skip, new_capacity * sizeof *tok->cursor_skip);
    tok->ival = realloc(tok->ival, new_capacity * sizeof *tok->ival);
    tok->fval = realloc(tok->fval, new_capacity * sizeof *tok->fval);
    tok->previous_token = realloc(tok->previous_token, new_capacity * sizeof *tok->previous_token);

    assert(tok->type && tok->text && tok->text_len &&
           tok->behaviour && tok->cursor_skip && tok->previous_token &&
           tok->ival && tok->fval);

    tok->capacity = new_capacity;
}
/*
 * Append one token to `tok`, growing the arrays when they are full.
 *
 * type        - token kind
 * text        - token text; copied with strdup, so the caller keeps ownership
 * behaviour   - behaviour class stored alongside the token
 * cursor_skip - number of input characters the token consumed
 *
 * previous_token[i] records the kind of the preceding token, or TOKEN_UNKNOWN
 * for the first one. The ival/fval slots are zeroed so they never hold
 * indeterminate values. Aborts through assert if the text copy fails.
 */
void token_push(Token *tok, symbols type, const char *text,
                symbol_bhv behaviour, size_t cursor_skip) {
    if (tok->size >= tok->capacity) {
        token_grow(tok);
    }

    size_t i = tok->size;
    char *copy = strdup(text);
    assert(copy);

    tok->type[i] = type;
    tok->text[i] = copy;
    tok->text_len[i] = strlen(copy);
    tok->behaviour[i] = behaviour;
    tok->cursor_skip[i] = (unsigned int)cursor_skip;
    tok->ival[i] = 0;
    tok->fval[i] = 0.0f;
    tok->previous_token[i] = (i > 0) ? tok->type[i - 1] : TOKEN_UNKNOWN;

    tok->size++;
}
/*
 * Release everything owned by `tok`: each duplicated token text and all eight
 * parallel arrays (ival and fval included). The struct is reset afterwards so
 * a second call is harmless.
 */
void token_free(Token *tok) {
    for (size_t i = 0; i < tok->size; i++) {
        free(tok->text[i]);
    }
    free(tok->type);
    free(tok->text);
    free(tok->text_len);
    free(tok->behaviour);
    free(tok->cursor_skip);
    free(tok->previous_token);
    free(tok->ival);
    free(tok->fval);

    tok->type = NULL;
    tok->text = NULL;
    tok->text_len = NULL;
    tok->behaviour = NULL;
    tok->cursor_skip = NULL;
    tok->previous_token = NULL;
    tok->ival = NULL;
    tok->fval = NULL;
    tok->size = 0;
    tok->capacity = 0;
}
/* Convert the leading decimal digits of `strint` to an int via atoi (no error reporting). */
int str_to_int(char *strint) {
    int parsed = atoi(strint);
    return parsed;
}

/* Convert the leading floating-point text of `strif` to a float via strtof; the end pointer is not requested. */
float str_to_float(char *strif) {
    char **no_end = NULL;
    return strtof(strif, no_end);
}
/*
 * Map a token kind to its enumerator name for debug output.
 * Values without a listed name yield "UNKNOWN_SYMBOL".
 */
char *token_type_to_string(symbols type) {
    char *name = "UNKNOWN_SYMBOL";

    switch (type) {
    case TOKEN_PLUS:    name = "TOKEN_PLUS";    break;
    case TOKEN_MINUS:   name = "TOKEN_MINUS";   break;
    case TOKEN_INTEGER: name = "TOKEN_INTEGER"; break;
    case TOKEN_FLOAT:   name = "TOKEN_FLOAT";   break;
    case TOKEN_SPACE:   name = "TOKEN_SPACE";   break;
    case TOKEN_STRING:  name = "TOKEN_STRING";  break;
    case TOKEN_MUL:     name = "TOKEN_MUL";     break;
    case TOKEN_DIV:     name = "TOKEN_DIV";     break;
    case TOKEN_LPAREN:  name = "TOKEN_LPAREN";  break;
    case TOKEN_RPAREN:  name = "TOKEN_RPAREN";  break;
    case TOKEN_COMMA:   name = "TOKEN_COMMA";   break;
    case TOKEN_EOF:     name = "TOKEN_EOF";     break;
    case TOKEN_NEWLINE: name = "TOKEN_NEWLINE"; break;
    case TOKEN_UNKNOWN: name = "TOKEN_UNKNOWN"; break;
    default:            break;
    }

    return name;
}
/*
 * Lex exactly one token starting at input[cursor] and append it to `tok`.
 *
 * - A run of digits and dots becomes TOKEN_INTEGER (no dot) or TOKEN_FLOAT.
 * - A run of letters becomes TOKEN_STRING.
 * - Anything else is a single-character token (TOKEN_UNKNOWN if unrecognised).
 *
 * Runs longer than the 63-character scratch buffer are split: the first 63
 * characters form one token and the remainder is lexed by the next call, so
 * the buffer can never overflow.
 *
 * Returns the number of input characters consumed.
 */
size_t read_from_tok(Token *tok, const char *input, size_t cursor) {
    char buf[64];
    const size_t start = cursor;
    size_t len = 0;

    /* ctype functions require a value representable as unsigned char. */
    if (isdigit((unsigned char)input[cursor])) {
        int dots_seen = 0;
        while (len < sizeof buf - 1 &&
               (isdigit((unsigned char)input[cursor]) || input[cursor] == '.')) {
            if (input[cursor] == '.') dots_seen++;
            buf[len++] = input[cursor++];
        }
        buf[len] = '\0';
        if (dots_seen == 0) {
            token_push(tok, TOKEN_INTEGER, buf, BHV_NUMBER, cursor - start);
        } else {
            token_push(tok, TOKEN_FLOAT, buf, BHV_FLOAT, cursor - start);
        }
    } else if (isalpha((unsigned char)input[cursor])) {
        while (len < sizeof buf - 1 && isalpha((unsigned char)input[cursor])) {
            buf[len++] = input[cursor++];
        }
        buf[len] = '\0';
        token_push(tok, TOKEN_STRING, buf, BHV_STRING, cursor - start);
        // TODO: refactor into a separate function to use in parsing functions and definitions
    } else {
        /* buf is only needed for the TOKEN_UNKNOWN case below. */
        buf[0] = input[cursor];
        buf[1] = '\0';
        switch (input[cursor]) {
        case '+':  token_push(tok, TOKEN_PLUS, "+", BHV_STACK, 1); break;
        case '-':  token_push(tok, TOKEN_MINUS, "-", BHV_STACK, 1); break;
        case '*':  token_push(tok, TOKEN_MUL, "*", BHV_STACK, 1); break;
        case '/':  token_push(tok, TOKEN_DIV, "/", BHV_STACK, 1); break;
        case ' ':  token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1); break;
        case '\n': token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1); break;
        case '(':  token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1); break;
        case ')':  token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1); break;
        case ',':  token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1); break;
        default:   token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1); break;
        }
        cursor++;
    }

    return cursor - start;
}
/*
 * Lex the whole NUL-terminated `input` into a freshly initialised Token list
 * and terminate it with a TOKEN_EOF entry. The caller owns the result.
 */
Token tokenize_all(const char *input) {
    Token tokens;
    token_init(&tokens, 8);

    const size_t total = strlen(input);
    size_t pos = 0;
    /* Each call consumes one token and reports how far to advance. */
    for (; pos < total; pos += read_from_tok(&tokens, input, pos)) {
    }

    token_push(&tokens, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0);
    return tokens;
}

205
nb.h
View File

@@ -6,6 +6,13 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
int debug;
} nb_opt;
typedef struct{
int capacity;
@@ -13,17 +20,33 @@ typedef struct{
char** value;
} nb_arr;
typedef struct{
FILE *filep;
size_t filesize;
int chars;
char *buf;
} nb_file;
typedef struct {
char** urls;
char** filenames;
size_t size;
size_t capacity;
} nb_downloads;
static nb_downloads nb_default_down;
#define nb_append_da(nb_arr, ...) \
nb_append_va(nb_arr, \
((const char*[]){__VA_ARGS__}), \
(sizeof((const char*[]){__VA_ARGS__})/sizeof(const char*)))
typedef struct{
FILE *filep;
size_t filesize;
int chars;
char *buf;
} nb_file;
// Sort an array of C strings in place via nb_qsortsa_impl (comparator: nb_compsa).
// `arr` must be a real array, not a pointer, for the sizeof-based element count to be right.
#define nb_qsortsa(arr) nb_qsortsa_impl((arr), sizeof(arr)/sizeof((arr)[0]))
#define nb_qsortf(arr) nb_qsortf_impl((arr), sizeof(arr)/sizeof(arr[0]))
#define nb_qsorti(arr) nb_qsorti_impl((arr), sizeof(arr)/sizeof(arr[0]))
#define nb_split(string, ...) nb_split_impl(string, (nb_opt) {__VA_ARGS__})
void nb_init(nb_arr *newarr, int initial_capacity); // obsolete
@@ -39,32 +62,44 @@ char* nb_strdup(const char* s); // make this void that uses realloc later.
void nb_print(nb_arr *newarr);
void nb_print_info(nb_arr *newarr);
void nb_cmd(nb_arr *newarr);
// File utils
void nb_copy_file(char* old_file_name, char* new_file_name);
char* nb_read_file(char* file_name);
void nb_write_file(char* name, char* buf);
nb_file nb_read_file_c(char* file_name);
bool nb_did_file_change(char *filename);
bool nb_does_file_exist(char *filename);
//bool needs_rebuild(); // need to implement rename file first to .old or something like nob does
void nb_rebuild(int argc, char **argv);
void nb_end();
void include_http_custom(const char* url, const char* filename);
//bool needs_rebuild(); // need to implement rename file first to .old or something like nob does TODO
// Misc utils
int nb_compf(const void *a, const void *b);
int nb_compi(const void *a, const void *b);
char* nb_slice_str(char* a, size_t start, size_t end); // python slicing in c :Kappa:
void nb_qsortf_impl(void *base, size_t nmemb); // these functions macros
void nb_qsorti_impl(void *base, size_t nmemb); // two have
#ifdef NB_IMPLEMENTATION // make sure to define this before using the header
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Return a newly allocated copy of a[start..end) as a NUL-terminated string.
 *
 * The caller must free() the result. Returns NULL when `a` is NULL, when
 * end < start (which would otherwise underflow the length), or when the
 * allocation fails. The caller is responsible for keeping `end` within the
 * bounds of `a`.
 */
char* nb_slice_str(char* a, size_t start, size_t end){
    if (a == NULL || end < start) {
        return NULL;
    }

    size_t len = end - start;
    char *result = malloc(len + 1);
    if (result == NULL) {
        return NULL;
    }

    memcpy(result, a + start, len); /* fresh allocation: cannot overlap the source */
    result[len] = '\0';
    return result;
}
/*
char* nb_strdup(const char* s) {
@@ -86,7 +121,7 @@ void nb_init(nb_arr *newarr, int initial_capacity){
void nb_append(nb_arr *newarr, char *newval){
if (newarr->value == NULL){
newarr->capacity =16;
if ((newarr->capacity > 16) | (newarr->arrsize > newarr->capacity)) {
if (newarr->capacity > 16 | newarr->arrsize > newarr->capacity) {
newarr->capacity *=2;
}
newarr->value = (char**)realloc(newarr->value, sizeof(char*) * newarr->capacity);
@@ -178,14 +213,30 @@ void nb_com(nb_arr *newarr){
}
// void append_c_file(FILE *filepointer){
// filepointer = NULL;
// }
void append_c_file(FILE *filepointer){
}
/*
 * Write the NUL-terminated string `buf` to the file `name`, replacing any
 * existing content (binary mode, no newline translation).
 *
 * Failures to open, write or close are reported on stderr; the function then
 * returns without touching anything else.
 */
void nb_write_file(char* name, char* buf){
    FILE *out = fopen(name, "wb");
    if (out == NULL) {
        perror(name);
        return;
    }

    size_t len = strlen(buf);
    if (fwrite(buf, 1, len, out) != len) {
        perror(name);
    }
    if (fclose(out) != 0) { /* fclose flushes, so it can report a write error too */
        perror(name);
    }
}
void nb_copy_file(char* old_file_name, char* new_file_name){ // old name shouldnt be nobuild.c. it should be the name of the current file.
nb_file old_file;
nb_file new_file;
if (!nb_does_file_exist){
printf("%s does not exit", old_file_name);
return;
}
old_file.filep = fopen(old_file_name, "rb");
fseek(old_file.filep, 0, SEEK_END);
@@ -203,6 +254,11 @@ void nb_copy_file(char* old_file_name, char* new_file_name){ // old name shouldn
bool nb_did_file_change(char *filename){
struct stat file_old;
stat(filename, &file_old);
if (!nb_does_file_exist){
printf("%s does not exist\n", filename);
return 0;
}
struct stat file_new;
char buf[64];
@@ -216,8 +272,9 @@ bool nb_did_file_change(char *filename){
bool nb_does_file_exist(char *filename){
if (access(filename, F_OK) == 0){
return true;
}
} else {
return false;
}
}
void nb_rebuild(int argc, char **argv){
@@ -269,7 +326,8 @@ void nb_rebuild(int argc, char **argv){
}
}
char* nb_read_file(char* file_name){ // old name shouldnt be nobuild.c. it should be the name of the current file. I should think more about adding error handling
nb_file nb_read_file_c(char* file_name){ // old name shouldnt be nobuild.c. it should be the name of the current file. I should think more about adding error handling
nb_file file;
file.filep = fopen(file_name, "rb");
@@ -281,6 +339,22 @@ char* nb_read_file(char* file_name){ // old name shouldnt be nobuild.c. it shoul
fread(file.buf, 1, file.filesize, file.filep);
fclose(file.filep);
file.buf[file.filesize] = '\0';
return file;
}
/*
 * Read the whole file `file_name` into a newly allocated, NUL-terminated
 * buffer. The caller must free() the result.
 *
 * Returns NULL if the file cannot be opened, its size cannot be determined,
 * or memory runs out. The terminator is placed after the bytes actually read,
 * which can be fewer than the reported size for a text-mode stream.
 */
char* nb_read_file(char* file_name){
    char *buf = NULL;
    long size = 0;

    FILE *fp = fopen(file_name, "r");
    if (fp == NULL) {
        return NULL;
    }

    if (fseek(fp, 0, SEEK_END) != 0) goto done;
    size = ftell(fp);
    if (size < 0) goto done;
    if (fseek(fp, 0, SEEK_SET) != 0) goto done;

    buf = malloc((size_t)size + 1);
    if (buf == NULL) goto done;

    size_t got = fread(buf, 1, (size_t)size, fp);
    buf[got] = '\0'; // null termination

done:
    fclose(fp);
    return buf;
}
@@ -290,5 +364,94 @@ void nb_append_va(nb_arr *newarr, const char *items[], int count) {
}
}
/*
 * qsort comparator for float elements: -1 when *a < *b, 1 when *a > *b,
 * 0 otherwise.
 */
int nb_compf(const void *a, const void *b){
    const float lhs = *(const float*)a;
    const float rhs = *(const float*)b;
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * qsort comparator for int elements: -1 when *a < *b, 1 when *a > *b,
 * 0 when equal.
 *
 * The values are compared as ints. Converting them to float would make
 * distinct integers above 2^24 compare equal.
 */
int nb_compi(const void *a, const void *b){
    const int ia = *(const int*)a;
    const int ib = *(const int*)b;
    return (ia > ib) - (ia < ib);
}
/*
 * qsort comparator for `char *` elements that orders strings by length only:
 * -1 when *a is shorter, 1 when it is longer, 0 when the lengths match.
 */
int nb_compsa(const void *a, const void *b) {
    const size_t len_a = strlen(*(const char **)a);
    const size_t len_b = strlen(*(const char **)b);
    return (len_a > len_b) - (len_a < len_b);
}
/* Sort `nmemb` floats starting at `base` in place, ordered by nb_compf. */
void nb_qsortf_impl(void *base, size_t nmemb){
    const size_t elem_size = sizeof(float);
    qsort(base, nmemb, elem_size, nb_compf);
}

/* Sort `nmemb` `char *` elements starting at `base` in place, ordered by nb_compsa. */
void nb_qsortsa_impl(void *base, size_t nmemb){
    const size_t elem_size = sizeof(char*);
    qsort(base, nmemb, elem_size, nb_compsa);
}

/* Sort `nmemb` ints starting at `base` in place, ordered by nb_compi. */
void nb_qsorti_impl(void *base, size_t nmemb){
    const size_t elem_size = sizeof(int);
    qsort(base, nmemb, elem_size, nb_compi);
}
/*
 * Split `string` into one-character strings.
 *
 * Returns a NULL-terminated array of strlen(string) heap-allocated strings.
 * The caller frees every element and then the array. Returns NULL if an
 * allocation fails (anything allocated so far is released).
 *
 * When opt.debug is non-zero the pieces are printed as "[a,b,c,]".
 */
char** nb_split_impl(char* string, nb_opt opt){
    size_t n = strlen(string);

    /* n pieces plus the terminating NULL entry */
    char **split = malloc(sizeof *split * (n + 1));
    if (split == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < n; ++i){
        split[i] = malloc(2);
        if (split[i] == NULL) {
            while (i > 0) {
                free(split[--i]);
            }
            free(split);
            return NULL;
        }
        split[i][0] = string[i];
        split[i][1] = '\0';
    }
    split[n] = NULL;

    if (opt.debug){
        printf("[");
        for (size_t i = 0; i < n; ++i){
            printf("%s,", split[i]);
        }
        printf("]\n");
    }
    return split;
}
/*
 * Download `url` to `filename` by running `wget`, and record the pair in
 * nb_default_down so nb_end can remove the file later.
 * Intended for the builder program, not for regular C files.
 *
 * The url and filename pointers are stored as-is (not copied), so they must
 * stay valid until nb_end has run. Exits with an error message if the
 * bookkeeping arrays cannot be allocated.
 */
void include_http_custom(const char* url, const char* filename){
    nb_arr cmd = {0};

    if (nb_default_down.capacity == 0) {
        nb_default_down.capacity = 256;
        nb_default_down.size = 0;
        nb_default_down.filenames = malloc(sizeof *nb_default_down.filenames * nb_default_down.capacity);
        nb_default_down.urls = malloc(sizeof *nb_default_down.urls * nb_default_down.capacity);
        if (nb_default_down.filenames == NULL || nb_default_down.urls == NULL) {
            fprintf(stderr, "include_http_custom: out of memory\n");
            exit(-1);
        }
    }

    if (nb_default_down.size >= nb_default_down.capacity) {
        size_t new_capacity = nb_default_down.capacity * 2;

        /* realloc takes a size in bytes, so scale by the element size. */
        char **names = realloc(nb_default_down.filenames, sizeof *names * new_capacity);
        if (names == NULL) {
            fprintf(stderr, "include_http_custom: out of memory\n");
            exit(-1);
        }
        nb_default_down.filenames = names;

        char **links = realloc(nb_default_down.urls, sizeof *links * new_capacity);
        if (links == NULL) {
            fprintf(stderr, "include_http_custom: out of memory\n");
            exit(-1);
        }
        nb_default_down.urls = links;

        nb_default_down.capacity = new_capacity;
    }

    nb_default_down.urls[nb_default_down.size] = (char*)url;
    nb_default_down.filenames[nb_default_down.size] = (char*)filename;
    nb_default_down.size++;

    nb_append_da(&cmd, "wget", "-q", "-O", filename, url); // TODO: use libcurl or implement own http thingy
    nb_cmd(&cmd);
}
/*
 * Delete every file recorded in nb_default_down by include_http_custom.
 *
 * remove() returns 0 on success, so only a non-zero result is treated as a
 * failure: the error is reported and the process exits with status -1.
 */
void nb_end(){
    for (size_t i = 0; i < nb_default_down.size; ++i){
        if (remove(nb_default_down.filenames[i]) != 0) {
            perror(nb_default_down.filenames[i]);
            exit(-1);
        }
    }
}
#endif //NB_IMPLEMENTATION
// TODO: add #ifdef NB_STRIP_PREFIX in the future

119
parser.c
View File

@@ -1,119 +0,0 @@
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#define NB_IMPLEMENTATION
#include "lexer.h"
#include "nb.h"
typedef struct {
Token *left;
Token *right;
size_t prec;
symbols op;
} ASTNode;
typedef struct {
ASTNode *nodes;
size_t size;
} ASTTree;
/*
 * Build a heap-allocated, one-element Token list holding a copy of token `i`
 * from `src`. Only the `type` and `text` arrays are populated; every other
 * field stays zero from calloc.
 *
 * Aborts through assert if any allocation, including the text copy, fails.
 */
Token *copy_single_token(const Token *src, size_t i) {
    Token *t = calloc(1, sizeof *t);
    assert(t);

    t->size = 1;
    /* size the slots from the element types rather than assuming sizeof(int) */
    t->type = malloc(sizeof *t->type);
    t->text = malloc(sizeof *t->text);
    assert(t->type && t->text);

    t->type[0] = src->type[i];
    t->text[0] = strdup(src->text[i]);
    assert(t->text[0]);
    return t;
}
ssize_t find_prev_token(const Token *tok, size_t start) {
for (ssize_t i = (ssize_t)start; i >= 0; --i) {
if (tok->type[i] != TOKEN_SPACE &&
tok->type[i] != TOKEN_NEWLINE &&
tok->type[i] != TOKEN_EOF) {
return i;
}
}
return -1;
}
ssize_t find_next_token(const Token *tok, size_t start) {
for (size_t i = start; i < tok->size; ++i) {
if (tok->type[i] != TOKEN_SPACE &&
tok->type[i] != TOKEN_NEWLINE &&
tok->type[i] != TOKEN_EOF) {
return i;
}
}
return -1;
}
/*
 * Precedence of the token at `idx`: 1 for plus and minus, 2 for multiply,
 * 3 for divide, 0 for every other token.
 */
size_t token_precedence(Token token, size_t idx){
    const symbols kind = token.type[idx];

    if (kind == TOKEN_PLUS || kind == TOKEN_MINUS) {
        return 1;
    }
    if (kind == TOKEN_MUL) {
        return 2;
    }
    if (kind == TOKEN_DIV) {
        return 3;
    }
    return 0;
}
/*
 * Collect one ASTNode per token with non-zero precedence. Each node stores
 * copies of the nearest non-blank token on either side, the precedence and
 * the token kind. The nodes array is sized for token.size entries.
 *
 * Aborts through assert if the allocation fails or an operand is missing.
 */
ASTTree ast_walk(Token token) {
    ASTTree result = {0};
    result.nodes = calloc(token.size, sizeof(ASTNode));
    assert(result.nodes);

    for (size_t pos = 0; pos < token.size; ++pos) {
        const size_t prec = token_precedence(token, pos);
        if (prec == 0) {
            continue; /* not an operator */
        }

        ssize_t lhs = find_prev_token(&token, pos - 1);
        ssize_t rhs = find_next_token(&token, pos + 1);
        assert(lhs >= 0 && rhs >= 0);

        ASTNode node = {0};
        node.left = copy_single_token(&token, lhs);
        node.right = copy_single_token(&token, rhs);
        node.prec = prec;
        node.op = token.type[pos];

        result.nodes[result.size] = node;
        result.size += 1;
    }
    return result;
}
int main(int argc, char **argv){
Token to_tokenize = {0};
if (argc > 1) {
to_tokenize = tokenize_all(nb_read_file(argv[1]));
}
for (size_t i=0; i<to_tokenize.size; ++i){
printf("Type: %s\nText: %s\n\n", token_type_to_string(to_tokenize.type[i]), to_tokenize.text[i]);
}
ASTTree walked = ast_walk(to_tokenize);
for (int i=0; i<walked.size;++i){
printf("op: %s, left: %s, right: %s, prec %zu\n\n", token_type_to_string(walked.nodes[i].op), walked.nodes[i].left->text[0], walked.nodes[i].right->text[0], walked.nodes[i].prec);
}
return 0;
}

View File

@@ -1,87 +0,0 @@
#include "lexer.h"
#include <stddef.h>
typedef struct {
char* left;
char* right;
symbols node;
size_t cursor;
size_t prec;
} ASTNode;
typedef struct {
ASTNode* nodes;
size_t size;
size_t capacity;
} ASTTree;
/*
 * Allocate the node array of `a`. A capacity of zero is replaced by the
 * default of 128 before allocating.
 */
void tree_init(ASTTree* a){
    if (a->capacity == 0) {
        a->capacity = 128;
    }
    const size_t bytes = sizeof(*a->nodes) * a->capacity;
    a->nodes = malloc(bytes);
}
/*
 * Fill `a` with one node per binary operator token (+, -, /, *) found in `t`.
 *
 * For each operator the node records the operator kind, the text of the
 * tokens immediately before and after it (pointers borrowed from `t`, not
 * copied), its precedence (1 for + and -, 2 for / and *) and its own position
 * in the node array. a->size is set to the number of nodes written.
 *
 * The node array is grown as needed while scanning, so any number of
 * operators is safe. Aborts through assert on allocation failure or when an
 * operator is the first or last token.
 */
void construct_nodes(ASTTree* a, Token t){
    if (a->capacity == 0) tree_init(a);
    assert(a->nodes);

    size_t nc = 0;
    for (size_t i = 0; i < t.size; ++i){
        size_t prec;
        switch (t.type[i]){
        case TOKEN_PLUS:
        case TOKEN_MINUS:
            prec = 1;
            break;
        case TOKEN_DIV:
        case TOKEN_MUL:
            prec = 2;
            break;
        default:
            continue; /* not an operator */
        }

        assert(i > 0 && i < t.size - 1);

        /* Make room before writing slot nc. */
        if (nc >= a->capacity) {
            size_t new_capacity = a->capacity * 2;
            ASTNode *grown = realloc(a->nodes, sizeof(*a->nodes) * new_capacity);
            assert(grown);
            a->nodes = grown;
            a->capacity = new_capacity;
        }

        ASTNode *node = &a->nodes[nc];
        node->node = t.type[i];
        node->left = t.text[i-1];
        node->right = t.text[i+1];
        node->prec = prec;
        node->cursor = nc;
        nc++;
    }
    a->size = nc;
}
int main(int argc, char** argv){
Token tokens = tokenize_all("1+2 3-4 1/2 2*7"); //invalid syntax
ASTTree tree = {0};
construct_nodes(&tree, tokens);
printf("node count: %zu\n", tree.size);
for (size_t i=0; i<tree.size; ++i){
printf("op: %s, left: %s right: %s\n",
token_type_to_string(tree.nodes[i].node),
tree.nodes[i].left,
tree.nodes[i].right);
}
}

135
src/flag.h Normal file
View File

@@ -0,0 +1,135 @@
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
typedef struct {
char** label;
char** desc;
void (**func_ptr)(void);
size_t count;
size_t capacity;
} fg_flags;
typedef struct {
size_t current;
size_t total;
} fg_progressbar;
#define fg_index fg_naive_index
#define FG_PB_COUNT 33
void fg_append(fg_flags *flags, char *label, char *desc);
size_t fg_naive_index(fg_flags *flags, const char* value);
void fg_run(fg_flags *flags, int argc, char** argv);
void fg_append_ptr(fg_flags *flags, char *label, void (*func)(void));
#ifdef FG_IMPLEMENTATION
/*
 * Register a flag that only has a description (no callback).
 *
 * label - flag name without leading dashes; copied
 * desc  - description printed by fg_run; copied (NULL is stored as NULL)
 *
 * `flags` must start zero-initialised. The func_ptr array is kept allocated
 * and in step with label/desc, and this flag's callback slot is set to NULL,
 * so fg_run can index func_ptr for any registered flag.
 * Aborts through assert on allocation failure.
 */
void fg_append(fg_flags *flags, char *label, char *desc){
    if (flags->capacity == 0){
        flags->capacity = 128;
        flags->label = malloc(sizeof *flags->label * flags->capacity);
        flags->desc = malloc(sizeof *flags->desc * flags->capacity);
    } else if (flags->count >= flags->capacity){
        flags->capacity *= 2;
        flags->label = realloc(flags->label, sizeof *flags->label * flags->capacity);
        flags->desc = realloc(flags->desc, sizeof *flags->desc * flags->capacity);
        if (flags->func_ptr != NULL) {
            flags->func_ptr = realloc(flags->func_ptr, sizeof *flags->func_ptr * flags->capacity);
        }
    }

    /* Create the callback array on first need; earlier flags have no callback. */
    if (flags->func_ptr == NULL) {
        flags->func_ptr = malloc(sizeof *flags->func_ptr * flags->capacity);
        assert(flags->func_ptr);
        for (size_t i = 0; i < flags->count; ++i) {
            flags->func_ptr[i] = NULL;
        }
    }
    assert(flags->label && flags->desc && flags->func_ptr);

    flags->label[flags->count] = strdup(label);
    flags->desc[flags->count] = desc ? strdup(desc) : NULL;
    flags->func_ptr[flags->count] = NULL;
    assert(flags->label[flags->count]);
    flags->count++;
}
/*
 * Register a flag that runs `func` when it is seen on the command line.
 *
 * label - flag name without leading dashes; copied
 * func  - callback invoked by fg_run
 *
 * `flags` must start zero-initialised. The callback array is created here if
 * it does not exist yet (for example when flags were first added through a
 * path that did not allocate it); slots of earlier flags are then set to NULL.
 * Aborts through assert on allocation failure.
 */
void fg_append_ptr(fg_flags *flags, char *label, void(*func)(void)){
    if (flags->capacity == 0){
        flags->capacity = 128;
        flags->label = malloc(sizeof *flags->label * flags->capacity);
        flags->desc = malloc(sizeof *flags->desc * flags->capacity);
    } else if (flags->count >= flags->capacity){
        flags->capacity *= 2;
        flags->label = realloc(flags->label, sizeof *flags->label * flags->capacity);
        flags->desc = realloc(flags->desc, sizeof *flags->desc * flags->capacity);
        if (flags->func_ptr != NULL) {
            flags->func_ptr = realloc(flags->func_ptr, sizeof *flags->func_ptr * flags->capacity);
        }
    }

    if (flags->func_ptr == NULL) {
        flags->func_ptr = malloc(sizeof *flags->func_ptr * flags->capacity);
        assert(flags->func_ptr);
        for (size_t i = 0; i < flags->count; ++i) {
            flags->func_ptr[i] = NULL;
        }
    }
    assert(flags->label && flags->desc && flags->func_ptr);

    flags->label[flags->count] = strdup(label);
    assert(flags->label[flags->count]);
    // TODO: remove desc null terminator and fix in terms on fg_run to not print \n
    flags->desc[flags->count] = "\0";
    flags->func_ptr[flags->count] = func;
    flags->count++;
}
/*
 * Linear search for the flag whose label equals `value`.
 *
 * Returns the flag's index, or (size_t)-1 after printing an error message
 * when no label matches. Callers compare the result against -1.
 */
size_t fg_naive_index(fg_flags *flags, const char* value){
    for (size_t i = 0; i < flags->count; ++i){
        if (strcmp(flags->label[i], value) == 0){
            return i;
        }
    }
    printf("error: unrecognized command-line option %s.\n", value); // TODO: did you mean %s?
    return (size_t)-1;
}
/*
 * Look up `name` (dashes already stripped) and act on it: exit(-1) if it is
 * not a registered flag; otherwise run its callback if it has one and no
 * callback has run yet, else print its description.
 */
static void fg_run_dispatch_(fg_flags *flags, const char *name, bool *called){
    size_t idx = fg_index(flags, name);
    if (idx == (size_t)-1) exit(-1);

    if (flags->func_ptr[idx] != NULL && !*called){
        flags->func_ptr[idx]();
        *called = true;
    } else {
        printf("%s\n", flags->desc[idx]);
    }
}

/*
 * Process the command line against the registered flags.
 *
 * With no arguments (argc < 2) every flag is listed. Otherwise each argv
 * entry that starts with "-" or "--" has its dashes stripped IN PLACE (argv
 * is modified) and is dispatched; at most one callback runs per call, and
 * later flags only have their description printed. Entries without a leading
 * dash are ignored. An unknown flag terminates the process with exit(-1).
 * Does nothing when no flags are registered.
 */
void fg_run(fg_flags *flags, int argc, char** argv){
    bool called = false;

    if (flags->count == 0) {
        return;
    }

    if (argc < 2){
        printf("All commands:\n");
        for (size_t i = 0; i < flags->count; ++i){
            if (!flags->desc[i]) flags->desc[i] = "no given desc"; // impossible case
            printf(" --%s, -%s %s\n", flags->label[i], flags->label[i], flags->desc[i]); //use strlen comp to align by longest string
        }
        return;
    }

    for (size_t i = 0; i < (size_t)argc; ++i){
        char *arg = argv[i];

        if (arg[0] == '-' && arg[1] != '-'){
            /* drop one dash; strlen(arg) bytes from arg+1 include the terminator */
            memmove(arg, arg + 1, strlen(arg));
            fg_run_dispatch_(flags, arg, &called);
        }
        if (arg[0] == '-' && arg[1] == '-'){
            /* drop two dashes; copy the remaining text plus its terminator only */
            memmove(arg, arg + 2, strlen(arg + 2) + 1);
            fg_run_dispatch_(flags, arg, &called);
        }
    }
}
/*
 * Advance the progress bar by one step and redraw it on the current line.
 *
 * pb    - progress state; `current` is incremented and clamped to `total`
 * total - number of steps that make up 100%
 *
 * A total of zero is drawn as an empty bar at 0% instead of dividing by zero.
 */
void fg_progress_update(fg_progressbar *pb, size_t total) {
    pb->total = total;
    pb->current++;
    if (pb->current > total)
        pb->current = total;

    float progress = (total == 0) ? 0.0f : (float)pb->current / (float)total;

    size_t filled = (size_t)(progress * FG_PB_COUNT);
    if (filled > FG_PB_COUNT)
        filled = FG_PB_COUNT;

    char buf[FG_PB_COUNT + 1];
    for (size_t i = 0; i < FG_PB_COUNT; ++i)
        buf[i] = (i < filled) ? '#' : ' ';
    buf[FG_PB_COUNT] = '\0';

    printf("\r[%s] %3.0f%%", buf, progress * 100);
    fflush(stdout);
}
#endif

51
src/hlc.c Normal file
View File

@@ -0,0 +1,51 @@
#define FG_IMPLEMENTATION
#include "flag.h"
#include "vm.h"
VM global_vm = {0};
/* Flag callback: logs a debug line, then calls emit_bytecode on the global VM. */
void emit_bytecode_call(){
    fputs("[debug] emit_bytecode_call() triggered\n", stdout);
    emit_bytecode(&global_vm);
}
/* Register the "emit-bytecode" flag with its callback and run the flag parser over argv. */
void flag_handling(int argc, char** argv){
    fg_flags registry = {0};

    fg_append_ptr(&registry, "emit-bytecode", emit_bytecode_call);
    fg_run(&registry, argc, argv);
}
int main(int argc, char **argv) {
if (argc < 2) {
fprintf(stderr, "Usage: %s <source file>\n", argv[0]);
return 1;
}
char* read = nb_read_file(argv[1]);
//printf("INPUT: %s\n", read);
Token tk = tokenize_all(read);
SymbolTable table = {0};
symbol_table_init(&table, 32);
Token rpn = build_rpn(&tk, &table);
//print_token(&rpn);
size_t prog_size = 0;
instruct *prog = rpn_to_bytecode(&rpn, &prog_size);
VM vm = {
.program = prog,
.program_size = prog_size,
.inst_p = 0,
.st_p = 0,
.running = true,
};
global_vm = vm;
flag_handling(argc, argv);
vm_run(&vm);
return 0;
}

View File

@@ -13,15 +13,25 @@ typedef enum {
TOKEN_INTEGER,
TOKEN_FLOAT,
TOKEN_SPACE,
TOKEN_STRING,
TOKEN_STRING, // idx 5
TOKEN_IDENTIFIER,
TOKEN_MUL,
TOKEN_DIV,
TOKEN_UNKNOWN,
TOKEN_UNKNOWN, // idx 9
TOKEN_EOF,
TOKEN_NEWLINE,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_COMMA
TOKEN_COMMA,
TOKEN_LCURLY,
TOKEN_RCURLY,
TOKEN_COLON,
TOKEN_SEMI,
TOKEN_FN,
TOKEN_LET,
TOKEN_IDENT_INT, //TODO: unhardcode
TOKEN_EQU,
TOKEN_RETURN
} symbols;
typedef enum {
@@ -30,12 +40,46 @@ typedef enum {
BHV_NUMBER,
BHV_STRING,
BHV_FLOAT,
BHV_IDENT,
} symbol_bhv;
/*
 * Map a token kind to its enumerator name for debug output.
 *
 * The switch has no `default` so the compiler can still warn when a new
 * enumerator is not listed. The return after the switch covers any value that
 * is not listed, so control never falls off the end of the function.
 */
char *token_type_to_string(symbols type) {
    switch (type) {
    case TOKEN_PLUS: return "TOKEN_PLUS";
    case TOKEN_MINUS: return "TOKEN_MINUS";
    case TOKEN_INTEGER: return "TOKEN_INTEGER";
    case TOKEN_FLOAT: return "TOKEN_FLOAT";
    case TOKEN_SPACE: return "TOKEN_SPACE";
    case TOKEN_STRING: return "TOKEN_STRING";
    case TOKEN_MUL: return "TOKEN_MUL";
    case TOKEN_DIV: return "TOKEN_DIV";
    case TOKEN_LPAREN: return "TOKEN_LPAREN";
    case TOKEN_RPAREN: return "TOKEN_RPAREN";
    case TOKEN_COMMA: return "TOKEN_COMMA";
    case TOKEN_EOF: return "TOKEN_EOF";
    case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
    case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
    case TOKEN_LCURLY: return "TOKEN_LCURLY";
    case TOKEN_RCURLY: return "TOKEN_RCURLY";
    case TOKEN_SEMI: return "TOKEN_SEMI";
    case TOKEN_COLON: return "TOKEN_COLON";
    case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";
    case TOKEN_FN: return "TOKEN_FN";
    case TOKEN_LET: return "TOKEN_LET";
    case TOKEN_IDENT_INT: return "TOKEN_IDENT_INT";
    case TOKEN_EQU: return "TOKEN_EQU";
    case TOKEN_RETURN: return "TOKEN_RETURN";
    }
    return "UNKNOWN_SYMBOL";
}
typedef struct {
symbols *type;
char **text;
char **tktype;
size_t *text_len;
symbol_bhv *behaviour;
unsigned int *cursor_skip;
@@ -55,7 +99,7 @@ void token_init(Token *tok, size_t capacity) {
tok->behaviour = malloc(sizeof(symbol_bhv) * capacity);
tok->cursor_skip = malloc(sizeof(unsigned int) * capacity);
tok->previous_token = malloc(sizeof(symbols) * capacity);
tok->tktype = malloc(sizeof(char*) * capacity);
assert(tok->type && tok->text && tok->text_len &&
tok->behaviour && tok->cursor_skip && tok->previous_token);
}
@@ -69,7 +113,7 @@ void token_grow(Token *tok) {
tok->behaviour = realloc(tok->behaviour, new_capacity * sizeof(symbol_bhv));
tok->cursor_skip = realloc(tok->cursor_skip, new_capacity * sizeof(unsigned int));
tok->previous_token = realloc(tok->previous_token, new_capacity * sizeof(symbols));
tok->tktype = realloc(tok->tktype, new_capacity*sizeof(char*));
assert(tok->type && tok->text && tok->text_len &&
tok->behaviour && tok->cursor_skip && tok->previous_token);
@@ -89,7 +133,8 @@ void token_push(Token *tok, symbols type, const char *text,
tok->text_len[i] = strlen(text);
tok->behaviour[i] = behaviour;
tok->cursor_skip[i] = cursor_skip;
tok->tktype[i] = token_type_to_string(tok->type[i]);
if (i > 0)
tok->previous_token[i] = tok->type[i - 1];
else
@@ -114,25 +159,6 @@ void token_free(Token *tok) {
int str_to_int(char *strint) { return atoi(strint); }
float str_to_float(char *strif) { return strtof(strif, NULL); }
char *token_type_to_string(symbols type) {
switch (type) {
case TOKEN_PLUS: return "TOKEN_PLUS";
case TOKEN_MINUS: return "TOKEN_MINUS";
case TOKEN_INTEGER: return "TOKEN_INTEGER";
case TOKEN_FLOAT: return "TOKEN_FLOAT";
case TOKEN_SPACE: return "TOKEN_SPACE";
case TOKEN_STRING: return "TOKEN_STRING";
case TOKEN_MUL: return "TOKEN_MUL";
case TOKEN_DIV: return "TOKEN_DIV";
case TOKEN_LPAREN: return "TOKEN_LPAREN";
case TOKEN_RPAREN: return "TOKEN_RPAREN";
case TOKEN_COMMA: return "TOKEN_COMMA";
case TOKEN_EOF: return "TOKEN_EOF";
case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";
default: return "UNKNOWN_SYMBOL";
}
}
size_t read_from_tok(Token *tok, const char *input, size_t cursor) {
@@ -140,46 +166,91 @@ size_t read_from_tok(Token *tok, const char *input, size_t cursor) {
size_t start = cursor;
size_t i = 0;
if (isdigit(input[cursor])) {
if (isdigit((unsigned char)input[cursor])) {
int dots_seen = 0;
while (isdigit(input[cursor]) || input[cursor] == '.') {
while (isdigit((unsigned char)input[cursor]) || input[cursor] == '.') {
if (input[cursor] == '.') dots_seen++;
buf[i++] = input[cursor++];
if (i >= sizeof(buf) - 1) break;
}
buf[i] = '\0';
if (dots_seen == 0) {
token_push(tok, TOKEN_INTEGER, buf, BHV_NUMBER, cursor - start);
} else {
token_push(tok, TOKEN_FLOAT, buf, BHV_FLOAT, cursor - start);
}
} else if (isalpha(input[cursor])) {
while (isalpha(input[cursor])) {
buf[i++] = input[cursor++];
}
buf[i] = '\0';
token_push(tok, TOKEN_STRING, buf, BHV_STRING, cursor - start);
//refactor into separate function to use in parsing functions and definitions
} else {
buf[0] = input[cursor];
buf[1] = '\0';
switch (input[cursor]) {
case '+': token_push(tok, TOKEN_PLUS, "+", BHV_STACK, 1); break;
case '-': token_push(tok, TOKEN_MINUS, "-", BHV_STACK, 1); break;
case '*': token_push(tok, TOKEN_MUL, "*", BHV_STACK, 1); break;
case '/': token_push(tok, TOKEN_DIV, "/", BHV_STACK, 1); break;
case ' ': token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1); break;
case '\n': token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1); break;
case '(': token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1); break;
case ')': token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1); break;
case ',': token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1); break;
default: token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1); break;
}
cursor++;
token_push(tok, dots_seen == 0 ? TOKEN_INTEGER : TOKEN_FLOAT,
buf, dots_seen == 0 ? BHV_NUMBER : BHV_FLOAT,
cursor - start);
return cursor - start; // all digits handled
}
else if (input[cursor] == '"') {
cursor++; // skip opening quote
while (input[cursor] != '"' && input[cursor] != '\0') {
buf[i++] = input[cursor++];
if (i >= sizeof(buf) - 1) break;
}
buf[i] = '\0';
if (input[cursor] == '"') cursor++; // skip closing quote
token_push(tok, TOKEN_STRING, buf, BHV_STRING, cursor - start);
return cursor - start;
}
else if (isalpha((unsigned char)input[cursor])) {
while (isalpha((unsigned char)input[cursor])) {
buf[i++] = input[cursor++];
if (i >= sizeof(buf) - 1) break;
}
buf[i] = '\0';
if (strcmp(buf, "let") == 0) token_push(tok, TOKEN_LET, buf, BHV_UNDEFINED, cursor - start);
else if (strcmp(buf, "fn") == 0) token_push(tok, TOKEN_FN, buf, BHV_UNDEFINED, cursor - start);
else if (strcmp(buf, "return") == 0) token_push(tok, TOKEN_RETURN, buf, BHV_UNDEFINED, cursor - start);
else if (strcmp(buf, "int") == 0) token_push(tok, TOKEN_IDENT_INT, buf, BHV_UNDEFINED, cursor - start); // TODO: unhardcode
else token_push(tok, TOKEN_IDENTIFIER, buf, BHV_IDENT, cursor - start);
return cursor - start;
}
// Single-character tokens and symbols
switch (input[cursor]) {
case '+': token_push(tok, TOKEN_PLUS, "+", BHV_STACK, 1); break;
case '-': token_push(tok, TOKEN_MINUS, "-", BHV_STACK, 1); break;
case '*': token_push(tok, TOKEN_MUL, "*", BHV_STACK, 1); break;
case '/': token_push(tok, TOKEN_DIV, "/", BHV_STACK, 1); break;
case '{': token_push(tok, TOKEN_LCURLY, "{", BHV_STACK, 1); break;
case '}': token_push(tok, TOKEN_RCURLY, "}", BHV_STACK, 1); break;
case ';': token_push(tok, TOKEN_SEMI, ";", BHV_STACK, 1); break;
case ':': token_push(tok, TOKEN_COLON, ":", BHV_STACK, 1); break;
case '=': token_push(tok, TOKEN_EQU, "=", BHV_STACK, 1); break;
case '(':
token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1);
break;
case ')':
token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1);
break;
case ',':
token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1);
break;
case ' ':
// you can skip space tokens if you don't need them
token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1);
break;
case '\n':
token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1);
break;
case '\0':
return 0; // end of input
default: {
buf[0] = input[cursor];
buf[1] = '\0';
token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1);
break;
}
}
cursor++; // move forward exactly one char for symbol cases
return cursor - start;
}
Token tokenize_all(const char *input) {
Token tok;
token_init(&tok, 8);
@@ -196,15 +267,4 @@ Token tokenize_all(const char *input) {
}
// Demo driver: tokenizes a fixed sample expression, prints one line per
// token as [type-name] "text", then releases the token storage.
int main() {
    char *sample = "1 + 2 * 3 print";
    Token lexed = tokenize_all(sample);
    size_t idx = 0;
    while (idx < lexed.size) {
        printf("[%s] \"%s\"\n", token_type_to_string(lexed.type[idx]), lexed.text[idx]);
        idx++;
    }
    token_free(&lexed);
    return 0;
}

441
src/parser.h Normal file
View File

@@ -0,0 +1,441 @@
#include "./lexer.h"
#define NB_IMPLEMENTATION
#include "../nb.h"
// Binding strength of a binary operator token:
//   '*' and '/' -> 2, '+' and '-' -> 1, every other token -> 0.
int get_prec(symbols op){
    if (op == TOKEN_MUL || op == TOKEN_DIV) {
        return 2;
    }
    if (op == TOKEN_PLUS || op == TOKEN_MINUS) {
        return 1;
    }
    return 0;
}
// parse
// True for the four arithmetic operator tokens (all of which associate to
// the left); false for every other token type.
bool is_left_asc(symbols op){
    return op == TOKEN_MUL
        || op == TOKEN_DIV
        || op == TOKEN_PLUS
        || op == TOKEN_MINUS;
}
// File-scope pointer to a token stream, initialised to NULL.
// NOTE(review): none of the parser functions visible in this header read or
// write it - confirm whether it is still needed.
Token *global_tok = NULL;
// Distinguishes the two kinds of entries stored in a SymbolTable.
typedef enum {
SYM_VAR,  // a variable (plain identifier, or the target of 'let')
SYM_FUNC, // a function (identifier followed by '(', or an 'fn' definition)
} SymbolKind;
// One entry of the symbol table: a named variable or function.
typedef struct {
const char* name;       // identifier text; the parser stores a strdup'd copy
size_t ret_count;       // NOTE(review): never set by the code in this header
size_t arg_count;       // number of valid entries in arg_types
symbols arg_types[16];  // declared parameter type tokens, filled by parse_func_def
symbols ret_type;       // return type token; TOKEN_UNKNOWN / TOKEN_EOF are used as placeholders
SymbolKind symbol_kind; // variable or function
bool builtin;           // always false for symbols created by the parser
} Symbol;
// static Symbol builtins[] = {
// { "print", 1, 1, { TOKEN_UNKNOWN }, TOKEN_EOF, SYM_FUNC, true },
// };
// Growable array of Symbol entries, searched linearly by symbol_lookup.
typedef struct {
Symbol *symbols;  // heap buffer holding `capacity` slots
size_t size;      // number of slots currently in use
size_t capacity;  // number of slots allocated
} SymbolTable;
// Growable list of statements; parse_func_def appends one Token stream
// (the value returned by parse_statement) per statement of a function body.
typedef struct {
Token* statements; // heap buffer holding `capacity` entries
size_t size;       // number of entries currently in use
size_t capacity;   // number of entries allocated
} Block;
// Initialises `b` as an empty block with room for `initial_cap` statements.
//
// A requested capacity of 0 is raised to 1 so that the buffer is always a
// real allocation and capacity doubling can make progress. Allocation
// failure is fatal (message on stderr, exit(1)), matching symbol_table_init.
void block_init(Block *b, size_t initial_cap){
    if (initial_cap == 0) {
        initial_cap = 1;
    }
    b->statements = malloc(sizeof *b->statements * initial_cap);
    if (!b->statements) {
        fprintf(stderr, "block_init: malloc failed\n");
        exit(1);
    }
    b->capacity = initial_cap;
    b->size = 0;
}
// Appends statement `t` to block `b`, growing the statement buffer when full.
//
// The buffer that grows is b->statements; the Block object itself is never
// reallocated, so the caller's pointer to it stays valid. A capacity of 0
// (e.g. a zero-initialised Block) starts at 8 entries; otherwise the capacity
// doubles. Allocation failure is fatal (message on stderr, exit(1)).
void block_append(Block *b, Token t){
    if (b->size >= b->capacity) {
        size_t new_cap = (b->capacity == 0) ? 8 : b->capacity * 2;
        // Keep the old pointer until realloc is known to have succeeded.
        Token *grown = realloc(b->statements, sizeof *grown * new_cap);
        if (!grown) {
            fprintf(stderr, "block_append: realloc failed\n");
            exit(1);
        }
        b->statements = grown;
        b->capacity = new_cap;
    }
    b->statements[b->size] = t;
    b->size++;
}
// static int builtin_num = sizeof(builtins)/sizeof(builtins[0]);
// static SymbolTable global_env = {
// .size = sizeof(builtins)/sizeof(builtins[0]),
// .capacity = sizeof(builtins)/sizeof(builtins[0]),
// .symbols = builtins};
// Linear search of `table` for a symbol whose name equals `n`.
// Returns a pointer to the first matching entry inside the table, or NULL
// when no entry matches.
Symbol *symbol_lookup(SymbolTable *table, const char *n){
    size_t pos = 0;
    while (pos < table->size) {
        Symbol *candidate = &table->symbols[pos];
        if (strcmp(n, candidate->name) == 0) {
            return candidate;
        }
        pos++;
    }
    return NULL;
}
// fn add(x: int, y: int) int {
// return x+y;
// }
// Gives `table` an empty buffer with `initial_capacity` slots.
// Prints a message to stderr and exits with status 1 if malloc returns NULL.
void symbol_table_init(SymbolTable *table, size_t initial_capacity) {
    Symbol *storage = (Symbol*)malloc(sizeof(Symbol) * initial_capacity);
    if (storage == NULL) {
        fprintf(stderr, "symbol_table_init: malloc failed\n"); // should not happen
        exit(1);
    }
    table->symbols = storage;
    table->capacity = initial_capacity;
    table->size = 0;
}
// Stores a copy of `sym` at the end of `table`.
// When the table is full its capacity becomes 8 (if it was 0) or doubles;
// a failed realloc prints a message to stderr and exits with status 1.
void symbol_table_add(SymbolTable *table, Symbol sym) {
    if (table->size >= table->capacity) {
        size_t grown_cap = table->capacity ? table->capacity * 2 : 8;
        Symbol *resized = (Symbol*)realloc(table->symbols, sizeof(Symbol) * grown_cap);
        if (resized == NULL) {
            fprintf(stderr, "symbol_table_add: realloc failed\n");
            exit(1);
        }
        table->capacity = grown_cap;
        table->symbols = resized;
    }
    table->symbols[table->size] = sym;
    table->size += 1;
}
// Releases the table's slot buffer and resets the table to the empty state
// (NULL buffer, size 0, capacity 0). The name strings of the individual
// symbols are not freed here.
void symbol_table_free(SymbolTable *table) {
    free(table->symbols);
    *table = (SymbolTable){ .symbols = NULL, .size = 0, .capacity = 0 };
}
// Copies the top entry of `from` onto the end of `to` (with cursor_skip 0)
// and then drops it from `from`. `from` must not be empty.
static void rpn_move_top(Token *from, Token *to) {
    size_t top = from->size - 1;
    token_push(to, from->type[top], from->text[top], from->behaviour[top], 0);
    from->size--;
}

// Converts the infix token stream `inp` to reverse Polish notation using an
// operator stack (shunting-yard style) and returns the new stream, which
// always ends with a TOKEN_EOF entry.
//
// - Identifiers not yet present in `symtab` are registered on the fly: as
//   SYM_FUNC (ret_type TOKEN_EOF) when directly followed by '(', otherwise
//   as SYM_VAR (ret_type TOKEN_UNKNOWN).
// - Function identifiers wait on the operator stack and are emitted right
//   after their closing ')'.
// - Token types other than identifiers, parentheses, literals and the four
//   arithmetic operators are ignored.
Token build_rpn(Token *inp, SymbolTable *symtab) {
    Token output;
    Token stack;
    token_init(&output, 16);
    token_init(&stack, 16);

    for (size_t i = 0; i < inp->size; ++i) {
        symbols type = inp->type[i];
        const char *text = inp->text[i];

        if (type == TOKEN_IDENTIFIER) {
            bool is_call = (i + 1 < inp->size) && (inp->type[i + 1] == TOKEN_LPAREN);
            if (symbol_lookup(symtab, text) == NULL) {
                Symbol sym = {
                    .name = strdup(text),
                    .arg_count = 0,
                    .ret_type = is_call ? TOKEN_EOF : TOKEN_UNKNOWN,
                    .symbol_kind = is_call ? SYM_FUNC : SYM_VAR,
                    .builtin = false
                };
                symbol_table_add(symtab, sym);
            }
            // Calls are deferred on the stack; plain names go straight out.
            token_push(is_call ? &stack : &output, type, text, inp->behaviour[i], 0);
            continue;
        }

        if (type == TOKEN_LPAREN) {
            token_push(&stack, type, text, inp->behaviour[i], 0);
            continue;
        }

        if (type == TOKEN_RPAREN) {
            // Flush operators back to the matching '('.
            while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN) {
                rpn_move_top(&stack, &output);
            }
            // Discard the '(' itself.
            if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_LPAREN) {
                stack.size--;
            }
            // A function name sitting under the '(' is emitted now.
            if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_IDENTIFIER) {
                rpn_move_top(&stack, &output);
            }
            continue;
        }

        if (type == TOKEN_INTEGER || type == TOKEN_FLOAT || type == TOKEN_STRING) {
            token_push(&output, type, text, inp->behaviour[i], 0);
            continue;
        }

        if (is_left_asc(type)) {
            // Left-associative: pop everything of equal or higher precedence.
            while (stack.size > 0 &&
                   stack.type[stack.size - 1] != TOKEN_LPAREN &&
                   get_prec(stack.type[stack.size - 1]) >= get_prec(type)) {
                rpn_move_top(&stack, &output);
            }
            token_push(&stack, type, text, inp->behaviour[i], 0);
        }
    }

    // Whatever is left on the operator stack is emitted in pop order.
    while (stack.size > 0) {
        rpn_move_top(&stack, &output);
    }
    token_push(&output, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0);
    return output;
}
// Debug dump: prints one line per token with its index, type string and text.
void print_token(Token *tk){
    size_t n = 0;
    while (n < tk->size) {
        printf("TokenNum: %zu Type: %s Value: %s\n", n, tk->tktype[n], tk->text[n]);
        n++;
    }
}
// Advances *idx past any run of TOKEN_SPACE / TOKEN_NEWLINE tokens.
//
// The scan never moves beyond inp->size, so a stream that ends in whitespace
// leaves *idx == inp->size instead of reading past the token arrays.
void skip_space(Token *inp, size_t *idx){
    while (*idx < inp->size &&
           (inp->type[*idx] == TOKEN_SPACE || inp->type[*idx] == TOKEN_NEWLINE)) {
        (*idx)++;
    }
}
// Returns a new token stream holding copies of inp[a] .. inp[z-1]
// (half-open range), pushed in order with their original type, text,
// behaviour and cursor_skip. The new stream is initialised with room for
// z-a+1 entries.
Token slice_token(Token *inp, size_t a, size_t z){
    Token piece = {0};
    token_init(&piece, z-a+1);
    size_t pos = a;
    while (pos < z) {
        token_push(&piece, inp->type[pos], inp->text[pos], inp->behaviour[pos], inp->cursor_skip[pos]);
        pos++;
    }
    return piece;
}
// Parses the expression that starts at *idx and runs up to the terminating
// ';', converts it to RPN and leaves *idx on the first non-blank token after
// the ';'. A missing ';' (end of input or TOKEN_EOF reached first) is a
// fatal error.
static Token parse_expr_until_semi(Token *inp, size_t *idx, SymbolTable *sym) {
    size_t expr_start = *idx;
    while (*idx < inp->size &&
           inp->type[*idx] != TOKEN_SEMI && inp->type[*idx] != TOKEN_EOF) {
        (*idx)++;
    }
    if (*idx >= inp->size || inp->type[*idx] != TOKEN_SEMI) {
        fprintf(stderr, "Expected ';' at end of statement\n");
        exit(1);
    }
    // NOTE(review): the temporary slice is not released here; free it once
    // the lexer's ownership rules for token text are confirmed.
    Token expr = slice_token(inp, expr_start, *idx);
    Token rpn = build_rpn(&expr, sym);
    (*idx)++; // consume the ';'
    skip_space(inp, idx);
    return rpn;
}

// Parses one statement starting at *idx and returns its expression in RPN.
//
// Supported forms:
//   let <identifier> = <expr> ;   registers <identifier> as a SYM_VAR
//   return <expr> ;
//
// On return *idx points at the first non-blank token after the ';'.
// Any syntax error prints a message to stderr and exits with status 1, so
// the function never returns without a value.
Token parse_statement(Token *inp, size_t *idx, SymbolTable *sym){
    skip_space(inp, idx);
    if (*idx >= inp->size) {
        fprintf(stderr, "Unexpected end of input, expected a statement\n");
        exit(1);
    }

    if (inp->type[*idx] == TOKEN_LET) {
        (*idx)++;
        skip_space(inp, idx);
        if (*idx >= inp->size || inp->type[*idx] != TOKEN_IDENTIFIER) {
            fprintf(stderr, "Expected Identifier after 'let'\n");
            exit(1);
        }
        const char *var_name = inp->text[*idx];
        (*idx)++;
        skip_space(inp, idx);
        if (*idx >= inp->size || inp->type[*idx] != TOKEN_EQU) {
            fprintf(stderr, "Expected '=' after identifier\n");
            exit(1);
        }
        (*idx)++;
        skip_space(inp, idx);

        // The initialiser is converted first, so symbols it mentions are
        // registered before the variable being declared.
        Token rpn = parse_expr_until_semi(inp, idx, sym);
        Symbol declared = {
            .name = strdup(var_name),
            .symbol_kind = SYM_VAR,
            .builtin = false,
            .ret_type = TOKEN_UNKNOWN
        };
        symbol_table_add(sym, declared);
        return rpn;
    }

    if (inp->type[*idx] == TOKEN_RETURN) {
        (*idx)++;
        skip_space(inp, idx);
        return parse_expr_until_semi(inp, idx, sym);
    }

    fprintf(stderr, "Unexpected statement '%s'\n", inp->text[*idx]);
    exit(1);
}
Block *parse_func_def(Token *inp, size_t *idx, SymbolTable *sym) {
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_FN) {
fprintf(stderr, "Expected 'fn'\n");
exit(1);
}
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_IDENTIFIER) {
fprintf(stderr, "Expected function name after 'fn'\n");
exit(1);
}
const char *fname = inp->text[*idx];
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_LPAREN) {
fprintf(stderr, "Expected '('\n");
exit(1);
}
(*idx)++;
skip_space(inp, idx);
Symbol func = {0};
func.name = strdup(fname);
func.symbol_kind = SYM_FUNC;
func.ret_type = TOKEN_UNKNOWN;
func.arg_count = 0;
func.builtin = false;
while (inp->type[*idx] != TOKEN_RPAREN) {
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_IDENTIFIER) {
fprintf(stderr, "Expected argument name\n");
exit(1);
}
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_COLON) {
fprintf(stderr, "Expected ':' after argument name\n");
exit(1);
}
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_IDENT_INT) {
fprintf(stderr, "Expected type after ':'\n");
exit(1);
}
func.arg_types[func.arg_count++] = inp->type[*idx];
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] == TOKEN_COMMA) {
(*idx)++;
continue;
} else if (inp->type[*idx] == TOKEN_RPAREN) {
break;
} else {
fprintf(stderr, "Expected ',' or ')' after argument type\n");
exit(1);
}
}
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_IDENT_INT) {
fprintf(stderr, "Expected return type after ')'\n");
exit(1);
}
func.ret_type = inp->type[*idx];
(*idx)++;
skip_space(inp, idx);
if (inp->type[*idx] != TOKEN_LCURLY) {
fprintf(stderr, "Expected '{'\n");
exit(1);
}
(*idx)++;
skip_space(inp, idx);
Block *block = (Block*)malloc(sizeof(Block));
block_init(block, 55);
Token statement = {0};
while (inp->type[*idx] != TOKEN_RCURLY && inp->type[*idx] != TOKEN_EOF) {
statement = parse_statement(inp, idx, sym);
skip_space(inp, idx);
block_append(block, statement);
}
if (inp->type[*idx] != TOKEN_RCURLY) {
fprintf(stderr, "Expected '}' at end of function\n");
exit(1);
}
(*idx)++;
symbol_table_add(sym, func);
return block; // TODO: return block aka multiple statements
}
// int main(int argc, char **argv){
// if (argc < 2) return -1;
// const char ts[] = "\"hello\" hi + 2 2.312";
// const char math[] = "print(((1+2)*6)/18)"; // = 1
// const char print[] = "print(\"hello\")";
// const char simple[] = "1 + ( 3 + 3 )/4+4*3";
// char* read = nb_read_file(argv[1]);
// Token tk = tokenize_all(read);
// printf("INPUT: %s\n", read);
// SymbolTable table = {0};
// symbol_table_init(&table, 32);
// Token rpn = build_rpn(&tk, &table);
// print_token(&rpn);
// }

200
src/vm.h Normal file
View File

@@ -0,0 +1,200 @@
#include "parser.h"
#include <string.h>
// Instruction set of the stack VM (see vm_run for the exact semantics).
typedef enum {
OP_PUSH_INT,    // push instruct.num converted to an integer value
OP_PUSH_FLOAT,  // push instruct.num as a float value
OP_PUSH_STRING, // push a copy of instruct.strlit
OP_ADD,         // pop b, pop a, push a + b
OP_SUB,         // pop b, pop a, push a - b
OP_MUL,         // pop b, pop a, push a * b
OP_DIV,         // pop b, pop a, push a / b (division by zero stops the VM)
OP_PRINT,       // pop one value and print it followed by a newline
OP_HALT         // stop execution
} OPcode;
// One bytecode instruction: an opcode plus its (optional) immediate operand.
typedef struct {
OPcode op;
double num;    // numeric operand, used by OP_PUSH_INT and OP_PUSH_FLOAT
char *strlit;  // string operand, used by OP_PUSH_STRING (strdup'd by rpn_to_bytecode)
} instruct;
// Tag telling which member of the union inside Value is in use.
typedef enum {
VAL_INT,    // Value.i
VAL_FLOAT,  // Value.f
VAL_STRING, // Value.s
} ValueType;
// A runtime value on the VM stack: a type tag plus an anonymous union
// holding the payload selected by that tag.
typedef struct {
ValueType type;
union {
long i;    // payload when type == VAL_INT
double f;  // payload when type == VAL_FLOAT
char *s;   // payload when type == VAL_STRING; vm_run stores a strdup'd copy and frees it on print
};
} Value;
// Complete state of the stack machine executed by vm_run.
typedef struct {
instruct *program;    // instructions to execute
size_t inst_p;        // index of the next instruction to fetch
size_t program_size;  // number of instructions in program
Value stack[256];     // fixed-size operand stack
size_t st_p;          // number of values on the stack (index of the next free slot)
bool running;         // cleared by OP_HALT or by a runtime error to stop the loop
} VM;
// Translates an RPN token stream into a heap-allocated instruction array.
//
// rpn: token stream in reverse Polish notation (as produced by build_rpn).
// out: receives the number of instructions written.
// Returns the instruction array; the caller owns it (and the strlit copies
// inside OP_PUSH_STRING instructions).
//
// Literals become OP_PUSH_*, the four arithmetic operators map to their
// opcodes, the identifier "print" becomes OP_PRINT and TOKEN_EOF becomes
// OP_HALT. Any other token type is skipped.
// Allocation failure prints a message to stderr and exits with status 1.
instruct *rpn_to_bytecode(Token *rpn, size_t *out){
    size_t cap = 64;
    size_t size = 0;
    instruct *prog = malloc(sizeof *prog * cap);
    if (!prog) {
        fprintf(stderr, "rpn_to_bytecode: malloc failed\n");
        exit(1);
    }

    for (size_t i = 0; i < rpn->size; ++i) {
        symbols t = rpn->type[i];
        const char *text = rpn->text[i];
        instruct ins = {0};

        switch (t) {
        case TOKEN_INTEGER: ins.op = OP_PUSH_INT;   ins.num = atof(text); break;
        case TOKEN_FLOAT:   ins.op = OP_PUSH_FLOAT; ins.num = atof(text); break;
        case TOKEN_STRING:
            ins.op = OP_PUSH_STRING;
            ins.strlit = strdup(text);
            if (!ins.strlit) {
                fprintf(stderr, "rpn_to_bytecode: strdup failed\n");
                exit(1);
            }
            break;
        case TOKEN_PLUS:  ins.op = OP_ADD; break;
        case TOKEN_MINUS: ins.op = OP_SUB; break;
        case TOKEN_MUL:   ins.op = OP_MUL; break;
        case TOKEN_DIV:   ins.op = OP_DIV; break;
        case TOKEN_IDENTIFIER:
            if (strcmp(text, "print") == 0) {
                ins.op = OP_PRINT;
            } else {
                // NOTE(review): `ins` is still all-zero here, so an unknown
                // identifier is emitted as OP_PUSH_INT with operand 0.
                // Kept as-is; confirm whether that placeholder is intended.
                printf("[WARNING] Uknown Identifier '%s'\n", text);
            }
            break; //TODO: unhardcode this
        case TOKEN_EOF: ins.op = OP_HALT; break;
        default: continue;
        }

        if (size >= cap) {
            cap *= 2;
            // Keep the old buffer reachable until realloc has succeeded.
            instruct *grown = realloc(prog, sizeof *grown * cap);
            if (!grown) {
                fprintf(stderr, "rpn_to_bytecode: realloc failed\n");
                exit(1);
            }
            prog = grown;
        }
        prog[size++] = ins;
    }

    *out = size;
    return prog;
}
void vm_run(VM *vm) {
vm->running = true;
vm->inst_p = 0;
vm->st_p = 0;
while (vm->running && vm->inst_p < vm->program_size) {
instruct ins = vm->program[vm->inst_p++];
switch (ins.op) {
case OP_PUSH_INT: {
Value v = { .type = VAL_INT, .i = ins.num };
vm->stack[vm->st_p++] = v;
} break;
case OP_PUSH_FLOAT: {
Value v = { .type = VAL_FLOAT, .f = ins.num };
vm->stack[vm->st_p++] = v;
} break;
case OP_PUSH_STRING: {
Value v = { .type = VAL_STRING, .s = strdup(ins.strlit) };
vm->stack[vm->st_p++] = v;
} break;
case OP_ADD:
case OP_SUB:
case OP_MUL:
case OP_DIV: {
if (vm->st_p < 2) {
fprintf(stderr, "not enough values on stack.\n");
vm->running = false;
break;
}
Value b = vm->stack[--vm->st_p];
Value a = vm->stack[--vm->st_p];
double av = (a.type == VAL_INT) ? a.i : a.f;
double bv = (b.type == VAL_INT) ? b.i : b.f;
double result = 0;
switch (ins.op) {
case OP_ADD: result = av + bv; break;
case OP_SUB: result = av - bv; break;
case OP_MUL: result = av * bv; break;
case OP_DIV:
if (bv == 0) {
fprintf(stderr, "division by zero.\n");
vm->running = false;
} else result = av / bv;
break;
default: break;
}
Value v = { .type = VAL_FLOAT, .f = result };
vm->stack[vm->st_p++] = v;
} break;
case OP_PRINT: {
if (vm->st_p == 0) {
fprintf(stderr, "cant print an empty stack\n");
vm->running = false;
break;
}
Value v = vm->stack[--vm->st_p];
switch (v.type) {
case VAL_INT: printf("%ld\n", v.i); break;
case VAL_FLOAT: printf("%g\n", v.f); break;
case VAL_STRING:
printf("%s\n", v.s);
free(v.s);
break;
}
} break;
case OP_HALT:
vm->running = false;
break;
default:
fprintf(stderr, "unknown opcode %d\n", ins.op);
vm->running = false;
break;
}
}
}
// Returns the name of opcode `o` as a string literal (do not modify or free
// it), or NULL when `o` is not one of the known opcodes.
char *op_to_str(OPcode o){
    switch (o) {
    case OP_PUSH_INT:    return "OP_PUSH_INT";
    case OP_PUSH_FLOAT:  return "OP_PUSH_FLOAT";
    case OP_PUSH_STRING: return "OP_PUSH_STRING";
    case OP_ADD:         return "OP_ADD";
    case OP_SUB:         return "OP_SUB";
    case OP_MUL:         return "OP_MUL";
    case OP_DIV:         return "OP_DIV";
    case OP_PRINT:       return "OP_PRINT";
    case OP_HALT:        return "OP_HALT";
    default:             break;
    }
    return NULL;
}
// Prints a human-readable listing of v->program to stdout, one instruction
// per line, followed by a blank line. Push instructions show their operand;
// an opcode that op_to_str cannot name is printed as OP_UNKNOWN instead of
// handing a NULL pointer to printf.
void emit_bytecode(VM *v){
    for (size_t i = 0; i < v->program_size; ++i) {
        const instruct *ins = &v->program[i];
        switch (ins->op) {
        case OP_PUSH_INT:
            printf("OP_PUSH_INT(%f)\n", ins->num);
            break;
        case OP_PUSH_FLOAT:
            printf("OP_PUSH_FLOAT(%f)\n", ins->num);
            break;
        case OP_PUSH_STRING:
            printf("OP_PUSH_STRING(\"%s\")\n", ins->strlit ? ins->strlit : "");
            break;
        default: {
            const char *name = op_to_str(ins->op);
            printf("%s\n", name ? name : "OP_UNKNOWN");
        } break;
        }
    }
    printf("\n");
}