basic VM working
This commit is contained in:
@@ -8,12 +8,12 @@ int main(int argc, char **argv){
|
||||
|
||||
nb_append(&cmd, "gcc");
|
||||
nb_append(&cmd, "-Wall -Wextra");
|
||||
nb_append(&cmd, "lexer.c");
|
||||
nb_append(&cmd, "-o lex");
|
||||
nb_append(&cmd, "vm.c");
|
||||
nb_append(&cmd, "-o vm");
|
||||
nb_cmd(&cmd);
|
||||
|
||||
|
||||
nb_append(&cmd, "./lex");
|
||||
nb_append(&cmd, "./vm");
|
||||
for (int i=1; i<argc; ++i){
|
||||
nb_append(&cmd, argv[i]);
|
||||
}
|
||||
|
||||
85
lexer.h
85
lexer.h
@@ -22,7 +22,11 @@ typedef enum {
|
||||
TOKEN_NEWLINE,
|
||||
TOKEN_LPAREN,
|
||||
TOKEN_RPAREN,
|
||||
TOKEN_COMMA
|
||||
TOKEN_COMMA,
|
||||
TOKEN_LCURLY,
|
||||
TOKEN_RCURLY,
|
||||
TOKEN_COLON,
|
||||
TOKEN_SEMI
|
||||
} symbols;
|
||||
|
||||
typedef enum {
|
||||
@@ -52,6 +56,10 @@ char *token_type_to_string(symbols type) {
|
||||
case TOKEN_EOF: return "TOKEN_EOF";
|
||||
case TOKEN_NEWLINE: return "TOKEN_NEWLINE";
|
||||
case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
|
||||
case TOKEN_LCURLY: return "TOKEN_LCURLY";
|
||||
case TOKEN_RCURLY: return "TOKEN_RCURLY";
|
||||
case TOKEN_SEMI: return "TOKEN_SEMI";
|
||||
case TOKEN_COLON: return "TOKEN_COLON";
|
||||
case TOKEN_UNKNOWN: return "TOKEN_UNKNOWN";
|
||||
// default: return "UNKNOWN_SYMBOL";
|
||||
}
|
||||
@@ -141,61 +149,90 @@ int str_to_int(char *strint) { return atoi(strint); }
|
||||
float str_to_float(char *strif) { return strtof(strif, NULL); }
|
||||
|
||||
|
||||
|
||||
size_t read_from_tok(Token *tok, const char *input, size_t cursor) {
|
||||
char buf[64];
|
||||
size_t start = cursor;
|
||||
size_t i = 0;
|
||||
|
||||
if (isdigit(input[cursor])) {
|
||||
if (isdigit((unsigned char)input[cursor])) {
|
||||
int dots_seen = 0;
|
||||
// dont allow for 3.3.3 (example)
|
||||
while (isdigit(input[cursor]) || input[cursor] == '.') {
|
||||
while (isdigit((unsigned char)input[cursor]) || input[cursor] == '.') {
|
||||
if (input[cursor] == '.') dots_seen++;
|
||||
buf[i++] = input[cursor++];
|
||||
if (i >= sizeof(buf) - 1) break;
|
||||
}
|
||||
buf[i] = '\0';
|
||||
if (dots_seen == 0) {
|
||||
token_push(tok, TOKEN_INTEGER, buf, BHV_NUMBER, cursor - start);
|
||||
} else {
|
||||
token_push(tok, TOKEN_FLOAT, buf, BHV_FLOAT, cursor - start);
|
||||
token_push(tok, dots_seen == 0 ? TOKEN_INTEGER : TOKEN_FLOAT,
|
||||
buf, dots_seen == 0 ? BHV_NUMBER : BHV_FLOAT,
|
||||
cursor - start);
|
||||
return cursor - start; // all digits handled
|
||||
}
|
||||
} else if (input[cursor] == '"'){
|
||||
cursor++;
|
||||
|
||||
else if (input[cursor] == '"') {
|
||||
cursor++; // skip opening quote
|
||||
while (input[cursor] != '"' && input[cursor] != '\0') {
|
||||
buf[i++] = input[cursor++];
|
||||
if (i >= sizeof(buf) - 1) break;
|
||||
}
|
||||
buf[i] = '\0';
|
||||
if (input[cursor] == '"') cursor ++;
|
||||
if (input[cursor] == '"') cursor++; // skip closing quote
|
||||
token_push(tok, TOKEN_STRING, buf, BHV_STRING, cursor - start);
|
||||
} else if (isalpha(input[cursor])) { // should be after checking for strlit
|
||||
while (isalpha(input[cursor])) {
|
||||
return cursor - start;
|
||||
}
|
||||
|
||||
else if (isalpha((unsigned char)input[cursor])) {
|
||||
while (isalpha((unsigned char)input[cursor])) {
|
||||
buf[i++] = input[cursor++];
|
||||
if (i >= sizeof(buf) - 1) break;
|
||||
}
|
||||
buf[i] = '\0';
|
||||
token_push(tok, TOKEN_IDENTIFIER, buf, BHV_IDENT, cursor - start);
|
||||
//refactor into separate function to use in parsing functions and definitions
|
||||
} else {
|
||||
buf[0] = input[cursor];
|
||||
buf[1] = '\0';
|
||||
return cursor - start;
|
||||
}
|
||||
|
||||
// Single-character tokens and symbols
|
||||
switch (input[cursor]) {
|
||||
case '+': token_push(tok, TOKEN_PLUS, "+", BHV_STACK, 1); break;
|
||||
case '-': token_push(tok, TOKEN_MINUS, "-", BHV_STACK, 1); break;
|
||||
case '*': token_push(tok, TOKEN_MUL, "*", BHV_STACK, 1); break;
|
||||
case '/': token_push(tok, TOKEN_DIV, "/", BHV_STACK, 1); break;
|
||||
case ' ': token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1); break;
|
||||
case '\n': token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1); break;
|
||||
case '(': token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1); break;
|
||||
case ')': token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1); break;
|
||||
case ',': token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1); break;
|
||||
default: token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1); break;
|
||||
case '{': token_push(tok, TOKEN_LCURLY, "{", BHV_STACK, 1); break;
|
||||
case '}': token_push(tok, TOKEN_RCURLY, "}", BHV_STACK, 1); break;
|
||||
case ';': token_push(tok, TOKEN_SEMI, ";", BHV_STACK, 1); break;
|
||||
case ':': token_push(tok, TOKEN_COLON, ":", BHV_STACK, 1); break;
|
||||
|
||||
case '(':
|
||||
token_push(tok, TOKEN_LPAREN, "(", BHV_STACK, 1);
|
||||
break;
|
||||
case ')':
|
||||
token_push(tok, TOKEN_RPAREN, ")", BHV_STACK, 1);
|
||||
break;
|
||||
case ',':
|
||||
token_push(tok, TOKEN_COMMA, ",", BHV_STACK, 1);
|
||||
break;
|
||||
case ' ':
|
||||
// you can skip space tokens if you don't need them
|
||||
token_push(tok, TOKEN_SPACE, " ", BHV_UNDEFINED, 1);
|
||||
break;
|
||||
case '\n':
|
||||
token_push(tok, TOKEN_NEWLINE, "\\n", BHV_UNDEFINED, 1);
|
||||
break;
|
||||
case '\0':
|
||||
return 0; // end of input
|
||||
default: {
|
||||
buf[0] = input[cursor];
|
||||
buf[1] = '\0';
|
||||
token_push(tok, TOKEN_UNKNOWN, buf, BHV_UNDEFINED, 1);
|
||||
break;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
|
||||
cursor++; // move forward exactly one char for symbol cases
|
||||
return cursor - start;
|
||||
}
|
||||
|
||||
|
||||
Token tokenize_all(const char *input) {
|
||||
Token tok;
|
||||
token_init(&tok, 8);
|
||||
|
||||
219
parser.h
Normal file
219
parser.h
Normal file
@@ -0,0 +1,219 @@
|
||||
#include "./lexer.h"
|
||||
#define NB_IMPLEMENTATION
|
||||
#include "./nb.h"
|
||||
|
||||
/*
 * Binding strength of a binary operator token.
 *
 * Returns 2 for '*' and '/', 1 for '+' and '-', and 0 for every other
 * token type (i.e. "not an operator").
 */
int get_prec(symbols op){
    if (op == TOKEN_MUL || op == TOKEN_DIV) {
        return 2;
    }
    if (op == TOKEN_PLUS || op == TOKEN_MINUS) {
        return 1;
    }
    return 0;
}
|
||||
// parse
|
||||
|
||||
/*
 * Reports whether `op` is a left-associative binary operator.
 *
 * All four arithmetic operators ('*', '/', '+', '-') are left-associative;
 * any other token type yields false.
 */
bool is_left_asc(symbols op){
    return op == TOKEN_MUL
        || op == TOKEN_DIV
        || op == TOKEN_PLUS
        || op == TOKEN_MINUS;
}
|
||||
|
||||
Token *global_tok = NULL;
|
||||
|
||||
typedef enum {
|
||||
SYM_VAR,
|
||||
SYM_FUNC,
|
||||
} SymbolKind;
|
||||
|
||||
typedef struct {
|
||||
const char* name;
|
||||
size_t ret_count;
|
||||
size_t arg_count;
|
||||
symbols arg_types[16];
|
||||
symbols ret_type;
|
||||
SymbolKind symbol_kind;
|
||||
bool builtin;
|
||||
} Symbol;
|
||||
|
||||
|
||||
static Symbol builtins[] = {
|
||||
{ "print", 1, 1, { TOKEN_UNKNOWN }, TOKEN_EOF, SYM_FUNC, true },
|
||||
};
|
||||
|
||||
|
||||
typedef struct {
|
||||
Symbol *symbols;
|
||||
size_t size;
|
||||
size_t capacity;
|
||||
} SymbolTable;
|
||||
|
||||
|
||||
static int builtin_num = sizeof(builtins)/sizeof(builtins[0]);
|
||||
|
||||
static SymbolTable global_env = {
|
||||
.size = sizeof(builtins)/sizeof(builtins[0]),
|
||||
.capacity = sizeof(builtins)/sizeof(builtins[0]),
|
||||
.symbols = builtins};
|
||||
|
||||
|
||||
/*
 * Finds the first entry of `table` whose name equals `n`.
 *
 * table: table to search; NULL is treated as an empty table.
 * n:     NUL-terminated name to look for; NULL never matches.
 *
 * Returns a pointer into the table's storage, or NULL when no entry
 * matches. The pointer is only valid until the table's storage is next
 * reallocated or freed.
 */
Symbol *symbol_lookup(SymbolTable *table, const char *n){
    if (table == NULL || n == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < table->size; ++i) {
        const char *candidate = table->symbols[i].name;

        /* An entry's name can be NULL when the strdup() that produced it
         * failed; strcmp() on NULL is undefined, so skip such entries. */
        if (candidate != NULL && strcmp(n, candidate) == 0) {
            return &table->symbols[i];
        }
    }
    return NULL;
}
|
||||
|
||||
// fn add(x: int, y: int) int {
|
||||
// return x+y;
|
||||
// }
|
||||
|
||||
|
||||
/*
 * Initialises `table` as an empty table with room for `initial_capacity`
 * entries.
 *
 * An `initial_capacity` of 0 is valid and allocates nothing;
 * symbol_table_add() allocates storage on the first insert.
 *
 * Prints a message and exits with status 1 when the allocation fails or
 * the requested size does not fit in size_t.
 */
void symbol_table_init(SymbolTable *table, size_t initial_capacity) {
    table->symbols = NULL;
    table->size = 0;
    table->capacity = 0;

    /* malloc(0) may legitimately return NULL, which must not be reported
     * as an out-of-memory condition. */
    if (initial_capacity == 0) {
        return;
    }

    /* Reject element counts whose byte size would wrap around. */
    if (initial_capacity > (size_t)-1 / sizeof *table->symbols) {
        fprintf(stderr, "symbol_table_init: malloc failed\n");
        exit(1);
    }

    table->symbols = malloc(initial_capacity * sizeof *table->symbols);
    if (!table->symbols) {
        fprintf(stderr, "symbol_table_init: malloc failed\n");
        exit(1);
    }
    table->capacity = initial_capacity;
}
|
||||
|
||||
/*
 * Appends `sym` (copied by value) to `table`, growing the storage when it
 * is full: an empty table grows to 8 entries, otherwise capacity doubles.
 *
 * Prints a message and exits with status 1 when the storage cannot be
 * grown.
 */
void symbol_table_add(SymbolTable *table, Symbol sym) {
    if (table->size >= table->capacity) {
        size_t new_capacity = (table->capacity == 0) ? 8 : table->capacity * 2;
        Symbol *grown = NULL;

        /* Doubling wrapped around, or the byte size would not fit in size_t. */
        if (new_capacity <= table->capacity ||
            new_capacity > (size_t)-1 / sizeof *grown) {
            fprintf(stderr, "symbol_table_add: capacity overflow\n");
            exit(1);
        }

        if (table->symbols == builtins) {
            /* global_env points at the static builtins[] array; passing a
             * non-heap pointer to realloc() is undefined behaviour, so
             * move the entries into fresh heap storage instead. */
            grown = malloc(new_capacity * sizeof *grown);
            if (grown) {
                for (size_t i = 0; i < table->size; ++i) {
                    grown[i] = table->symbols[i];
                }
            }
        } else {
            grown = realloc(table->symbols, new_capacity * sizeof *grown);
        }

        if (!grown) {
            fprintf(stderr, "symbol_table_add: realloc failed\n");
            exit(1);
        }

        /* Only commit the new capacity once the storage really exists. */
        table->symbols = grown;
        table->capacity = new_capacity;
    }
    table->symbols[table->size++] = sym;
}
|
||||
|
||||
|
||||
/*
 * Releases the table's entry storage and resets it to the empty state,
 * so the table can be reused with symbol_table_add().
 *
 * NOTE(review): entry names are not released here. build_rpn() stores
 * strdup()ed names, so those copies outlive the table unless the caller
 * frees them; confirm the intended ownership before freeing them here.
 */
void symbol_table_free(SymbolTable *table) {
    /* global_env's storage is the static builtins[] array, which must
     * never be handed to free(). */
    if (table->symbols != builtins) {
        free(table->symbols);
    }
    table->symbols = NULL;
    table->size = 0;
    table->capacity = 0;
}
|
||||
|
||||
|
||||
Token build_rpn(Token *inp, SymbolTable *symtab) {
|
||||
Token output;
|
||||
Token stack;
|
||||
|
||||
token_init(&output, 16);
|
||||
token_init(&stack, 16);
|
||||
|
||||
for (size_t i = 0; i < inp->size; ++i) {
|
||||
symbols type = inp->type[i];
|
||||
const char *text = inp->text[i];
|
||||
|
||||
if (type == TOKEN_IDENTIFIER && i + 1 < inp->size && inp->type[i + 1] == TOKEN_LPAREN) {
|
||||
Symbol *found = symbol_lookup(symtab, text);
|
||||
if (!found) {
|
||||
Symbol sym = {
|
||||
.name = strdup(text),
|
||||
.arg_count = 0,
|
||||
.ret_type = TOKEN_EOF,
|
||||
.symbol_kind = SYM_FUNC,
|
||||
.builtin = false
|
||||
};
|
||||
symbol_table_add(symtab, sym);
|
||||
}
|
||||
token_push(&stack, type, text, inp->behaviour[i], 0);
|
||||
} else if (type == TOKEN_IDENTIFIER) {
|
||||
Symbol *found = symbol_lookup(symtab, text);
|
||||
if (!found) {
|
||||
Symbol sym = {
|
||||
.name = strdup(text),
|
||||
.arg_count = 0,
|
||||
.ret_type = TOKEN_UNKNOWN,
|
||||
.symbol_kind = SYM_VAR,
|
||||
.builtin = false
|
||||
};
|
||||
symbol_table_add(symtab, sym);
|
||||
}
|
||||
token_push(&output, type, text, inp->behaviour[i], 0);
|
||||
} else if (type == TOKEN_LPAREN) {
|
||||
token_push(&stack, type, text, inp->behaviour[i], 0);
|
||||
} else if (type == TOKEN_RPAREN) {
|
||||
while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_LPAREN)
|
||||
stack.size--;
|
||||
if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_IDENTIFIER) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
} else if (type == TOKEN_INTEGER || type == TOKEN_FLOAT || type == TOKEN_STRING) {
|
||||
token_push(&output, type, text, inp->behaviour[i], 0);
|
||||
} else if (is_left_asc(type)) {
|
||||
while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN &&
|
||||
(get_prec(stack.type[stack.size - 1]) > get_prec(type) ||
|
||||
get_prec(stack.type[stack.size - 1]) == get_prec(type)) &&
|
||||
is_left_asc(type)) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
token_push(&stack, type, text, inp->behaviour[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
while (stack.size > 0) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
|
||||
token_push(&output, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0);
|
||||
return output;
|
||||
}
|
||||
|
||||
/* Writes one line per entry of `tk` to stdout: its index, type name and text. */
void print_token(Token *tk){
    size_t idx = 0;

    while (idx < tk->size) {
        printf("TokenNum: %zu Type: %s Value: %s\n", idx, tk->tktype[idx], tk->text[idx]);
        idx++;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
// int main(int argc, char **argv){
|
||||
// if (argc < 2) return -1;
|
||||
// const char ts[] = "\"hello\" hi + 2 2.312";
|
||||
// const char math[] = "print(((1+2)*6)/18)"; // = 1
|
||||
// const char print[] = "print(\"hello\")";
|
||||
// const char simple[] = "1 + ( 3 + 3 )/4+4*3";
|
||||
|
||||
|
||||
// char* read = nb_read_file(argv[1]);
|
||||
// Token tk = tokenize_all(read);
|
||||
// printf("INPUT: %s\n", read);
|
||||
// SymbolTable table = {0};
|
||||
// symbol_table_init(&table, 32);
|
||||
|
||||
|
||||
// Token rpn = build_rpn(&tk, &table);
|
||||
// print_token(&rpn);
|
||||
// }
|
||||
194
parser3.c
194
parser3.c
@@ -1,4 +1,6 @@
|
||||
#include "./lexer.h"
|
||||
#define NB_IMPLEMENTATION
|
||||
#include "./nb.h"
|
||||
|
||||
int get_prec(symbols op){
|
||||
switch (op) {
|
||||
@@ -24,18 +26,194 @@ bool is_left_asc(symbols op){
|
||||
}
|
||||
}
|
||||
|
||||
void build_rpn();
|
||||
Token *global_tok = NULL;
|
||||
|
||||
typedef enum {
|
||||
SYM_VAR,
|
||||
SYM_FUNC,
|
||||
} SymbolKind;
|
||||
|
||||
typedef struct {
|
||||
const char* name;
|
||||
size_t ret_count;
|
||||
size_t arg_count;
|
||||
symbols arg_types[16];
|
||||
symbols ret_type;
|
||||
SymbolKind symbol_kind;
|
||||
bool builtin;
|
||||
} Symbol;
|
||||
|
||||
|
||||
static Symbol builtins[] = {
|
||||
{ "print", 1, 1, { TOKEN_UNKNOWN }, TOKEN_EOF, SYM_FUNC, true },
|
||||
};
|
||||
|
||||
|
||||
typedef struct {
|
||||
Symbol *symbols;
|
||||
size_t size;
|
||||
size_t capacity;
|
||||
} SymbolTable;
|
||||
|
||||
int main(void){
|
||||
const char ts[] = "\"hello\" hi + 2";
|
||||
const char math[] = "((1+2)*6)/18"; // = 1
|
||||
Token tk = tokenize_all(math);
|
||||
for (size_t i=0; i<tk.size; ++i){
|
||||
printf("TokenNum: %zu Type: %s Value: %s\n", i, tk.tktype[i], tk.text[i]);
|
||||
|
||||
static int builtin_num = sizeof(builtins)/sizeof(builtins[0]);
|
||||
|
||||
static SymbolTable global_env = {
|
||||
.size = sizeof(builtins)/sizeof(builtins[0]),
|
||||
.capacity = sizeof(builtins)/sizeof(builtins[0]),
|
||||
.symbols = builtins};
|
||||
|
||||
|
||||
Symbol *symbol_lookup(SymbolTable *table, const char *n){
|
||||
for (size_t i=0; i<table->size; ++i){
|
||||
if(strcmp(n, table->symbols[i].name) == 0){
|
||||
return &table->symbols[i];
|
||||
}
|
||||
// printf("token count: %zu\n", tk.size);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// fn add(x: int, y: int) int {
|
||||
// return x+y;
|
||||
// }
|
||||
|
||||
|
||||
void symbol_table_init(SymbolTable *table, size_t initial_capacity) {
|
||||
table->symbols = malloc(sizeof(Symbol) * initial_capacity);
|
||||
if (!table->symbols) {
|
||||
fprintf(stderr, "symbol_table_init: malloc failed\n");
|
||||
exit(1);
|
||||
}
|
||||
table->size = 0;
|
||||
table->capacity = initial_capacity;
|
||||
}
|
||||
|
||||
void symbol_table_add(SymbolTable *table, Symbol sym) {
|
||||
if (table->size >= table->capacity) {
|
||||
table->capacity = (table->capacity == 0) ? 8 : table->capacity * 2;
|
||||
table->symbols = realloc(table->symbols, sizeof(Symbol) * table->capacity);
|
||||
if (!table->symbols) {
|
||||
fprintf(stderr, "symbol_table_add: realloc failed\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
table->symbols[table->size++] = sym;
|
||||
}
|
||||
|
||||
|
||||
void symbol_table_free(SymbolTable *table) {
|
||||
free(table->symbols);
|
||||
table->symbols = NULL;
|
||||
table->size = 0;
|
||||
table->capacity = 0;
|
||||
}
|
||||
|
||||
|
||||
Token build_rpn(Token *inp, SymbolTable *symtab) {
|
||||
Token output;
|
||||
Token stack;
|
||||
|
||||
token_init(&output, 16);
|
||||
token_init(&stack, 16);
|
||||
|
||||
for (size_t i = 0; i < inp->size; ++i) {
|
||||
symbols type = inp->type[i];
|
||||
const char *text = inp->text[i];
|
||||
|
||||
if (type == TOKEN_IDENTIFIER && i + 1 < inp->size && inp->type[i + 1] == TOKEN_LPAREN) {
|
||||
Symbol *found = symbol_lookup(symtab, text);
|
||||
if (!found) {
|
||||
Symbol sym = {
|
||||
.name = strdup(text),
|
||||
.arg_count = 0,
|
||||
.ret_type = TOKEN_EOF,
|
||||
.symbol_kind = SYM_FUNC,
|
||||
.builtin = false
|
||||
};
|
||||
symbol_table_add(symtab, sym);
|
||||
}
|
||||
token_push(&stack, type, text, inp->behaviour[i], 0);
|
||||
} else if (type == TOKEN_IDENTIFIER) {
|
||||
Symbol *found = symbol_lookup(symtab, text);
|
||||
if (!found) {
|
||||
Symbol sym = {
|
||||
.name = strdup(text),
|
||||
.arg_count = 0,
|
||||
.ret_type = TOKEN_UNKNOWN,
|
||||
.symbol_kind = SYM_VAR,
|
||||
.builtin = false
|
||||
};
|
||||
symbol_table_add(symtab, sym);
|
||||
}
|
||||
token_push(&output, type, text, inp->behaviour[i], 0);
|
||||
} else if (type == TOKEN_LPAREN) {
|
||||
token_push(&stack, type, text, inp->behaviour[i], 0);
|
||||
} else if (type == TOKEN_RPAREN) {
|
||||
while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_LPAREN)
|
||||
stack.size--;
|
||||
if (stack.size > 0 && stack.type[stack.size - 1] == TOKEN_IDENTIFIER) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
} else if (type == TOKEN_INTEGER || type == TOKEN_FLOAT || type == TOKEN_STRING) {
|
||||
token_push(&output, type, text, inp->behaviour[i], 0);
|
||||
} else if (is_left_asc(type)) {
|
||||
while (stack.size > 0 && stack.type[stack.size - 1] != TOKEN_LPAREN &&
|
||||
(get_prec(stack.type[stack.size - 1]) > get_prec(type) ||
|
||||
get_prec(stack.type[stack.size - 1]) == get_prec(type)) &&
|
||||
is_left_asc(type)) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
token_push(&stack, type, text, inp->behaviour[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
while (stack.size > 0) {
|
||||
token_push(&output, stack.type[stack.size - 1],
|
||||
stack.text[stack.size - 1],
|
||||
stack.behaviour[stack.size - 1], 0);
|
||||
stack.size--;
|
||||
}
|
||||
|
||||
token_push(&output, TOKEN_EOF, "EOF", BHV_UNDEFINED, 0);
|
||||
return output;
|
||||
}
|
||||
|
||||
void print_token(Token *tk){
|
||||
for (size_t i=0; i<tk->size; ++i){
|
||||
printf("TokenNum: %zu Type: %s Value: %s\n", i, tk->tktype[i], tk->text[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int main(int argc, char **argv){
|
||||
if (argc < 2) return -1;
|
||||
const char ts[] = "\"hello\" hi + 2 2.312";
|
||||
const char math[] = "print(((1+2)*6)/18)"; // = 1
|
||||
const char print[] = "print(\"hello\")";
|
||||
const char simple[] = "1 + ( 3 + 3 )/4+4*3";
|
||||
|
||||
|
||||
char* read = nb_read_file(argv[1]);
|
||||
Token tk = tokenize_all(read);
|
||||
printf("INPUT: %s\n", read);
|
||||
SymbolTable table = {0};
|
||||
symbol_table_init(&table, 32);
|
||||
|
||||
|
||||
Token rpn = build_rpn(&tk, &table);
|
||||
print_token(&rpn);
|
||||
}
|
||||
|
||||
204
vm.c
Normal file
204
vm.c
Normal file
@@ -0,0 +1,204 @@
|
||||
#include "parser.h"
|
||||
#include <string.h>
|
||||
|
||||
/* Instruction set of the stack machine. */
typedef enum {
    OP_PUSH_INT,     /* push instruct.num as an integer */
    OP_PUSH_FLOAT,   /* push instruct.num as a float */
    OP_PUSH_STRING,  /* push a copy of instruct.strlit */
    OP_ADD,          /* pop b, pop a, push a + b */
    OP_SUB,          /* pop b, pop a, push a - b */
    OP_MUL,          /* pop b, pop a, push a * b */
    OP_DIV,          /* pop b, pop a, push a / b */
    OP_PRINT,        /* pop one value and print it followed by a newline */
    OP_HALT          /* stop execution */
} OPcode;

/* One instruction: an opcode plus its (optional) immediate operand. */
typedef struct {
    OPcode op;
    double num;      /* operand of OP_PUSH_INT / OP_PUSH_FLOAT */
    char *strlit;    /* operand of OP_PUSH_STRING; NULL for other opcodes */
} instruct;

/* Discriminator for Value. */
typedef enum {
    VAL_INT,
    VAL_FLOAT,
    VAL_STRING,
} ValueType;

/* A runtime value; `type` says which union member is live. */
typedef struct {
    ValueType type;
    union {
        long i;      /* VAL_INT */
        double f;    /* VAL_FLOAT */
        char *s;     /* VAL_STRING */
    };
} Value;

/* Number of Value slots in a VM's operand stack. */
enum { VM_STACK_CAPACITY = 256 };

/* Complete interpreter state. */
typedef struct {
    instruct *program;               /* instructions to execute */
    size_t inst_p;                   /* index of the next instruction */
    size_t program_size;             /* number of instructions in `program` */
    Value stack[VM_STACK_CAPACITY];  /* operand stack */
    size_t st_p;                     /* number of values currently on the stack */
    bool running;                    /* cleared by OP_HALT or by a runtime error */
} VM;
|
||||
|
||||
/*
 * Translates an RPN token list into a freshly allocated instruction array.
 *
 * rpn: token list to translate.
 * out: receives the number of instructions written.
 *
 * Literals become OP_PUSH_* instructions, the four arithmetic operators
 * become OP_ADD/OP_SUB/OP_MUL/OP_DIV, the identifier "print" becomes
 * OP_PRINT and TOKEN_EOF becomes OP_HALT. Any other identifier produces a
 * warning and no instruction; all remaining token types are skipped
 * silently.
 *
 * The caller owns the returned array and the `strlit` of every
 * OP_PUSH_STRING instruction in it. Exits with status 1 when memory runs
 * out.
 */
instruct *rpn_to_bytecode(Token *rpn, size_t *out){
    size_t cap = 64;
    size_t size = 0;

    instruct *prog = malloc(cap * sizeof *prog);
    if (!prog) {
        fprintf(stderr, "rpn_to_bytecode: malloc failed\n");
        exit(1);
    }

    for (size_t i = 0; i < rpn->size; ++i) {
        symbols t = rpn->type[i];
        const char *text = rpn->text[i];

        instruct ins = {0};

        switch (t) {
            case TOKEN_INTEGER: ins.op = OP_PUSH_INT;   ins.num = strtod(text, NULL); break;
            case TOKEN_FLOAT:   ins.op = OP_PUSH_FLOAT; ins.num = strtod(text, NULL); break;
            case TOKEN_STRING:
                ins.op = OP_PUSH_STRING;
                ins.strlit = strdup(text);
                if (!ins.strlit) {
                    fprintf(stderr, "rpn_to_bytecode: strdup failed\n");
                    exit(1);
                }
                break;
            case TOKEN_PLUS:  ins.op = OP_ADD; break;
            case TOKEN_MINUS: ins.op = OP_SUB; break;
            case TOKEN_MUL:   ins.op = OP_MUL; break;
            case TOKEN_DIV:   ins.op = OP_DIV; break;

            case TOKEN_IDENTIFIER: //TODO: unhardcode this
                if (strcmp(text, "print") != 0) {
                    printf("[WARNING] Uknown Identifier '%s'\n", text);
                    /* Emit nothing: falling through would append the
                     * zero-initialised instruction, i.e. OP_PUSH_INT 0. */
                    continue;
                }
                ins.op = OP_PRINT;
                break;
            case TOKEN_EOF: ins.op = OP_HALT; break;
            default: continue;
        }

        if (size >= cap) {
            /* Grow through a temporary so a failed realloc() is detected
             * before the only pointer to the program is overwritten. */
            size_t new_cap = cap * 2;
            instruct *grown = NULL;

            if (new_cap <= (size_t)-1 / sizeof *grown) {
                grown = realloc(prog, new_cap * sizeof *grown);
            }
            if (!grown) {
                fprintf(stderr, "rpn_to_bytecode: realloc failed\n");
                exit(1);
            }
            prog = grown;
            cap = new_cap;
        }
        prog[size++] = ins;
    }
    *out = size;
    return prog;
}
|
||||
|
||||
void vm_run(VM *vm) {
|
||||
vm->running = true;
|
||||
vm->inst_p = 0;
|
||||
vm->st_p = 0;
|
||||
|
||||
while (vm->running && vm->inst_p < vm->program_size) {
|
||||
instruct ins = vm->program[vm->inst_p++];
|
||||
|
||||
switch (ins.op) {
|
||||
case OP_PUSH_INT: {
|
||||
Value v = { .type = VAL_INT, .i = ins.num };
|
||||
vm->stack[vm->st_p++] = v;
|
||||
} break;
|
||||
|
||||
case OP_PUSH_FLOAT: {
|
||||
Value v = { .type = VAL_FLOAT, .f = ins.num };
|
||||
vm->stack[vm->st_p++] = v;
|
||||
} break;
|
||||
|
||||
case OP_PUSH_STRING: {
|
||||
Value v = { .type = VAL_STRING, .s = strdup(ins.strlit) };
|
||||
vm->stack[vm->st_p++] = v;
|
||||
} break;
|
||||
|
||||
case OP_ADD:
|
||||
case OP_SUB:
|
||||
case OP_MUL:
|
||||
case OP_DIV: {
|
||||
if (vm->st_p < 2) {
|
||||
fprintf(stderr, "not enough values on stack.\n");
|
||||
vm->running = false;
|
||||
break;
|
||||
}
|
||||
|
||||
Value b = vm->stack[--vm->st_p];
|
||||
Value a = vm->stack[--vm->st_p];
|
||||
|
||||
double av = (a.type == VAL_INT) ? a.i : a.f;
|
||||
double bv = (b.type == VAL_INT) ? b.i : b.f;
|
||||
double result = 0;
|
||||
|
||||
switch (ins.op) {
|
||||
case OP_ADD: result = av + bv; break;
|
||||
case OP_SUB: result = av - bv; break;
|
||||
case OP_MUL: result = av * bv; break;
|
||||
case OP_DIV:
|
||||
if (bv == 0) {
|
||||
fprintf(stderr, "division by zero.\n");
|
||||
vm->running = false;
|
||||
} else result = av / bv;
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
Value v = { .type = VAL_FLOAT, .f = result };
|
||||
vm->stack[vm->st_p++] = v;
|
||||
} break;
|
||||
|
||||
case OP_PRINT: {
|
||||
if (vm->st_p == 0) {
|
||||
fprintf(stderr, "cant print an empty stack\n");
|
||||
vm->running = false;
|
||||
break;
|
||||
}
|
||||
|
||||
Value v = vm->stack[--vm->st_p];
|
||||
switch (v.type) {
|
||||
case VAL_INT: printf("%ld\n", v.i); break;
|
||||
case VAL_FLOAT: printf("%g\n", v.f); break;
|
||||
case VAL_STRING:
|
||||
printf("%s\n", v.s);
|
||||
free(v.s);
|
||||
break;
|
||||
}
|
||||
} break;
|
||||
|
||||
case OP_HALT:
|
||||
vm->running = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "unknown opcode %d\n", ins.op);
|
||||
vm->running = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Usage: %s <source file>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char* read = nb_read_file(argv[1]);
|
||||
//printf("INPUT: %s\n", read);
|
||||
|
||||
Token tk = tokenize_all(read);
|
||||
SymbolTable table = {0};
|
||||
symbol_table_init(&table, 32);
|
||||
|
||||
Token rpn = build_rpn(&tk, &table);
|
||||
//print_token(&rpn);
|
||||
|
||||
size_t prog_size = 0;
|
||||
instruct *prog = rpn_to_bytecode(&rpn, &prog_size);
|
||||
VM vm = {
|
||||
.program = prog,
|
||||
.program_size = prog_size,
|
||||
.inst_p = 0,
|
||||
.st_p = 0,
|
||||
.running = true,
|
||||
};
|
||||
|
||||
vm_run(&vm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user