From 449d50ed49a48634e7c024dc912c58939178efd3 Mon Sep 17 00:00:00 2001 From: Zander Thannhauser Date: Sun, 17 Aug 2025 22:16:10 -0500 Subject: [PATCH] added testing and robustness tests --- main.c | 3102 +++++++++++++++++++++++++++++++++++++++--------------- makefile | 4 +- 2 files changed, 2282 insertions(+), 824 deletions(-) diff --git a/main.c b/main.c index 014d282..9fc118e 100644 --- a/main.c +++ b/main.c @@ -19,6 +19,7 @@ #include "avl.h" #define TODO assert(!"TODO"); +#define CHECK assert(!"CHECK"); #define argv0 program_invocation_name @@ -64,15 +65,11 @@ struct string* new_string(const char* data_ro) return this; } -struct string* new_string_from_fmt(const char* fmt, ...) +struct string* new_string_from_fmt_va(const char* fmt, va_list va) { struct string* this = malloc(sizeof(*this)); - va_list ap; - - va_start(ap, fmt); - - int x = vasprintf(&this->data, fmt, ap); + int x = vasprintf(&this->data, fmt, va); if (x < 0) { @@ -81,6 +78,17 @@ struct string* new_string_from_fmt(const char* fmt, ...) this->refcount = 1; + return this; +} + +struct string* new_string_from_fmt(const char* fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + + struct string* this = new_string_from_fmt_va(fmt, ap); + va_end(ap); return this; @@ -94,6 +102,18 @@ struct string* inc_string(struct string* this) return this; } +int compare_strings(const struct string* a, const struct string* b) +{ + return strcmp(a->data, b->data); +} + +int compare_strings_vv(const void* a, const void* b) +{ + return compare_strings( + (const struct string*) a, + (const struct string*) b); +} + void free_string(struct string* this) { if (this && !--this->refcount) @@ -103,6 +123,103 @@ void free_string(struct string* this) } } +void free_string_v(void* ptr) +{ + free_string((struct string*) ptr); +} + +struct stringset +{ + struct avl_tree_t* tree; + unsigned refcount; +}; + +struct stringset* new_stringset(void) +{ + struct stringset* this = malloc(sizeof(*this)); + + this->tree = avl_alloc_tree(compare_strings_vv, free_string_v); + + this->refcount = 1; + + return this; +} + +bool stringset_add(struct stringset* this, struct string* new) +{ + bool is_new; + + struct avl_node_t* node = avl_insert(this->tree, new); + + if (node) + { + inc_string(new); + + is_new = true; + } + else + { + TODO; + } + + return is_new; +} + +bool stringset_update(struct stringset* this, const struct stringset* updateme) +{ + bool is_new = false; + + for (struct avl_node_t* i = updateme->tree->head; i; i = i->next) + { + struct avl_node_t* node = avl_insert(this->tree, i->item); + + if (node) + { + inc_string(i->item); + + is_new = true; + } + else if (errno == EEXIST) + { + ; + } + else + { + TODO; + } + } + + return is_new; +} + +struct stringset* inc_stringset(struct stringset* this) +{ + if (this) + this->refcount++; + + return this; +} + +void stringset_println(const struct stringset* this) +{ + for (struct avl_node_t* i = this->tree->head; i; i = i->next) + { + const struct string* str = i->item; + + printf("%s\n", str->data); + } +} + +void free_stringset(struct stringset* this) +{ + if (this && !--this->refcount) + { + avl_free_tree(this->tree); + + free(this); + } +} + struct value; struct value* inc_value(struct value* this); @@ -125,6 +242,13 @@ struct expression ek_subtract, ek_multiply, ek_divide, + ek_integer_divide, + ek_remainder, + ek_exponent, + + // bitwise operator: + ek_leftbitshift, + ek_rightbitshift, // comparision operators: ek_greater_than, @@ -148,6 +272,8 @@ struct expression ek_comma, } kind; + unsigned column; + struct string* string; struct expression* center; @@ -162,15 +288,18 @@ struct expression struct expression* inc_expression(struct expression* this); struct expression* new_syntax_error_expression( - struct string* error_message, - struct expression* subexpression) + unsigned column, + struct string* error_message) { struct expression* this = malloc(sizeof(*this)); this->kind = ek_syntax_error; + + this->column = column; this->string = inc_string(error_message); - this->center = inc_expression(subexpression); - this->left = this->right = NULL; + this->center = NULL; + this->left = NULL; + this->right = NULL; this->value = NULL; this->refcount = 1; @@ -280,6 +409,25 @@ struct expression* new_binary_expression( return this; } +struct expression* new_ternary_expression( + struct expression* center, + struct expression* left, + struct expression* right) +{ + struct expression* this = malloc(sizeof(*this)); + + this->kind = ek_ternary; + this->string = NULL; + this->center = inc_expression(center); + this->left = inc_expression(left); + this->right = inc_expression(right); + this->value = NULL; + + this->refcount = 1; + + return this; +} + struct expression* inc_expression(struct expression* this) { if (this) @@ -304,16 +452,89 @@ void free_expression(struct expression* this) } } +enum value_kind +{ + vk_number, + vk_error, +}; + struct value { + enum value_kind kind; + + struct stringset* errors; + mpq_t mpq; + unsigned refcount; }; +struct value* new_error_value(struct stringset* errors) +{ + struct value* this = malloc(sizeof(*this)); + + this->kind = vk_error; + + this->errors = inc_stringset(errors); + + mpq_init(this->mpq); + + this->refcount = 1; + + return this; +} + +struct value* new_error_value_from_fmt(const char* fmt, ...) +{ + va_list va; + + va_start(va, fmt); + + struct string* message = new_string_from_fmt_va(fmt, va); + + struct stringset* set = new_stringset(); + + stringset_add(set, message); + + struct value* this = new_error_value(set); + + free_stringset(set); + + free_string(message); + + va_end(va); + + return this; +} + +struct value* new_error_value_from_union( + const struct value* left, + const struct value* right) +{ + assert(left->kind == vk_error); + assert(right->kind == vk_error); + + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + + stringset_update(set, right->errors); + + struct value* retval = new_error_value(set); + + free_stringset(set); + + return retval; +} + struct value* new_value_from_int(int x) { struct value* this = malloc(sizeof(*this)); + this->kind = vk_number; + + this->errors = NULL; + mpq_init(this->mpq); mpq_set_si(this->mpq, x, 1); @@ -327,6 +548,10 @@ struct value* new_value(mpq_t mpq) { struct value* this = malloc(sizeof(*this)); + this->kind = vk_number; + + this->errors = NULL; + mpq_init(this->mpq); mpq_set(this->mpq, mpq); @@ -350,6 +575,8 @@ void free_value(struct value* this) { mpq_clear(this->mpq); + free_stringset(this->errors); + free(this); } } @@ -358,19 +585,48 @@ struct value* scan(const char* text) { const char* moving = text; + const char* digits = "0123456789"; + + unsigned ibase = 10; + + if (text[0] == '0' && text[1] == 'b') + { + digits = "01"; + ibase = 2; + moving += 2; + } + else if (text[0] == '0' && text[1] == 'x') + { + digits = "0123456789ABCDEF"; + ibase = 16; + moving += 2; + } + mpq_t base; mpq_init(base); mpq_t tmp; mpq_init(tmp); mpq_t value; mpq_init(value); mpq_set_si(value, 0, 1); - mpq_set_si(base, 10, 1); + mpq_set_si(base, ibase, 1); - while ('0' <= *moving && *moving <= '9') + while ('_' == *moving || (*moving && index(digits, toupper(*moving)))) { - mpq_set_si(tmp, *moving++ - '0', 1); + if ('_' == *moving) + { + moving++; + } + else + { + mpq_set_si(tmp, index(digits, toupper(*moving++)) - digits, 1); - mpq_mul(value, value, base); - mpq_add(value, value, tmp); + mpq_mul(value, value, base); + mpq_add(value, value, tmp); + } + } + + while ('_' == *moving) + { + moving++; } if (*moving == '.') @@ -379,23 +635,35 @@ struct value* scan(const char* text) moving++; - mpq_set_si(factor, 1, 10); + mpq_set_si(factor, 1, ibase); - while ('0' <= *moving && *moving <= '9') + while ('_' == *moving || (*moving && index(digits, toupper(*moving)))) { - mpq_set_si(tmp, *moving++ - '0', 1); + if ('_' == *moving) + { + moving++; + } + else + { + mpq_set_si(tmp, index(digits, toupper(*moving++)) - digits, 1); - mpq_mul(tmp, factor, tmp); + mpq_mul(tmp, factor, tmp); - mpq_add(value, value, tmp); + mpq_add(value, value, tmp); - mpq_div(factor, factor, base); + mpq_div(factor, factor, base); + } } mpq_clear(factor); } - assert(!*moving); + while ('_' == *moving) + { + moving++; + } + + assert(*moving == '\0'); struct value* retval = new_value(value); @@ -422,6 +690,7 @@ struct variable* new_variable(struct string* name) struct variable* this = malloc(sizeof(*this)); this->name = inc_string(name); + this->value = NULL; return this; @@ -477,773 +746,1378 @@ void free_scope(struct scope* this) free(this); } -struct expression* parse(const char* text) +enum token { - enum { - tk_uninitialized, + t_uninitialized, - // primary: - tk_identifier, - tk_literal, + t_error, - // brakets: - tk_oparen, - tk_cparen, + // primary: + t_identifier, + t_literal, + t_binary, + t_hexadecimal, - // arithmetic operators: - tk_plus, - tk_minus, - tk_asterisk, - tk_slash, + // brakets: + t_oparen, + t_cparen, - // comparision operators: - tk_less_than, - tk_less_than_eq, + // arithmetic operators: + t_plus, + t_minus, + t_asterisk, + t_slash, + t_percent, + t_slash_slash, + t_asterisk_asterisk, - tk_greater_than, - tk_greater_than_eq, + // comparision operators: + t_lessthan, + t_lessthan_equals, + t_lessthan_lessthan, - tk_emarkequals, - tk_equalsequals, + t_greaterthan, + t_greaterthan_equals, + t_greaterthan_greaterthan, - // logical operators: - tk_vbarvbar, - tk_ampersandampersand, + t_emark_equals, + t_equals_equals, - // misc: - tk_emark, - tk_qmark, - tk_colon, - tk_equals, - tk_comma, + // logical operators: + t_vbar_vbar, + t_ampersand_ampersand, - tk_EOF, + // misc: + t_emark, + t_qmark, + t_colon, + t_equals, + t_comma, - number_of_tokens, - } tokenkind = tk_uninitialized; + t_EOF, - static const char* const tokennames[number_of_tokens] = { - [tk_EOF] = "EOF", + number_of_tokens, +}; - [tk_identifier] = "identifier", - [tk_literal] = "literal", - [tk_oparen] = "(", - [tk_cparen] = ")", - [tk_plus] = "+", - [tk_minus] = "-", - [tk_asterisk] = "*", - [tk_slash] = "/", - [tk_less_than] = "<", - [tk_less_than_eq] = "<=", - [tk_greater_than] = ">", - [tk_greater_than_eq] = ">=", - [tk_emarkequals] = "!=", - [tk_equalsequals] = "==", - [tk_vbarvbar] = "||", - [tk_ampersandampersand] = "&&", - [tk_emark] = "!", - [tk_qmark] = "?", - [tk_colon] = ":", - [tk_equals] = "=", - [tk_comma] = ",", - }; +static const char* const tokennames[number_of_tokens] = { + [t_EOF] = "EOF", - struct { + [t_identifier] = "identifier", + [t_literal] = "literal", + [t_binary] = "binary-literal", + [t_hexadecimal] = "hexadecimal-literal", + [t_oparen] = "open parentheses", + [t_cparen] = "close parentheses", + [t_plus] = "plus", + [t_minus] = "minus", + [t_asterisk] = "asterisk", + [t_asterisk_asterisk] = "double-asterisk", + [t_percent] = "percent", + [t_slash] = "slash", + [t_slash_slash] = "double-slash", + [t_lessthan] = "less-than", + [t_lessthan_equals] = "less-than-or-equal-to", + [t_greaterthan] = "greater-than", + [t_greaterthan_equals] = "greater-than-or-equal-to", + [t_emark_equals] = "not-equal-to", + [t_equals_equals] = "equal-to", + [t_vbar_vbar] = "logical-or", + [t_ampersand_ampersand] = "logical-and", + [t_emark] = "exclamation mark", + [t_qmark] = "question mark", + [t_colon] = "colon", + [t_equals] = "assignment", + [t_comma] = "comma", +}; + +struct tokenizer +{ + const char* text; + + unsigned start_index; + unsigned index; + + enum token token; + + struct buffer { char* data; size_t n, cap; - } buffer = {}; + } buffer; +}; - void append(char c) +struct tokenizer* new_tokenizer(const char* text) +{ + struct tokenizer* this = malloc(sizeof(*this)); + + this->text = text; + + this->start_index = 0; + this->index = 0; + + this->token = t_uninitialized; + + this->buffer.data = NULL; + this->buffer.n = 0; + this->buffer.cap = 0; + + return this; +} + +void tokenizer_append_char( + struct tokenizer* this, + char code) +{ + if (this->buffer.n == this->buffer.cap) { - if (buffer.n == buffer.cap) - { - buffer.cap = buffer.cap << 1 ?: 1; - buffer.data = realloc(buffer.data, sizeof(*buffer.data) * buffer.cap); - } + this->buffer.cap = this->buffer.cap << 1 ?: 1; - buffer.data[buffer.n++] = c; + this->buffer.data = realloc( + this->buffer.data, + sizeof(*this->buffer.data) * this->buffer.cap); } - const char* moving = text; + this->buffer.data[this->buffer.n++] = code; +} - void next_token(void) +enum tokenizer_state { + ts_error, + + ts_EOF, + + ts_literal, + + ts_identifier, + + ts_equals, + ts_comma, + ts_slash, + ts_plus, + ts_minus, + ts_qmark, + ts_emark, + ts_colon, + ts_percent, + ts_asterisk, + ts_asterisk_asterisk, + ts_lessthan, + ts_lessthan_equals, + ts_lessthan_lessthan, + ts_greaterthan, + ts_greaterthan_equals, + ts_greaterthan_greaterthan, + ts_equals_equals, + ts_slash_slash, + + ts_oparen, + ts_cparen, + + ts_vbar_vbar, + + ts_start, + + ts_reading_literal_prefix, + ts_reading_literal, + ts_reading_literal2, + ts_reading_literal_binary, + ts_reading_literal_binary2, + ts_reading_literal_hexadecimal, + ts_reading_literal_hexadecimal2, + + ts_reading_identifier, + + ts_reading_equals, + ts_reading_equals_equals, + ts_reading_slash, + ts_reading_slash_slash, + ts_reading_comma, + ts_reading_plus, + ts_reading_minus, + ts_reading_qmark, + ts_reading_emark, + ts_reading_colon, + ts_reading_asterisk, + ts_reading_asterisk_asterisk, + ts_reading_percent, + ts_reading_lessthan, + ts_reading_lessthan_equals, + ts_reading_lessthan_lessthan, + ts_reading_greaterthan, + ts_reading_greaterthan_equals, + ts_reading_greaterthan_greaterthan, + + ts_reading_oparen, + ts_reading_cparen, + + ts_reading_vbar, + ts_reading_vbar_vbar, + + number_of_tokenizer_states, +} tokenizer_lookup[number_of_tokenizer_states][128] = { + #define ANY 0 ... 127 + + // skip whitespace: + [ts_start][' '] = ts_start, + + // EOF token: + [ts_start][0] = ts_EOF, + + // binary: "0b" ['_', '0', '1']+ ('.' ['_', '0', '1']*)? + // hexadecimal: "0x" [_0-9a-fA-F]+ (.[_0-9a-fA-F]*)? + // literal: [0-9]+ [_0-9]* (. [_0-9]*)? + [ts_start]['0'] = ts_reading_literal_prefix, + [ts_reading_literal_prefix][ANY] = ts_literal, + [ts_reading_literal_prefix]['b'] = ts_reading_literal_binary, + [ts_reading_literal_binary][ ANY ] = ts_literal, + [ts_reading_literal_binary]['0' ... '1'] = ts_reading_literal_binary, + [ts_reading_literal_binary][ '_' ] = ts_reading_literal_binary, + [ts_reading_literal_binary][ '.' ] = ts_reading_literal_binary2, + [ts_reading_literal_binary2][ ANY ] = ts_literal, + [ts_reading_literal_binary2][ '_' ] = ts_reading_literal_binary2, + [ts_reading_literal_binary2]['0' ... '1'] = ts_reading_literal_binary2, + [ts_reading_literal_prefix]['x'] = ts_reading_literal_hexadecimal, + [ts_reading_literal_hexadecimal][ ANY ] = ts_literal, + [ts_reading_literal_hexadecimal]['0' ... '9'] = ts_reading_literal_hexadecimal, + [ts_reading_literal_hexadecimal]['a' ... 'f'] = ts_reading_literal_hexadecimal, + [ts_reading_literal_hexadecimal]['A' ... 'F'] = ts_reading_literal_hexadecimal, + [ts_reading_literal_hexadecimal][ '_' ] = ts_reading_literal_hexadecimal, + [ts_reading_literal_hexadecimal][ '.' ] = ts_reading_literal_hexadecimal2, + [ts_reading_literal_hexadecimal2][ ANY ] = ts_literal, + [ts_reading_literal_hexadecimal2][ '_' ] = ts_reading_literal_hexadecimal2, + [ts_reading_literal_hexadecimal2]['0' ... '9'] = ts_reading_literal_hexadecimal2, + [ts_reading_literal_hexadecimal2]['a' ... 'f'] = ts_reading_literal_hexadecimal2, + [ts_reading_literal_hexadecimal2]['A' ... 'F'] = ts_reading_literal_hexadecimal2, + [ts_reading_literal_prefix]['_'] = ts_reading_literal, + [ts_reading_literal_prefix]['.'] = ts_reading_literal2, + [ts_reading_literal_prefix]['0' ... '9'] = ts_reading_literal, + [ts_start]['1' ... '9'] = ts_reading_literal, + [ts_reading_literal][ ANY ] = ts_literal, + [ts_reading_literal][ '_' ] = ts_reading_literal, + [ts_reading_literal]['0' ... '9'] = ts_reading_literal, + [ts_reading_literal]['a' ... 'z'] = ts_error, + [ts_reading_literal][ '.' ] = ts_reading_literal2, + [ts_reading_literal2][ ANY ] = ts_literal, + [ts_reading_literal2][ '_' ] = ts_reading_literal2, + [ts_reading_literal2]['0' ... '9'] = ts_reading_literal2, + + // identifiers: + [ts_start][ '_' ] = ts_reading_identifier, + [ts_start]['a' ... 'z'] = ts_reading_identifier, + [ts_reading_identifier][ ANY ] = ts_identifier, + [ts_reading_identifier][ '_' ] = ts_reading_identifier, + [ts_reading_identifier]['0' ... '9'] = ts_reading_identifier, + [ts_reading_identifier]['a' ... 'z'] = ts_reading_identifier, + + // symbols: + + [ts_start]['+'] = ts_reading_plus, [ts_reading_plus][ANY] = ts_plus, + + [ts_start]['-'] = ts_reading_minus, [ts_reading_minus][ANY] = ts_minus, + + [ts_start]['%'] = ts_reading_percent, [ts_reading_percent][ANY] = ts_percent, + + [ts_start]['?'] = ts_reading_qmark, [ts_reading_qmark][ANY] = ts_qmark, + + [ts_start]['!'] = ts_reading_emark, [ts_reading_emark][ANY] = ts_emark, + + [ts_start][':'] = ts_reading_colon, [ts_reading_colon][ANY] = ts_colon, + + [ts_start][','] = ts_reading_comma, [ts_reading_comma][ANY] = ts_comma, + + [ts_start]['('] = ts_reading_oparen, [ts_reading_oparen][ANY] = ts_oparen, + [ts_start][')'] = ts_reading_cparen, [ts_reading_cparen][ANY] = ts_cparen, + + // '*' or '**' + [ts_start]['*'] = ts_reading_asterisk, + [ts_reading_asterisk][ANY] = ts_asterisk, + [ts_reading_asterisk]['*'] = ts_reading_asterisk_asterisk, + [ts_reading_asterisk_asterisk][ANY] = ts_asterisk_asterisk, + + // '/' or '//': + [ts_start]['/'] = ts_reading_slash, + [ts_reading_slash][ANY] = ts_slash, + [ts_reading_slash]['/'] = ts_reading_slash_slash, + [ts_reading_slash_slash][ANY] = ts_slash_slash, + + // '=' or '==': + [ts_start]['='] = ts_reading_equals, + [ts_reading_equals][ANY] = ts_equals, + [ts_reading_equals]['='] = ts_reading_equals_equals, + [ts_reading_equals_equals][ANY] = ts_equals_equals, + + // '<' or '<=' or '<<': + [ts_start]['<'] = ts_reading_lessthan, + [ts_reading_lessthan][ANY] = ts_lessthan, + [ts_reading_lessthan]['='] = ts_reading_lessthan_equals, + [ts_reading_lessthan_equals][ANY] = ts_lessthan_equals, + [ts_reading_lessthan]['<'] = ts_reading_lessthan_lessthan, + [ts_reading_lessthan_lessthan][ANY] = ts_lessthan_lessthan, + + // '>' or '>=' or '>>': + [ts_start]['>'] = ts_reading_greaterthan, + [ts_reading_greaterthan][ANY] = ts_greaterthan, + [ts_reading_greaterthan]['='] = ts_reading_greaterthan_equals, + [ts_reading_greaterthan_equals][ANY] = ts_greaterthan_equals, + [ts_reading_greaterthan]['>'] = ts_reading_greaterthan_greaterthan, + [ts_reading_greaterthan_greaterthan][ANY] = ts_greaterthan_greaterthan, + + // '||': + [ts_start]['|'] = ts_reading_vbar, + [ts_reading_vbar]['|'] = ts_reading_vbar_vbar, + [ts_reading_vbar_vbar][ANY] = ts_vbar_vbar, +}; + +void tokenizer_next( + struct tokenizer* this) +{ + this->buffer.n = 0; + + this->start_index = this->index; + + enum tokenizer_state state = ts_start; + + while (state >= ts_start) { - while (*moving && *moving == ' ') - moving++; + assert(this->text[this->index] >= 0); - switch (*moving) + state = tokenizer_lookup[state][(uint8_t) this->text[this->index]]; + + if (state > ts_start) { - case 0: - tokenkind = tk_EOF; - break; + tokenizer_append_char(this, this->text[this->index]); - case '0' ... '9': + this->index++; + } + + if (state == ts_start) + { + this->buffer.n = 0; + + this->index++; + + this->start_index = this->index; + } + } + + // for sake of the error message: + if (state == ts_error) + { + tokenizer_append_char(this, this->text[this->index]); + } + + tokenizer_append_char(this, '\0'); + + switch (state) + { + case ts_error: + { + this->token = t_error; + break; + } + + case ts_EOF: + { + this->token = t_EOF; + break; + } + + case ts_literal: + { + this->token = t_literal; + break; + } + + case ts_identifier: + { + this->token = t_identifier; + break; + } + + case ts_equals: + { + this->token = t_equals; + break; + } + + case ts_comma: + { + this->token = t_comma; + break; + } + + case ts_plus: + { + this->token = t_plus; + break; + } + + case ts_qmark: + { + this->token = t_qmark; + break; + } + + case ts_minus: + { + this->token = t_minus; + break; + } + + case ts_slash: + { + this->token = t_slash; + break; + } + + case ts_equals_equals: + { + this->token = t_equals_equals; + break; + } + + case ts_emark: + { + this->token = t_emark; + break; + } + + case ts_oparen: + { + this->token = t_oparen; + break; + } + + case ts_cparen: + { + this->token = t_cparen; + break; + } + + case ts_percent: + { + this->token = t_percent; + break; + } + + case ts_lessthan: + { + this->token = t_lessthan; + break; + } + + case ts_lessthan_equals: + { + this->token = t_lessthan_equals; + break; + } + + case ts_colon: + { + this->token = t_colon; + break; + } + + case ts_asterisk: + { + this->token = t_asterisk; + break; + } + + case ts_vbar_vbar: + { + this->token = t_vbar_vbar; + break; + } + + case ts_slash_slash: + { + this->token = t_slash_slash; + break; + } + + case ts_lessthan_lessthan: + { + this->token = t_lessthan_lessthan; + break; + } + + case ts_greaterthan_greaterthan: + { + this->token = t_greaterthan_greaterthan; + break; + } + + case ts_asterisk_asterisk: + { + this->token = t_asterisk_asterisk; + break; + } + + default: + TODO; + break; + } +} + +void free_tokenizer( + struct tokenizer* this) +{ + free(this->buffer.data); + free(this); +} + +struct string* make_unexpected_token_message( + struct tokenizer* tokenizer) +{ + if (tokenizer->token == t_error) + { + return new_string_from_fmt( + "unexpected unknown token '%s'", + tokenizer->buffer.data); + } + else + { + assert(tokennames[tokenizer->token]); + + return new_string_from_fmt( + "unexpected %s token", + tokennames[tokenizer->token]); + } +} + +struct expression* parse_root(struct tokenizer* tokenizer); + +struct expression* parse_primary(struct tokenizer* tokenizer) +{ + struct expression* retval = NULL; + + switch (tokenizer->token) + { + case t_error: + { + struct string* message = new_string_from_fmt( + "unknown token '%s'", tokenizer->buffer.data); + + retval = new_syntax_error_expression( + /* column: */ tokenizer->start_index + 1, + /* message: */ message); + + tokenizer_next(tokenizer); + + free_string(message); + break; + } + + case t_literal: + { + struct value* value = scan(tokenizer->buffer.data); + + retval = new_literal_expression(value); + + tokenizer_next(tokenizer); + + free_value(value); + break; + } + + case t_hexadecimal: + { + TODO; + break; + } + + case t_identifier: + { + struct string* name = new_string(tokenizer->buffer.data); + + retval = new_variable_expression(name); + + tokenizer_next(tokenizer); + + free_string(name); + break; + } + + case t_oparen: + { + tokenizer_next(tokenizer); + + struct expression* sub = parse_root(tokenizer); + + if (sub->kind == ek_syntax_error) { - buffer.n = 0; + retval = inc_expression(sub); + } + else if (tokenizer->token == t_cparen) + { + tokenizer_next(tokenizer); - while (false - || *moving == '_' - || *moving == '.' - || ('0' <= *moving && *moving <= '9')) - { - append(*moving++); - } - - append(0); - - tokenkind = tk_literal; - break; + retval = inc_expression(sub); + } + else + { + TODO; } - case '_': - case 'a' ... 'z': - case 'A' ... 'Z': + free_expression(sub); + break; + } + + default: + { + struct string* message = make_unexpected_token_message(tokenizer); + + retval = new_syntax_error_expression( + /* column: */ tokenizer->start_index + 1, + /* message: */ message); + + tokenizer_next(tokenizer); + + free_string(message); + break; + } + } + + assert(retval); + + return retval; +} + +struct expression* parse_prefix(struct tokenizer* tokenizer) +{ + struct expression* retval = NULL; + + switch (tokenizer->token) + { + case t_emark: + { + tokenizer_next(tokenizer); + + struct expression* sub = parse_prefix(tokenizer); + + if (sub->kind == ek_syntax_error) { - buffer.n = 0; + retval = sub; + } + else + { + TODO; + #if 0 + struct expression* retval = + new_logical_not_expression(sub); - while (false - || *moving == '_' - || ('a' <= *moving && *moving <= 'z') - || ('A' <= *moving && *moving <= 'Z')) - { - append(*moving++); - } + free_expression(sub); - append(0); - - tokenkind = tk_identifier; - break; + return retval; + #endif } - case '+': tokenkind = tk_plus, moving++; break; - case '-': tokenkind = tk_minus, moving++; break; - case '*': tokenkind = tk_asterisk, moving++; break; - case '/': tokenkind = tk_slash, moving++; break; + break; + } - case '(': tokenkind = tk_oparen, moving++; break; - case ')': tokenkind = tk_cparen, moving++; break; + case t_plus: + { + tokenizer_next(tokenizer); - case '?': tokenkind = tk_qmark, moving++; break; - case ':': tokenkind = tk_colon, moving++; break; + struct expression* sub = parse_prefix(tokenizer); - case ',': tokenkind = tk_comma, moving++; break; - - case '|': + if (sub->kind == ek_syntax_error) { - moving++; + retval = sub; + } + else + { + TODO; + #if 0 + retval = new_positive_expression(sub); - switch (*moving) + free_expression(sub); + #endif + } + + break; + } + + case t_minus: + { + tokenizer_next(tokenizer); + + struct expression* sub = parse_prefix(tokenizer); + + if (sub->kind == ek_syntax_error) + { + retval = inc_expression(sub); + } + else + { + retval = new_negative_expression(sub); + } + + free_expression(sub); + + break; + } + + default: + { + retval = parse_primary(tokenizer); + + break; + } + } + + assert(retval); + + return retval; +} + +struct expression* parse_exponent(struct tokenizer* tokenizer) +{ + struct expression* left = parse_prefix(tokenizer); + + while (true + && left->kind != ek_syntax_error + && (tokenizer->token == t_asterisk_asterisk)) + { + tokenizer_next(tokenizer); + + struct expression* right = parse_prefix(tokenizer); + + if (right->kind == ek_syntax_error) + { + free_expression(left); + + left = inc_expression(right); + } + else + { + struct expression* retval = + new_binary_expression(ek_exponent, left, right); + + left = retval; + } + + free_expression(right); + } + + return left; +} + +struct expression* parse_multiplicative(struct tokenizer* tokenizer) +{ + struct expression* left = parse_exponent(tokenizer); + + while (true + && left->kind != ek_syntax_error + && (false + || tokenizer->token == t_asterisk + || tokenizer->token == t_slash + || tokenizer->token == t_slash_slash + || tokenizer->token == t_percent)) + { + switch (tokenizer->token) + { + case t_asterisk: + { + tokenizer_next(tokenizer); + + struct expression* right = parse_exponent(tokenizer); + + if (right->kind == ek_syntax_error) { - case '|': - tokenkind = tk_vbarvbar, moving++; - break; + free_expression(left); - default: - { - TODO; - break; - } + left = inc_expression(right); } + else + { + struct expression* retval = + new_binary_expression(ek_multiply, left, right); + + left = retval; + } + + free_expression(right); break; } - case '&': + case t_slash: { - moving++; + tokenizer_next(tokenizer); - switch (*moving) + struct expression* right = parse_exponent(tokenizer); + + if (right->kind == ek_syntax_error) { - case '&': - tokenkind = tk_ampersandampersand, moving++; - break; + free_expression(left); - default: - { - TODO; - break; - } + left = inc_expression(right); } + else + { + struct expression* retval = + new_binary_expression(ek_divide, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); break; } - case '!': + case t_slash_slash: { - moving++; + tokenizer_next(tokenizer); - switch (*moving) + struct expression* right = parse_exponent(tokenizer); + + if (right->kind == ek_syntax_error) { - case '=': - tokenkind = tk_emarkequals, moving++; - break; + free_expression(left); - default: - tokenkind = tk_emark; - break; + left = inc_expression(right); } + else + { + struct expression* retval = + new_binary_expression(ek_integer_divide, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); break; } - case '<': + case t_percent: { - moving++; + tokenizer_next(tokenizer); - switch (*moving) + struct expression* right = parse_exponent(tokenizer); + + if (right->kind == ek_syntax_error) { - case '=': - tokenkind = tk_less_than_eq, moving++; - break; + free_expression(left); - default: - { - tokenkind = tk_less_than; - break; - } + left = inc_expression(right); + } + else + { + struct expression* retval = + new_binary_expression(ek_remainder, left, right); + + free_expression(left); + + left = retval; } - break; - } - - case '>': - { - moving++; - - switch (*moving) - { - case '=': - tokenkind = tk_greater_than_eq, moving++; - break; - - default: - { - tokenkind = tk_greater_than; - break; - } - } - - break; - } - - case '=': - { - moving++; - - switch (*moving) - { - case '=': - tokenkind = tk_equalsequals, moving++; - break; - - default: - tokenkind = tk_equals, moving++; - break; - } + free_expression(right); break; } default: - assert(!"TODO"); break; } } - next_token(); + return left; +} - struct expression* parse_root(void) +struct expression* parse_additive(struct tokenizer* tokenizer) +{ + struct expression* left = parse_multiplicative(tokenizer); + + while (true + && left->kind != ek_syntax_error + && (false + || tokenizer->token == t_plus + || tokenizer->token == t_minus)) { - struct expression* parse_comma(void) + switch (tokenizer->token) { - struct expression* parse_assign(void) + case t_plus: { - struct expression* parse_ternary(void) + tokenizer_next(tokenizer); + + struct expression* right = parse_multiplicative(tokenizer); + + if (right->kind == ek_syntax_error) { - struct expression* parse_logicals(void) - { - struct expression* parse_equality(void) - { - struct expression* parse_comparision(void) - { - struct expression* parse_additive(void) - { - struct expression* parse_multiplicative(void) - { - struct expression* parse_prefix(void) - { - struct expression* parse_primary(void) - { - struct expression* retval; - - switch (tokenkind) - { - case tk_identifier: - { - struct string* name = - new_string(buffer.data); - - retval = new_variable_expression(name); - next_token(); - - free_string(name); - break; - } - - case tk_literal: - { - struct value* value = - scan(buffer.data); - - retval = new_literal_expression(value); - - next_token(); - - free_value(value); - break; - } - - case tk_oparen: - { - next_token(); - - retval = parse_root(); - - if (tokenkind != tk_cparen) - { - struct string* message = - new_string_from_fmt( - "unexpected '%s'", - tokennames[tokenkind]); - - retval = new_syntax_error_expression(message, retval); - - free_string(message); - } - - next_token(); - break; - } - - default: - { - struct string* message = - new_string_from_fmt( - "unexpected '%s'", - tokennames[tokenkind]); - - retval = new_syntax_error_expression(message, NULL); - - free_string(message); - break; - } - } - - return retval; - } - - switch (tokenkind) - { - case tk_emark: - { - next_token(); - - struct expression* sub = - parse_prefix(); - - struct expression* retval = - new_logical_not_expression(sub); - - free_expression(sub); - - return retval; - } - - case tk_plus: - { - next_token(); - - struct expression* sub = - parse_prefix(); - - struct expression* retval = - new_positive_expression(sub); - - free_expression(sub); - - return retval; - } - - case tk_minus: - { - next_token(); - - struct expression* sub = - parse_prefix(); - - struct expression* retval = - new_negative_expression(sub); - - free_expression(sub); - - return retval; - } - - default: - return parse_primary(); - } - } - - struct expression* left = parse_prefix(); - - again: switch (tokenkind) - { - case tk_asterisk: - { - next_token(); - - struct expression* right = - parse_prefix(); - - struct expression* retval = - new_binary_expression(ek_multiply, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_slash: - { - next_token(); - - struct expression* right = - parse_prefix(); - - struct expression* retval = - new_binary_expression(ek_divide, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - default: - break; - } - - return left; - } - - struct expression* left = parse_multiplicative(); - - again: switch (tokenkind) - { - case tk_plus: - { - next_token(); - - struct expression* right = - parse_multiplicative(); - - struct expression* retval = - new_binary_expression(ek_add, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_minus: - { - next_token(); - - struct expression* right = - parse_multiplicative(); - - struct expression* retval = - new_binary_expression(ek_subtract, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - default: - break; - } - - return left; - } - - struct expression* left = parse_additive(); - - again: switch (tokenkind) - { - case tk_less_than: - { - next_token(); - - struct expression* right = - parse_additive(); - - struct expression* retval = - new_binary_expression(ek_less_than, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_less_than_eq: - { - next_token(); - - struct expression* right = - parse_additive(); - - struct expression* retval = - new_binary_expression(ek_less_than_equal_to, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_greater_than: - { - next_token(); - - struct expression* right = - parse_additive(); - - struct expression* retval = - new_binary_expression(ek_greater_than, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_greater_than_eq: - { - next_token(); - - struct expression* right = - parse_additive(); - - struct expression* retval = - new_binary_expression(ek_greater_than_equal_to, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - default: - break; - } - - return left; - } - - struct expression* left = parse_comparision(); - - again: switch (tokenkind) - { - case tk_equalsequals: - { - next_token(); - - struct expression* right = - parse_comparision(); - - struct expression* retval = - new_binary_expression(ek_equal_to, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_emarkequals: - { - next_token(); - - struct expression* right = - parse_comparision(); - - struct expression* retval = - new_binary_expression(ek_not_equal_to, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - default: - break; - } - - return left; - } - - struct expression* left = parse_equality(); - - again: switch (tokenkind) - { - case tk_vbarvbar: - { - next_token(); - - struct expression* right = - parse_equality(); - - struct expression* retval = - new_binary_expression(ek_logical_or, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - case tk_ampersandampersand: - { - next_token(); - - struct expression* right = - parse_equality(); - - struct expression* retval = - new_binary_expression(ek_logical_and, left, right); - - free_expression(left); - free_expression(right); - - left = retval; - - goto again; - } - - default: - break; - } - - return left; - } - - struct expression* center = parse_logicals(); - - if (tokenkind == tk_qmark) - { - next_token(); - - struct expression* left = parse_logicals(); - - next_token(); - - struct expression* right = parse_logicals(); - - TODO; - } - else - { - return center; - } - } - - struct expression* left = parse_ternary(); - - if (tokenkind == tk_equals) - { - next_token(); - - struct expression* right = parse_assign(); - - struct expression* retval = new_binary_expression(ek_assign, left, right); - free_expression(left); - free_expression(right); - return retval; + left = right; } else { - return left; - } - } - - struct expression* left = parse_assign(); - - again: switch (tokenkind) - { - case tk_comma: - { - next_token(); - - struct expression* right = parse_assign(); - - struct expression* retval = new_binary_expression(ek_comma, left, right); + struct expression* retval = + new_binary_expression(ek_add, left, right); free_expression(left); free_expression(right); left = retval; - - goto again; } - default: - break; + break; } - return left; - } + case t_minus: + { + tokenizer_next(tokenizer); - return parse_comma(); + struct expression* right = parse_multiplicative(tokenizer); + + if (right->kind == ek_syntax_error) + { + TODO; + } + else + { + struct expression* retval = + new_binary_expression(ek_subtract, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); + break; + } + + default: + TODO; + break; + } } - struct expression* root = parse_root(); + return left; +} - if (tokenkind != tk_EOF) +struct expression* parse_shift(struct tokenizer* tokenizer) +{ + struct expression* left = parse_additive(tokenizer); + + while (true + && left->kind != ek_syntax_error + && (false + || tokenizer->token == t_lessthan_lessthan + || tokenizer->token == t_greaterthan_greaterthan)) { - struct string* message = new_string_from_fmt( - "expected EOF, unexpected '%s'", tokennames[tokenkind]); + switch (tokenizer->token) + { + case t_lessthan_lessthan: + { + tokenizer_next(tokenizer); - struct expression* error = new_syntax_error_expression(message, root); + struct expression* right = parse_additive(tokenizer); + + if (right->kind == ek_syntax_error) + { + free_expression(left); + + left = right; + } + else + { + struct expression* retval = + new_binary_expression(ek_leftbitshift, left, right); + + free_expression(left); + free_expression(right); + + left = retval; + } + + break; + } + + case t_greaterthan_greaterthan: + { + tokenizer_next(tokenizer); + + struct expression* right = parse_additive(tokenizer); + + if (right->kind == ek_syntax_error) + { + TODO; + } + else + { + struct expression* retval = + new_binary_expression(ek_rightbitshift, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); + + break; + } + + default: + TODO; + break; + } + } + + return left; +} + +struct expression* parse_comparision(struct tokenizer* tokenizer) +{ + struct expression* left = parse_shift(tokenizer); + + while (true + && left->kind != ek_syntax_error + && (false + || tokenizer->token == t_lessthan + || tokenizer->token == t_lessthan_equals + || tokenizer->token == t_greaterthan + || tokenizer->token == t_greaterthan_equals)) + { + switch (tokenizer->token) + { + case t_lessthan: + { + tokenizer_next(tokenizer); + + struct expression* right = parse_shift(tokenizer); + + if (right->kind == ek_syntax_error) + { + free_expression(left); + + left = inc_expression(right); + } + else + { + struct expression* retval = + new_binary_expression(ek_less_than, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); + + break; + } + + case t_lessthan_equals: + { + tokenizer_next(tokenizer); + + struct expression* right = parse_additive(tokenizer); + + if (right->kind == ek_syntax_error) + { + free_expression(left); + + left = inc_expression(right); + } + else + { + struct expression* retval = + new_binary_expression(ek_less_than_equal_to, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); + + break; + } + + case t_greaterthan: + { + TODO; + #if 0 + next_token(); + + struct expression* right = + parse_additive(); + + struct expression* retval = + new_binary_expression(ek_greater_than, left, right); + + free_expression(left); + free_expression(right); + + left = retval; + + goto again; + #endif + } + + case t_greaterthan_equals: + { + TODO; + #if 0 + next_token(); + + struct expression* right = + parse_additive(); + + struct expression* retval = + new_binary_expression(ek_greater_than_equal_to, left, right); + + free_expression(left); + free_expression(right); + + left = retval; + + goto again; + #endif + } + + default: + break; + } + } + + return left; +} + +struct expression* parse_equality(struct tokenizer* tokenizer) +{ + struct expression* left = parse_comparision(tokenizer); + + while (true + && left->kind != ek_syntax_error + && (false + || tokenizer->token == t_equals_equals + || tokenizer->token == t_emark_equals)) + { + switch (tokenizer->token) + { + case t_equals_equals: + { + tokenizer_next(tokenizer); + + struct expression* right = parse_comparision(tokenizer); + + if (right->kind == ek_syntax_error) + { + free_expression(left); + + left = inc_expression(right); + } + else + { + struct expression* retval = + new_binary_expression(ek_equal_to, left, right); + + free_expression(left); + + left = retval; + } + + free_expression(right); + + break; + } + + case t_emark_equals: + { + TODO; + #if 0 + next_token(); + + struct expression* right = + parse_comparision(); + + struct expression* retval = + new_binary_expression(ek_not_equal_to, left, right); + + free_expression(left); + free_expression(right); + + left = retval; + + goto again; + #endif + } + + default: + break; + } + } + + + return left; +} + +struct expression* parse_logical_and(struct tokenizer* tokenizer) +{ + struct expression* left = parse_equality(tokenizer); + + while (tokenizer->token == t_ampersand_ampersand) + { + TODO; + } + + return left; +} + +struct expression* parse_logical_or(struct tokenizer* tokenizer) +{ + struct expression* left = parse_logical_and(tokenizer); + + while (tokenizer->token == t_vbar_vbar) + { + tokenizer_next(tokenizer); + + struct expression* right = parse_comparision(tokenizer); + + struct expression* retval = + new_binary_expression(ek_logical_or, left, right); + + free_expression(left); + free_expression(right); + + left = retval; + } + + return left; +} + +struct expression* parse_ternary(struct tokenizer* tokenizer) +{ + struct expression* center = parse_logical_or(tokenizer); + + while (true + && center->kind != ek_syntax_error + && tokenizer->token == t_qmark) + { + tokenizer_next(tokenizer); + + struct expression* left = parse_root(tokenizer); + + if (left->kind == ek_syntax_error) + { + free_expression(center); + + center = inc_expression(left); + } + else if (tokenizer->token == t_colon) + { + tokenizer_next(tokenizer); + + struct expression* right = parse_logical_or(tokenizer); + + if (right->kind == ek_syntax_error) + { + free_expression(center); + + center = inc_expression(right); + } + else + { + struct expression* new = new_ternary_expression(center, left, right); + + free_expression(center); + + center = new; + } + + free_expression(right); + } + else + { + struct string* message = make_unexpected_token_message(tokenizer); + + struct expression* error = new_syntax_error_expression( + tokenizer->start_index + 1, message); + + free_expression(center); + + center = error; + + tokenizer_next(tokenizer); + + free_string(message); + } + + free_expression(left); + } + + return center; +} + +struct expression* parse_assign(struct tokenizer* tokenizer) +{ + struct expression* left = parse_ternary(tokenizer); + + if (tokenizer->token == t_equals) + { + tokenizer_next(tokenizer); + + struct expression* right = parse_assign(tokenizer); + + struct expression* retval = new_binary_expression(ek_assign, left, right); + + free_expression(left); + free_expression(right); + + return retval; + } + + return left; +} + +struct expression* parse_comma(struct tokenizer* tokenizer) +{ + struct expression* left = parse_assign(tokenizer); + + while (tokenizer->token == t_comma) + { + tokenizer_next(tokenizer); + + struct expression* right = parse_assign(tokenizer); + + struct expression* retval = new_binary_expression(ek_comma, left, right); + + free_expression(left); + free_expression(right); + + left = retval; + } + + return left; +} + +struct expression* parse_root(struct tokenizer* tokenizer) +{ + return parse_comma(tokenizer); +} + +struct expression* parse(const char* text) +{ + struct tokenizer* tokenizer = new_tokenizer(text); + + tokenizer_next(tokenizer); + + struct expression* root = parse_root(tokenizer); + + if (root->kind != ek_syntax_error && tokenizer->token != t_EOF) + { + struct string* message = make_unexpected_token_message(tokenizer); + + struct expression* error = new_syntax_error_expression( + tokenizer->index + 1, message); free_expression(root); @@ -1252,11 +2126,12 @@ struct expression* parse(const char* text) free_string(message); } - free(buffer.data); + free_tokenizer(tokenizer); return root; } +#if 0 struct value** evaluate_lvalue( struct expression* expression, struct scope* scope) @@ -1294,6 +2169,8 @@ struct value** evaluate_lvalue( case ek_ternary: TODO; break; + case ek_remainder: TODO; break; + case ek_assign: { TODO; @@ -1309,6 +2186,7 @@ struct value** evaluate_lvalue( return NULL; } +#endif struct value* evaluate( struct expression* expression, @@ -1330,7 +2208,9 @@ struct value* evaluate( } else { - retval = new_value_from_int(0); + retval = new_error_value_from_fmt( + "error: use of undefined variable '%s'!", + expression->string->data); } break; @@ -1347,17 +2227,42 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - mpq_t q; + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_unionnew_error_value_from_union + struct stringset* set = new_stringset(); - mpq_init(q); + stringset_update(set, left->errors); - mpq_add(q, left->mpq, right->mpq); + stringset_update(set, right->errors); - retval = new_value(q); + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + TODO; + } + else if (right->kind == vk_error) + { + TODO; + } + else + { + mpq_t q; + + mpq_init(q); + + mpq_add(q, left->mpq, right->mpq); + + retval = new_value(q); + + mpq_clear(q); + } free_value(left), free_value(right); - - mpq_clear(q); break; } @@ -1366,17 +2271,48 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - mpq_t q; + if (left->kind == vk_error && right->kind == vk_error) + { + TODO + // new_error_value_from_union - mpq_init(q); + struct stringset* set = new_stringset(); - mpq_sub(q, left->mpq, right->mpq); + stringset_update(set, left->errors); - retval = new_value(q); + stringset_update(set, right->errors); - free_value(left), free_value(right); + retval = new_error_value(set); - mpq_clear(q); + free_stringset(set); + } + else if (left->kind == vk_error) + { + TODO; + } + else if (right->kind == vk_error) + { + TODO; + } + else + { + TODO; + #if 0 + mpq_t q; + + mpq_init(q); + + mpq_sub(q, left->mpq, right->mpq); + + retval = new_value(q); + + mpq_clear(q); + #endif + } + + free_value(left); + + free_value(right); break; } @@ -1385,17 +2321,280 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - mpq_t q; + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union - mpq_init(q); + struct stringset* set = new_stringset(); - mpq_mul(q, left->mpq, right->mpq); + stringset_update(set, left->errors); - retval = new_value(q); + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else + { + TODO; + #if 0 + mpq_t q; + + mpq_init(q); + + mpq_mul(q, left->mpq, right->mpq); + + retval = new_value(q); + + mpq_clear(q); + #endif + } + + free_value(left), free_value(right); + + break; + } + + case ek_integer_divide: + { + struct value* left = evaluate(expression->left, scope); + struct value* right = evaluate(expression->right, scope); + + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else + { + mpq_t q; + + mpq_init(q); + + mpq_div(q, left->mpq, right->mpq); + + mpz_tdiv_q(mpq_numref(q), mpq_numref(q), mpq_denref(q)); + + mpz_set_si(mpq_denref(q), 1); + + retval = new_value(q); + + mpq_clear(q); + } + + free_value(left), free_value(right); + + break; + } + + case ek_rightbitshift: + { + struct value* left = evaluate(expression->left, scope); + struct value* right = evaluate(expression->right, scope); + + if (left->kind == vk_error && right->kind == vk_error) + { + retval = new_error_value_from_union(left, right); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else if (mpz_cmp_si(mpq_denref(right->mpq), 1) == 0) + { + if (mpq_sgn(right->mpq) > 0) + { + if (mpz_fits_ulong_p(mpq_numref(right->mpq))) + { + unsigned long r = mpz_get_ui(mpq_numref(right->mpq)); + + mpq_t q; + + mpq_init(q); + + mpq_div_2exp(q, left->mpq, r); + + retval = new_value(q); + + mpq_clear(q); + } + else + { + retval = new_error_value_from_fmt( + "error: cannot right bitshift beyond ULONG_MAX"); + } + } + else + { + retval = new_error_value_from_fmt( + "error: cannot right bitshift by a negative value"); + } + } + else + { + retval = new_error_value_from_fmt( + "error: cannot right bitshift by a fractional value"); + } + + free_value(left), free_value(right); + + break; + } + + case ek_leftbitshift: + { + struct value* left = evaluate(expression->left, scope); + struct value* right = evaluate(expression->right, scope); + + if (left->kind == vk_error && right->kind == vk_error) + { + retval = new_error_value_from_union(left, right); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else if (mpz_cmp_si(mpq_denref(right->mpq), 1) == 0) + { + if (mpq_sgn(right->mpq) > 0) + { + if (mpz_fits_ulong_p(mpq_numref(right->mpq))) + { + unsigned long rshift = mpz_get_ui(mpq_numref(right->mpq)); + + mpq_t q; + + mpq_init(q); + + mpq_mul_2exp(q, left->mpq, rshift); + + retval = new_value(q); + + mpq_clear(q); + } + else + { + retval = new_error_value_from_fmt( + "error: cannot left bitshift beyond ULONG_MAX"); + } + } + else + { + retval = new_error_value_from_fmt( + "error: cannot left bitshift by a negative value"); + } + } + else + { + retval = new_error_value_from_fmt( + "error: cannot left bitshift by fractional value"); + } + + free_value(left), free_value(right); + + break; + } + + case ek_exponent: + { + struct value* left = evaluate(expression->left, scope); + struct value* right = evaluate(expression->right, scope); + + if (left->kind == vk_error && right->kind == vk_error) + { + retval = new_error_value_from_union(left, right); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else if (mpz_cmp_si(mpq_denref(right->mpq), 1) == 0) + { + if (mpq_sgn(right->mpq) > 0) + { + if (mpz_fits_ulong_p(mpq_numref(right->mpq))) + { + unsigned long rpow = mpz_get_ui(mpq_numref(right->mpq)); + + mpz_t n, d; + + mpz_init(n), mpz_init(d); + + mpz_pow_ui(n, mpq_numref(left->mpq), rpow); + mpz_pow_ui(d, mpq_denref(left->mpq), rpow); + + mpq_t q, t; + + mpq_init(q), mpq_init(t); + + mpq_set_z(q, n); + mpq_set_z(t, d); + + mpq_div(q, q, t); + + retval = new_value(q); + + mpq_clear(q), mpq_clear(t); + } + else + { + retval = new_error_value_from_fmt( + "error: cannot exponentiate beyond ULONG_MAX"); + } + } + else + { + retval = new_error_value_from_fmt( + "error: negative exponents not supported"); + } + } + else + { + retval = new_error_value_from_fmt( + "error: cannot exponentiate by a fraction"); + } free_value(left), free_value(right); - mpq_clear(q); break; } @@ -1404,17 +2603,103 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - mpq_t q; + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union - mpq_init(q); + struct stringset* set = new_stringset(); - mpq_div(q, left->mpq, right->mpq); + stringset_update(set, left->errors); - retval = new_value(q); + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else + { + TODO; + #if 0 + mpq_t q; + + mpq_init(q); + + mpq_div(q, left->mpq, right->mpq); + + retval = new_value(q); + + mpq_clear(q); + #endif + } + + free_value(left), free_value(right); + + break; + } + + case ek_remainder: + { + struct value* left = evaluate(expression->left, scope); + struct value* right = evaluate(expression->right, scope); + + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union + + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + retval = inc_value(left); + } + else if (right->kind == vk_error) + { + retval = inc_value(right); + } + else + { + mpq_t q; + + mpq_init(q); + + // (x % y) => x - (x // y) * y + + mpq_div(q, left->mpq, right->mpq); + + mpz_tdiv_q(mpq_numref(q), mpq_numref(q), mpq_denref(q)); + + mpz_set_si(mpq_denref(q), 1); + + mpq_mul(q, q, right->mpq); + + mpq_sub(q, left->mpq, q); + + retval = new_value(q); + + mpq_clear(q); + } free_value(left), free_value(right); - mpq_clear(q); break; } @@ -1428,33 +2713,46 @@ struct value* evaluate( { struct value* sub = evaluate(expression->center, scope); - mpq_t q; + if (sub->kind == vk_error) + { + TODO; + } + else + { + mpq_t q; - mpq_init(q); + mpq_init(q); - mpq_neg(q, sub->mpq); + mpq_neg(q, sub->mpq); - retval = new_value(q); + retval = new_value(q); - free_value(sub); + free_value(sub); + + mpq_clear(q); + } - mpq_clear(q); break; } case ek_greater_than: { + TODO; + #if 0 struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) > 0); free_value(left), free_value(right); + #endif break; } case ek_greater_than_equal_to: { + TODO; + #if 0 struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); @@ -1462,6 +2760,7 @@ struct value* evaluate( free_value(left), free_value(right); break; + #endif } case ek_less_than: @@ -1469,7 +2768,32 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) < 0); + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + TODO; + } + else if (right->kind == vk_error) + { + TODO; + } + else + { + retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) < 0); + } free_value(left), free_value(right); break; @@ -1480,9 +2804,35 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) <= 0); + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + TODO; + } + else if (right->kind == vk_error) + { + TODO; + } + else + { + retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) <= 0); + } free_value(left), free_value(right); + break; } @@ -1491,14 +2841,42 @@ struct value* evaluate( struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); - retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) == 0); + if (left->kind == vk_error && right->kind == vk_error) + { + TODO; + // new_error_value_from_union + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else if (left->kind == vk_error) + { + TODO; + } + else if (right->kind == vk_error) + { + TODO; + } + else + { + retval = new_value_from_int(mpq_cmp(left->mpq, right->mpq) == 0); + } free_value(left), free_value(right); + break; } case ek_not_equal_to: { + TODO; + #if 0 struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); @@ -1506,20 +2884,26 @@ struct value* evaluate( free_value(left), free_value(right); break; + #endif } case ek_logical_not: { + TODO; + #if 0 struct value* sub = evaluate(expression->center, scope); retval = new_value_from_int(mpq_sgn(sub->mpq) == 0); free_value(sub); break; + #endif } case ek_logical_and: { + TODO; + #if 0 struct value* left = evaluate(expression->left, scope); struct value* right = evaluate(expression->right, scope); @@ -1528,40 +2912,119 @@ struct value* evaluate( free_value(left), free_value(right); break; + #endif } case ek_logical_or: { struct value* left = evaluate(expression->left, scope); - struct value* right = evaluate(expression->right, scope); - retval = new_value_from_int( - mpq_sgn(left->mpq) != 0 || mpq_sgn(right->mpq) != 0); + if (left->kind == vk_error) + { + struct value* right = evaluate(expression->right, scope); + + if (right->kind == vk_error) + { + TODO; + // new_error_value_from_union + struct stringset* set = new_stringset(); + + stringset_update(set, left->errors); + stringset_update(set, right->errors); + + retval = new_error_value(set); + + free_stringset(set); + } + else + { + retval = inc_value(left); + } + + free_value(right); + } + else + { + TODO; + #if 0 + struct value* right = evaluate(expression->right, scope); + + retval = new_value_from_int( + mpq_sgn(left->mpq) != 0 || mpq_sgn(right->mpq) != 0); + + free_value(left), free_value(right); + break; + #endif + } + + free_value(left); - free_value(left), free_value(right); break; } - case ek_ternary: TODO; break; + case ek_ternary: + { + struct value* conditional = evaluate(expression->center, scope); + + if (conditional->kind == vk_error) + { + retval = inc_value(conditional); + } + else if (mpq_sgn(conditional->mpq) != 0) + { + retval = evaluate(expression->left, scope); + } + else + { + retval = evaluate(expression->right, scope); + } + + free_value(conditional); + break; + } case ek_assign: { - struct value** lvalue = evaluate_lvalue(expression->left, scope); + if (expression->left->kind == ek_variable) + { + struct value** lvalue = scope_lookup( + scope, expression->left->string); - retval = evaluate(expression->right, scope); + retval = evaluate(expression->right, scope); - free_value(*lvalue), *lvalue = inc_value(retval); + free_value(*lvalue), *lvalue = inc_value(retval); + } + else + { + TODO; + + struct string* message = new_string_from_fmt( + "error: cannot assign to rvalue-expressions"); + + struct stringset* set = new_stringset(); + + stringset_add(set, message); + + retval = new_error_value(set); + + free_stringset(set); + + free_string(message); + } break; }; case ek_comma: { + TODO; + #if 0 free_value(evaluate(expression->left, scope)); retval = evaluate(expression->right, scope); break; + #endif }; } @@ -1591,7 +3054,7 @@ void print_mpz(const mpz_t x_ro, const char* digits[10]) assert(mpz_fits_uint_p(d)); - unsigned int diu = mpz_get_ui(d); + unsigned int diu = (unsigned int) mpz_get_ui(d); assert(diu < 10); @@ -1610,110 +3073,95 @@ void print_mpz(const mpz_t x_ro, const char* digits[10]) void print(struct value* this) { - mpq_t v; - - mpq_init(v); - - mpq_set(v, this->mpq); - - if (mpq_sgn(v) == 0) + switch (this->kind) { - putchar('0'); + case vk_error: + { + stringset_println(this->errors); + + break; + } + + case vk_number: + { + mpq_t v; + + mpq_init(v); + + mpq_set(v, this->mpq); + + if (mpq_sgn(v) == 0) + { + putchar('0'); + } + else + { + if (mpq_sgn(v) < 0) + { + putchar('-'); + + mpq_abs(v, v); + } + + mpz_t q; + + mpz_init(q); + + mpz_fdiv_q(q, mpq_numref(v), mpq_denref(v)); + + if (mpz_sgn(q) > 0) + { + print_mpz(q, + (const char*[10]){"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}); + + putchar(' '); + } + + mpq_t qq; + + mpq_init(qq); + + mpq_set_z(qq, q); + + mpq_sub(v, v, qq); + + if (mpq_sgn(v) > 0) + { + print_mpz(mpq_numref(v), + (const char*[10]){"⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹"}); + + putchar('/'); + + print_mpz(mpq_denref(v), + (const char*[10]){"₀", "₁", "₂", "₃", "₄", "₅", "₆", "₇", "₈", "₉"}); + } + + mpq_clear(qq); + + mpz_clear(q); + } + + mpq_clear(v); + + puts(""); + break; + } } - else - { - if (mpq_sgn(v) < 0) - { - putchar('-'); - - mpq_abs(v, v); - } - - mpz_t q; - - mpz_init(q); - - mpz_fdiv_q(q, mpq_numref(v), mpq_denref(v)); - - if (mpz_sgn(q) > 0) - { - print_mpz(q, - (const char*[10]){"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}); - - putchar(' '); - } - - mpq_t qq; - - mpq_init(qq); - - mpq_set_z(qq, q); - - mpq_sub(v, v, qq); - - if (mpq_sgn(v) > 0) - { - print_mpz(mpq_numref(v), - (const char*[10]){"⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹"}); - - putchar('/'); - - print_mpz(mpq_denref(v), - (const char*[10]){"₀", "₁", "₂", "₃", "₄", "₅", "₆", "₇", "₈", "₉"}); - } - - mpq_clear(qq); - - mpz_clear(q); - } - - mpq_clear(v); - - puts(""); } -void print_syntax_error( +void syntax_error_print( const struct expression* this) { assert(this->kind == ek_syntax_error); - printf("syntax error: %s\n", this->string->data); -} - -bool foreach_syntax_error( - struct expression* exp, - void (*callback)( - const struct expression*)) -{ - bool retval = false; - - if (exp->kind == ek_syntax_error) - { - callback(exp); - - retval = true; - } - - if (exp->center && foreach_syntax_error(exp->center, callback)) - { - retval = true; - } - - if (exp->left && foreach_syntax_error(exp->left, callback)) - { - retval = true; - } - - if (exp->right && foreach_syntax_error(exp->right, callback)) - { - retval = true; - } - - return retval; + printf("%*s↑\n", this->column - 1, ""); + printf("%*s└ %s\n", this->column - 1, "", this->string->data); } int main(int argc, char* const* argv) { + int retval = 0; + parse_args(argc, argv); if (command) @@ -1722,10 +3170,17 @@ int main(int argc, char* const* argv) struct expression* expression = parse(command); - bool any_syntax_errors = foreach_syntax_error(expression, - print_syntax_error); + if (expression->kind == ek_syntax_error) + { + printf("syntax error!\n"); - if (!any_syntax_errors) + printf("%s\n", command); + + syntax_error_print(expression); + + retval = 1; + } + else { struct value* value = evaluate(expression, scope); @@ -1752,7 +3207,8 @@ int main(int argc, char* const* argv) struct value* value = evaluate(expression, scope); - print(value); + TODO; // switch on value kind + // print(value); free_value(value); @@ -1762,7 +3218,7 @@ int main(int argc, char* const* argv) free_scope(scope); } - return 0; + return retval; } diff --git a/makefile b/makefile index 49b79e0..7a1e154 100644 --- a/makefile +++ b/makefile @@ -11,7 +11,9 @@ cc = gcc cppflags += -D _GNU_SOURCE #cppflags += -D ZDEBUG=1 -cflags = -Werror -Wall -Wextra -Wstrict-prototypes -Wfatal-errors +cflags = -Werror -Wall -Wextra -Wconversion -Wstrict-prototypes -Wfatal-errors + +cflags += -Wno-override-init # cflags += -O3 cflags += -g