#include #include #include #include #include #include "struct.h" #include "next.h" static const enum state { s_error, s_EOF, s_null, s_true, s_false, s_integer, // symbols: s_gravemark, s_oparen, s_cparen, s_identifier, s_start, s_skipping_comment, s_reading_oparen, s_reading_cparen, s_reading_integer, s_reading_identifier, s_reading_gravemark, s_reading_n, s_reading_nu, s_reading_nul, s_reading_null, s_reading_t, s_reading_tr, s_reading_tru, s_reading_true, s_reading_f, s_reading_fa, s_reading_fal, s_reading_fals, s_reading_false, number_of_states, } lookup[number_of_states][256] = { #define ANY 0 ... 255 // EOF: [s_start][0] = s_EOF, // skip whitespace [s_start][' '] = s_start, [s_start]['\t'] = s_start, [s_start]['\n'] = s_start, // skip comments: [s_start]['#'] = s_skipping_comment, [s_skipping_comment][ANY] = s_skipping_comment, [s_skipping_comment]['\n'] = s_start, // brackets: [s_start]['('] = s_reading_oparen, [s_reading_oparen][ANY] = s_oparen, [s_start][')'] = s_reading_cparen, [s_reading_cparen][ANY] = s_cparen, // symbols: [s_start]['`'] = s_reading_gravemark, [s_reading_gravemark][ANY] = s_gravemark, // integer literals: [s_start]['0' ... '9'] = s_reading_integer, [s_reading_integer][ ANY ] = s_integer, [s_reading_integer]['0' ... '9'] = s_reading_integer, // identifiers [s_start]['?'] = s_reading_identifier, [s_start]['!'] = s_reading_identifier, [s_start]['>'] = s_reading_identifier, [s_start]['<'] = s_reading_identifier, [s_start]['='] = s_reading_identifier, [s_start]['+'] = s_reading_identifier, [s_start]['*'] = s_reading_identifier, [s_start]['_'] = s_reading_identifier, [s_start]['-'] = s_reading_identifier, [s_start]['/'] = s_reading_identifier, [s_start]['a' ... 'z'] = s_reading_identifier, [s_start]['A' ... 'Z'] = s_reading_identifier, [s_reading_identifier][ANY] = s_identifier, [s_reading_identifier][':'] = s_reading_identifier, [s_reading_identifier]['='] = s_reading_identifier, [s_reading_identifier]['!'] = s_reading_identifier, [s_reading_identifier]['+'] = s_reading_identifier, [s_reading_identifier]['*'] = s_reading_identifier, [s_reading_identifier]['_'] = s_reading_identifier, [s_reading_identifier]['/'] = s_reading_identifier, [s_reading_identifier]['-'] = s_reading_identifier, [s_reading_identifier]['a' ... 'z'] = s_reading_identifier, [s_reading_identifier]['A' ... 'Z'] = s_reading_identifier, [s_reading_identifier]['0' ... '9'] = s_reading_identifier, // "null" keyword: [s_start]['n'] = s_reading_n, [s_reading_n][ANY] = s_identifier, [s_reading_n]['_'] = s_reading_identifier, [s_reading_n]['-'] = s_reading_identifier, [s_reading_n]['a' ... 'z'] = s_reading_identifier, [s_reading_n]['A' ... 'Z'] = s_reading_identifier, [s_reading_n]['u'] = s_reading_nu, [s_reading_nu][ANY] = s_identifier, [s_reading_nu]['_'] = s_reading_identifier, [s_reading_nu]['-'] = s_reading_identifier, [s_reading_nu]['a' ... 'z'] = s_reading_identifier, [s_reading_nu]['A' ... 'Z'] = s_reading_identifier, [s_reading_nu]['l'] = s_reading_nul, [s_reading_nul][ANY] = s_identifier, [s_reading_nul]['_'] = s_reading_identifier, [s_reading_nul]['-'] = s_reading_identifier, [s_reading_nul]['a' ... 'z'] = s_reading_identifier, [s_reading_nul]['A' ... 'Z'] = s_reading_identifier, [s_reading_nul]['l'] = s_reading_null, [s_reading_null][ANY] = s_null, [s_reading_null]['_'] = s_reading_identifier, [s_reading_null]['-'] = s_reading_identifier, [s_reading_null]['a' ... 'z'] = s_reading_identifier, [s_reading_null]['A' ... 'Z'] = s_reading_identifier, // "true" keyword: [s_start]['t'] = s_reading_t, [s_reading_t][ANY] = s_identifier, [s_reading_t]['_'] = s_reading_identifier, [s_reading_t]['-'] = s_reading_identifier, [s_reading_t]['a' ... 'z'] = s_reading_identifier, [s_reading_t]['A' ... 'Z'] = s_reading_identifier, [s_reading_t]['r'] = s_reading_tr, [s_reading_tr][ANY] = s_identifier, [s_reading_tr]['_'] = s_reading_identifier, [s_reading_tr]['-'] = s_reading_identifier, [s_reading_tr]['a' ... 'z'] = s_reading_identifier, [s_reading_tr]['A' ... 'Z'] = s_reading_identifier, [s_reading_tr]['u'] = s_reading_tru, [s_reading_tru][ANY] = s_identifier, [s_reading_tru]['_'] = s_reading_identifier, [s_reading_tru]['-'] = s_reading_identifier, [s_reading_tru]['a' ... 'z'] = s_reading_identifier, [s_reading_tru]['A' ... 'Z'] = s_reading_identifier, [s_reading_tru]['e'] = s_reading_true, [s_reading_true][ANY] = s_true, [s_reading_true]['_'] = s_reading_identifier, [s_reading_true]['-'] = s_reading_identifier, [s_reading_true]['a' ... 'z'] = s_reading_identifier, [s_reading_true]['A' ... 'Z'] = s_reading_identifier, // "false" keyword: [s_start]['f'] = s_reading_f, [s_reading_f][ANY] = s_identifier, [s_reading_f]['_'] = s_reading_identifier, [s_reading_f]['-'] = s_reading_identifier, [s_reading_f]['a' ... 'z'] = s_reading_identifier, [s_reading_f]['A' ... 'Z'] = s_reading_identifier, [s_reading_f]['a'] = s_reading_fa, [s_reading_fa][ANY] = s_identifier, [s_reading_fa]['_'] = s_reading_identifier, [s_reading_fa]['-'] = s_reading_identifier, [s_reading_fa]['a' ... 'z'] = s_reading_identifier, [s_reading_fa]['A' ... 'Z'] = s_reading_identifier, [s_reading_fa]['l'] = s_reading_fal, [s_reading_fal][ANY] = s_identifier, [s_reading_fal]['_'] = s_reading_identifier, [s_reading_fal]['-'] = s_reading_identifier, [s_reading_fal]['a' ... 'z'] = s_reading_identifier, [s_reading_fal]['A' ... 'Z'] = s_reading_identifier, [s_reading_fal]['s'] = s_reading_fals, [s_reading_fals][ANY] = s_identifier, [s_reading_fals]['_'] = s_reading_identifier, [s_reading_fals]['-'] = s_reading_identifier, [s_reading_fals]['a' ... 'z'] = s_reading_identifier, [s_reading_fals]['A' ... 'Z'] = s_reading_identifier, [s_reading_fals]['e'] = s_reading_false, [s_reading_false][ANY] = s_false, [s_reading_false]['_'] = s_reading_identifier, [s_reading_false]['-'] = s_reading_identifier, [s_reading_false]['a' ... 'z'] = s_reading_identifier, [s_reading_false]['A' ... 'Z'] = s_reading_identifier, }; void tokenizer_next_token( struct tokenizer* this) { ENTER; this->rawtoken.n = 0; void append(uint8_t c) { ENTER; if (this->rawtoken.n == this->rawtoken.cap) { this->rawtoken.cap = this->rawtoken.cap << 1 ?: 1; this->rawtoken.data = srealloc( this->rawtoken.data, sizeof(*this->rawtoken.data) * this->rawtoken.cap); } this->rawtoken.data[this->rawtoken.n++] = c; EXIT; } enum state state = s_start; while (state >= s_start) { dpvc(this->stream->c); state = lookup[state][this->stream->c]; if (state > s_start) { append(this->stream->c); istream_read(this->stream); } if (state == s_start) { this->rawtoken.n = 0; istream_read(this->stream); } } append(0), this->rawtoken.n--; switch (state) { case s_error: { TODO; break; } case s_EOF: { this->token = t_EOF; break; } case s_gravemark: { this->token = t_gravemark; break; } case s_oparen: { this->token = t_oparen; break; } case s_cparen: { this->token = t_cparen; break; } case s_null: { this->token = t_null; break; } case s_true: { this->token = t_true; break; } case s_false: { this->token = t_false; break; } case s_identifier: { this->token = t_identifier; break; } case s_integer: { this->token = t_integer; break; } default: TODO; break; } EXIT; }