356 lines
9.8 KiB
C
356 lines
9.8 KiB
C
|
|
#include <assert.h>
|
|
|
|
#include <debug.h>
|
|
|
|
#include <memory/srealloc.h>
|
|
|
|
#include <parse/istream/struct.h>
|
|
#include <parse/istream/read.h>
|
|
|
|
#include "struct.h"
|
|
#include "next.h"
|
|
|
|
/*
 * Tokenizer state machine.
 *
 * `enum state` names every state of the scanner and `lookup[state][byte]`
 * gives the state entered when `byte` is seen in `state`.
 *
 * The enumerator order is significant: tokenizer_next_token() keeps scanning
 * while `state >= s_start`, so every accepting state must be declared before
 * s_start and every in-progress state after it.
 *
 * The table relies on two properties of (GNU) designated initializers:
 *   - cells that are never named are zero, i.e. s_error;
 *   - when a cell is named more than once the last initializer wins, so each
 *     state first sets a default with [ANY] and then overrides single bytes.
 */
static const enum state {
	// accepting states:
	s_error,
	s_EOF,
	s_null,
	s_true,
	s_false,
	s_integer,

	// symbols:
	s_gravemark,
	s_oparen,
	s_cparen,

	s_identifier,

	// initial state (also re-entered after whitespace and comments):
	s_start,

	// in-progress states:
	s_skipping_comment,

	s_reading_oparen,
	s_reading_cparen,

	s_reading_integer,

	s_reading_identifier,

	s_reading_gravemark,

	s_reading_n,
	s_reading_nu,
	s_reading_nul,
	s_reading_null,

	s_reading_t,
	s_reading_tr,
	s_reading_tru,
	s_reading_true,

	s_reading_f,
	s_reading_fa,
	s_reading_fal,
	s_reading_fals,
	s_reading_false,

	number_of_states,
} lookup[number_of_states][256] = {

	#define ANY 0 ... 255

	// Every byte that may follow the first byte of an identifier. Shared by
	// s_reading_identifier and by all keyword-prefix states so that a name
	// starting like a keyword ("n1", "t+x", "null0") is scanned exactly like
	// any other identifier.
	#define IDENTIFIER_TAIL(st) \
		[st][':'] = s_reading_identifier, \
		[st]['='] = s_reading_identifier, \
		[st]['!'] = s_reading_identifier, \
		[st]['+'] = s_reading_identifier, \
		[st]['*'] = s_reading_identifier, \
		[st]['_'] = s_reading_identifier, \
		[st]['/'] = s_reading_identifier, \
		[st]['-'] = s_reading_identifier, \
		[st]['a' ... 'z'] = s_reading_identifier, \
		[st]['A' ... 'Z'] = s_reading_identifier, \
		[st]['0' ... '9'] = s_reading_identifier

	// EOF:
	[s_start][0] = s_EOF,

	// skip whitespace
	[s_start][' '] = s_start,
	[s_start]['\t'] = s_start,
	[s_start]['\n'] = s_start,

	// skip comments:
	[s_start]['#'] = s_skipping_comment,
	[s_skipping_comment][ANY] = s_skipping_comment,
	[s_skipping_comment]['\n'] = s_start,
	// byte 0 is what s_start treats as end of input; a comment that is not
	// closed by a newline must end there too instead of never leaving this
	// state.
	[s_skipping_comment][0] = s_EOF,

	// brackets:
	[s_start]['('] = s_reading_oparen,
	[s_reading_oparen][ANY] = s_oparen,
	[s_start][')'] = s_reading_cparen,
	[s_reading_cparen][ANY] = s_cparen,

	// symbols:
	[s_start]['`'] = s_reading_gravemark,
	[s_reading_gravemark][ANY] = s_gravemark,

	// integer literals:
	[s_start]['0' ... '9'] = s_reading_integer,
	[s_reading_integer][ ANY ] = s_integer,
	[s_reading_integer]['0' ... '9'] = s_reading_integer,

	// identifiers
	[s_start]['?'] = s_reading_identifier,
	[s_start]['!'] = s_reading_identifier,
	[s_start]['>'] = s_reading_identifier,
	[s_start]['<'] = s_reading_identifier,
	[s_start]['='] = s_reading_identifier,
	[s_start]['+'] = s_reading_identifier,
	[s_start]['*'] = s_reading_identifier,
	[s_start]['_'] = s_reading_identifier,
	[s_start]['-'] = s_reading_identifier,
	[s_start]['/'] = s_reading_identifier,
	[s_start]['a' ... 'z'] = s_reading_identifier,
	[s_start]['A' ... 'Z'] = s_reading_identifier,
	[s_reading_identifier][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_identifier),

	// The keyword sections below must stay after the identifier section:
	// they override the generic [s_start]['a' ... 'z'] entry for 'n', 't'
	// and 'f', and inside each prefix state the "next keyword letter" entry
	// must follow IDENTIFIER_TAIL so that it wins.

	// "null" keyword:
	[s_start]['n'] = s_reading_n,
	[s_reading_n][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_n),
	[s_reading_n]['u'] = s_reading_nu,
	[s_reading_nu][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_nu),
	[s_reading_nu]['l'] = s_reading_nul,
	[s_reading_nul][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_nul),
	[s_reading_nul]['l'] = s_reading_null,
	[s_reading_null][ANY] = s_null,
	IDENTIFIER_TAIL(s_reading_null),

	// "true" keyword:
	[s_start]['t'] = s_reading_t,
	[s_reading_t][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_t),
	[s_reading_t]['r'] = s_reading_tr,
	[s_reading_tr][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_tr),
	[s_reading_tr]['u'] = s_reading_tru,
	[s_reading_tru][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_tru),
	[s_reading_tru]['e'] = s_reading_true,
	[s_reading_true][ANY] = s_true,
	IDENTIFIER_TAIL(s_reading_true),

	// "false" keyword:
	[s_start]['f'] = s_reading_f,
	[s_reading_f][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_f),
	[s_reading_f]['a'] = s_reading_fa,
	[s_reading_fa][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_fa),
	[s_reading_fa]['l'] = s_reading_fal,
	[s_reading_fal][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_fal),
	[s_reading_fal]['s'] = s_reading_fals,
	[s_reading_fals][ANY] = s_identifier,
	IDENTIFIER_TAIL(s_reading_fals),
	[s_reading_fals]['e'] = s_reading_false,
	[s_reading_false][ANY] = s_false,
	IDENTIFIER_TAIL(s_reading_false),

	#undef IDENTIFIER_TAIL
};
|
|
|
|
void tokenizer_next_token(
|
|
struct tokenizer* this)
|
|
{
|
|
ENTER;
|
|
|
|
this->rawtoken.n = 0;
|
|
|
|
void append(uint8_t c)
|
|
{
|
|
ENTER;
|
|
|
|
if (this->rawtoken.n == this->rawtoken.cap)
|
|
{
|
|
this->rawtoken.cap = this->rawtoken.cap << 1 ?: 1;
|
|
|
|
this->rawtoken.data = srealloc(
|
|
this->rawtoken.data,
|
|
sizeof(*this->rawtoken.data) * this->rawtoken.cap);
|
|
}
|
|
|
|
this->rawtoken.data[this->rawtoken.n++] = c;
|
|
|
|
EXIT;
|
|
}
|
|
|
|
enum state state = s_start;
|
|
|
|
while (state >= s_start)
|
|
{
|
|
dpvc(this->stream->c);
|
|
|
|
state = lookup[state][this->stream->c];
|
|
|
|
if (state > s_start)
|
|
{
|
|
append(this->stream->c);
|
|
|
|
istream_read(this->stream);
|
|
}
|
|
|
|
if (state == s_start)
|
|
{
|
|
this->rawtoken.n = 0;
|
|
|
|
istream_read(this->stream);
|
|
}
|
|
}
|
|
|
|
append(0), this->rawtoken.n--;
|
|
|
|
switch (state)
|
|
{
|
|
case s_error:
|
|
{
|
|
TODO;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_EOF:
|
|
{
|
|
this->token = t_EOF;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_gravemark:
|
|
{
|
|
this->token = t_gravemark;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_oparen:
|
|
{
|
|
this->token = t_oparen;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_cparen:
|
|
{
|
|
this->token = t_cparen;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_null:
|
|
{
|
|
this->token = t_null;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_true:
|
|
{
|
|
this->token = t_true;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_false:
|
|
{
|
|
this->token = t_false;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_identifier:
|
|
{
|
|
this->token = t_identifier;
|
|
|
|
break;
|
|
}
|
|
|
|
case s_integer:
|
|
{
|
|
this->token = t_integer;
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
TODO;
|
|
break;
|
|
}
|
|
|
|
EXIT;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|