#include #include #include #include #include #include #include "../wcistream/struct.h" #include "../wcistream/read.h" #include "../position/struct.h" #include "../position/clone.h" #include "../position/inc.h" #include "../position/assign.h" #include "../position/free.h" #include "../token/new.h" #include "../token/inc.h" #include "../token/free.h" #include "struct.h" #include "next.h" static const enum state { s_error, s_EOF, s_number, s_colon, s_comma, s_equals, s_semicolon, s_oparen, s_cparen, s_newline, s_identifier, s_start, s_reading_newline, s_reading_slash, s_skipping_comment, s_skipping_comment_slash, s_reading_colon, s_reading_comma, s_reading_equals, s_reading_semicolon, s_reading_oparen, s_reading_cparen, s_reading_number, s_reading_identifier, number_of_states, } lookup[number_of_states][127 + 1 + 1] = { #define ANY 0 ... 128 // EOF: [s_start][0] = s_EOF, // skip whitespace [s_start][' '] = s_start, [s_start]['\t'] = s_start, [s_start]['\n'] = s_reading_newline, [s_reading_newline][ANY] = s_newline, // skip comments: [s_start]['#'] = s_skipping_comment, [s_skipping_comment][ANY] = s_skipping_comment, [s_skipping_comment]['\\'] = s_skipping_comment_slash, [s_skipping_comment_slash][ANY] = s_skipping_comment, [s_skipping_comment]['\n'] = s_start, // skip escaped newlines: [s_start]['\\'] = s_reading_slash, [s_reading_slash][ANY] = s_start, // symbols: [s_start][':'] = s_reading_colon, [s_reading_colon][ANY] = s_colon, [s_start][','] = s_reading_comma, [s_reading_comma][ANY] = s_comma, [s_start][';'] = s_reading_semicolon, [s_reading_semicolon][ANY] = s_semicolon, // brackets: [s_start]['('] = s_reading_oparen, [s_reading_oparen][ANY] = s_oparen, [s_start][')'] = s_reading_cparen, [s_reading_cparen][ANY] = s_cparen, // numeric literals: [s_start]['0' ... '9'] = s_reading_number, [s_reading_number][ ANY ] = s_number, [s_reading_number]['.'] = s_reading_number, [s_reading_number]['0' ... '9'] = s_reading_number, // identifiers [s_start]['?'] = s_reading_identifier, [s_start]['!'] = s_reading_identifier, [s_start]['-'] = s_reading_identifier, [s_start]['>'] = s_reading_identifier, [s_start]['='] = s_reading_identifier, [s_start]['<'] = s_reading_identifier, [s_start]['+'] = s_reading_identifier, [s_start]['*'] = s_reading_identifier, [s_start]['_'] = s_reading_identifier, [s_start]['-'] = s_reading_identifier, [s_start]['/'] = s_reading_identifier, [s_start]['a' ... 'z'] = s_reading_identifier, [s_start]['A' ... 'Z'] = s_reading_identifier, [s_start][128] = s_reading_identifier, [s_reading_identifier][ANY] = s_identifier, [s_reading_identifier]['!'] = s_reading_identifier, [s_reading_identifier]['+'] = s_reading_identifier, [s_reading_identifier]['*'] = s_reading_identifier, [s_reading_identifier]['_'] = s_reading_identifier, [s_reading_identifier]['/'] = s_reading_identifier, [s_reading_identifier]['-'] = s_reading_identifier, [s_reading_identifier]['a' ... 'z'] = s_reading_identifier, [s_reading_identifier]['A' ... 'Z'] = s_reading_identifier, [s_reading_identifier]['0' ... '9'] = s_reading_identifier, [s_reading_identifier][128] = s_reading_identifier, }; void tokenizer_next( struct tokenizer* this) { ENTER; if (this->put_back) { free_token(this->token); this->token = inc_token(this->put_back); free_token(this->put_back); this->put_back = NULL; } else { this->rawtoken.n = 0; void append(wchar_t c) { ENTER; if (this->rawtoken.n == this->rawtoken.cap) { this->rawtoken.cap = this->rawtoken.cap << 1 ?: 1; this->rawtoken.data = srealloc( this->rawtoken.data, sizeof(*this->rawtoken.data) * this->rawtoken.cap); } this->rawtoken.data[this->rawtoken.n++] = c; EXIT; } struct position* start_position = clone_position(this->position); struct position* end_position = this->position; enum state state = s_start; while (state >= s_start) { dpvu(this->stream->wc); dpvwc(this->stream->wc); state = lookup[state][MIN(this->stream->wc, 128)]; if (state > s_start) { append(this->stream->wc); switch (this->stream->wc) { case '\t': end_position->column += 4; break; case '\n': end_position->line++, end_position->column = 1; break; default: end_position->column++; break; } wcistream_read(this->stream); } if (state == s_start) { this->rawtoken.n = 0; assign_position(start_position, end_position); wcistream_read(this->stream); } } append(0), this->rawtoken.n--; free_token(this->token), this->token = NULL; struct position* end_clone = clone_position(end_position); switch (state) { case s_error: { dpvws(this->rawtoken.data); TODO; break; } case s_EOF: { this->token = new_token( /* kind: */ tk_EOF, /* text: */ NULL, /* start position: */ end_clone, /* end position: */ end_clone); break; } case s_newline: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_newline, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } case s_identifier: { dpvws(this->rawtoken.data); struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); if (!wcscmp(this->rawtoken.data, L"λ")) { this->token = new_token( /* kind: */ tk_lambda, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); } else if (!wcscmp(this->rawtoken.data, L"<-")) { this->token = new_token( /* kind: */ tk_arrow, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); } else { this->token = new_token( /* kind: */ tk_identifier, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); } free_string(text); break; } case s_colon: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_colon, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } case s_comma: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_comma, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } case s_semicolon: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_semicolon, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } case s_oparen: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_oparen, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } case s_cparen: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_cparen, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } case s_number: { struct string* text = new_string( /* data: */ this->rawtoken.data, /* len: */ this->rawtoken.n); this->token = new_token( /* kind: */ tk_literal, /* text: */ text, /* start position: */ start_position, /* end position: */ end_clone); free_string(text); break; } default: TODO; break; } free_position(start_position); free_position(end_clone); } EXIT; }