// Recursive-descent parser: turns the token stream from `tokenizer` into an
// expression/statement tree.  Grammar (informally):
//   statement    := IDENT '<-' application | application
//   application  := primary primary*            (left-associative)
//   primary      := literal | IDENT | '(' application ')' | lambda
//   lambda       := LAMBDA IDENT (',' IDENT)* (':'|'.') application
//
// Ownership convention: `new_*` return an owned node; `inc_*`/`free_*` appear
// to be reference-count acquire/release helpers — TODO confirm against
// token/inc.h and token/free.h.
//
// NOTE(review): the system-header names below were lost in transit (the
// `<...>` part of each directive was stripped, leaving bare `#include`
// tokens).  They must be restored before this file can compile; the code
// visibly needs at least assert.h, stdbool.h, stddef.h, stdlib.h and wchar.h.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "token/struct.h"
#include "token/inc.h"
#include "token/free.h"
#include "tokenizer/struct.h"
#include "tokenizer/put_back.h"
#include "tokenizer/next.h"
#include "parse.h"

// TODO: we need to gracefully handle errors.
// TODO: we also need to allow for newlines ending statements only when they
// could.

// Forward declaration: the grammar is mutually recursive (a lambda body is an
// application, an application is made of primaries, a primary may be a
// parenthesized application or a lambda).
struct expression* parse_application_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses);

// Parse a lambda expression.  On entry the current token is tk_lambda.
// Consumes: LAMBDA name (',' name)* (':'|'.') body.  Multiple parameter
// names desugar into nested single-parameter lambdas, built innermost-first
// so that the FIRST name binds outermost.
//
// `in_parentheses` is forwarded to the body parse so newlines inside
// parentheses do not terminate the body.
//
// Returns an owned expression node.  NOTE(review): the TODO branches call
// exit(1), so malformed input currently aborts the process instead of
// producing an error expression (the pseudocode below describes the intended
// graceful behavior).
struct expression* parse_lambda_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses)
{
	ENTER;

	assert(tokenizer->token->kind == tk_lambda);

	tokenizer_next(tokenizer);

	// Newlines between the lambda token and the first parameter are ignored.
	while (tokenizer->token->kind == tk_newline)
	{
		tokenizer_next(tokenizer);
	}

	if (tokenizer->token->kind != tk_identifier)
	{
		TODO;
		exit(1);
	}

	// Growable array of owned parameter-name strings.
	// `= {}` zero-initializes all members (empty initializer).
	struct { struct string** data; size_t n, cap; } names = {};

	// Collect comma-separated parameter names, skipping interior newlines.
	while (tokenizer->token->kind == tk_identifier)
	{
		if (names.n == names.cap)
		{
			// Doubling growth; `x ?: y` is the GNU "elvis" extension
			// (yields x if nonzero, else y), so an empty array grows to 1.
			names.cap = names.cap << 1 ?: 1;
			names.data = srealloc(names.data,
				sizeof(*names.data) * names.cap);
		}

		// Take a reference to the identifier's text before advancing.
		names.data[names.n++] = inc_string(tokenizer->token->text);

		tokenizer_next(tokenizer);

		while (tokenizer->token->kind == tk_newline)
		{
			tokenizer_next(tokenizer);
		}

		// Comma is optional between names; a non-comma token simply ends
		// the parameter list.
		if (tokenizer->token->kind == tk_comma)
		{
			tokenizer_next(tokenizer);
		}
	}

	// The identifier check above guarantees at least one name.
	assert(names.n);

	while (tokenizer->token->kind == tk_newline)
	{
		tokenizer_next(tokenizer);
	}

	// Either ':' or '.' may introduce the body.
	// (`true && ...` keeps the conditions visually aligned.)
	if (true
		&& tokenizer->token->kind != tk_colon
		&& tokenizer->token->kind != tk_dot)
	{
		dpvu(tokenizer->token->kind);  // debug-print the offending kind
		TODO;
		exit(1);
	}

	tokenizer_next(tokenizer);

	struct expression* body = parse_application_expression(
		tokenizer, in_parentheses);

	// Wrap the body in one lambda per name, iterating names in REVERSE so
	// the first-listed parameter ends up as the outermost binder.
	// `retval` starts as an extra reference to `body`; each step releases
	// the previous layer after it has been captured by the new node.
	struct expression* retval = inc_expression(body);

	for (size_t i = names.n; i > 0; i--)
	{
		struct expression* prev = retval;
		retval = new_lambda_expression(
			/* variable name: */ names.data[i - 1],
			/* body: */ prev);
		free_expression(prev);
	}

	// Release our references; the constructed nodes hold their own.
	for (size_t i = 0; i < names.n; i++)
	{
		free_string(names.data[i]);
	}
	free_expression(body);
	free(names.data);

	EXIT;
	return retval;
}

// Intended (error-tolerant) behavior of parse_primary, in pseudocode:
// def parse-primary():
//     literal
//     variable
//     parenthesis:
//         parse-application(in-parenthesis = true)
//     lambda:
//         eat zero or more newlines
//         if error token:
//             return error expression ("unknown token when reading
//             lambda expression")
//         while not colon:
//             if error token:
//                 return error expression ("unknown token when reading
//                 lambda expression")
//             expect identifier:
//                 return error expression ("unexpected token when reading
//                 lambda expression")
//             add to variable list
//             eat identifier
//             eat zero or more newlines
//             if error token:
//                 return error expression ("unknown token when
//                 reading lambda expression")
//             expect comma
//                 return error expression ("unexpected token when reading
//                 lambda expression, expected comma or colon")
//             eat comma
//         eat colon
//         eat zero or more newlines
//         if error token:
//             return error expression ("unknown token when reading
//             lambda expression")
//         parse-application
//     tk_error:
//         create error expression ("unknown token")
//     EOF:
//         create error expression ("unexpected EOF, expected expression")
//     default:
//         create error expression ("unexpected token")

// Parse a single primary expression: a numeric literal, a variable, a
// parenthesized application, or a lambda.  Advances the tokenizer past
// everything it consumes.  Returns an owned expression node.
//
// NOTE(review): on the tk_newline / tk_error / default paths `retval` is
// never assigned; if TODO does not abort, this returns an uninitialized
// pointer — confirm TODO's semantics.
struct expression* parse_primary_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses)
{
	struct expression* retval;

	ENTER;

	switch (tokenizer->token->kind)
	{
		case tk_newline:
		{
			TODO;
			break;
		}

		case tk_literal:
		{
			// Build a number value from the literal's text, then wrap it
			// in a literal expression.  The expression takes its own
			// references, so our locals are released afterwards.
			struct number* literal = new_number();

			number_set_str(
				/* instance: */ literal,
				/* literal string: */ tokenizer->token->text->data);

			struct value* value = new_number_value(literal);

			retval = new_literal_expression(value);

			tokenizer_next(tokenizer);

			free_value(value);
			free_number(literal);
			break;
		}

		case tk_identifier:
		{
			retval = new_variable_expression(
				/* variable name: */ tokenizer->token->text);
			tokenizer_next(tokenizer);
			break;
		}

		case tk_oparen:
		{
			tokenizer_next(tokenizer);

			// Inside parentheses newlines no longer end the expression.
			struct expression* subexpression =
				parse_application_expression(tokenizer,
					/* in parenthesis: */ true);

			switch (tokenizer->token->kind)
			{
				case tk_cparen:
					retval = new_parenthesis_expression(subexpression);
					break;

				case tk_EOF:
				{
					// Unterminated parenthesis: wrap what we parsed in an
					// error node rather than discarding it.
					retval = new_error_expression(
						L"Unexpected EOF when reading parenthesis. "
						L"Expecting close parenthesis.",
						subexpression);
					break;
				}

				case tk_semicolon:
					TODO;
					break;

				case tk_error:
					TODO;
					break;

				default:
					TODO;
					break;
			}

			// Consume the ')'.  NOTE(review): this also runs on the
			// tk_EOF path — check whether tokenizer_next past EOF is
			// safe/idempotent.
			tokenizer_next(tokenizer);

			free_expression(subexpression);
			break;
		}

		case tk_lambda:
		{
			retval = parse_lambda_expression(tokenizer, in_parentheses);
			break;
		}

		case tk_error:
			TODO;
			break;

		default:
			TODO;
			break;
	}

	EXIT;
	return retval;
}

// # parse-application is greedy, and since it's called for parsing the
// # body of lambdas, and lambdas inside that lambda, etc. the shift/reduce
// # error will work out in our favor
// def parse-application(in-parenthesis = false):
//     parse parse-primary
//     # newlines can't stop us if we're in parenthesis:
//     if in-parenthesis:
//         while current token is newline:
//             eat the newline
//     match (current token):
//         case (number, variable, oparen, lambda):
//             parse that primary, connect the two with an 'application'
//             expression
//             continue as you can
//         case (close paren, EOF, newline, semicolon):
//             if in-parenthesis = false:
//                 create error expression
//             otherwise:
//                 return what you have
//         tk_error:
//             create error expression ("unknown token")
//             connect the two with an 'application' expression
//     return what you have

// Parse a (possibly empty-of-application) sequence of primaries and fold
// them into left-associative application nodes: `f x y` => ((f x) y).
// Leading newlines are skipped.  Stops at newline (unless inside
// parentheses — currently TODO), semicolon, EOF, or ')'.
// Returns an owned expression node (an error expression on EOF).
struct expression* parse_application_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses)
{
	struct expression* retval;

	ENTER;

	while (tokenizer->token->kind == tk_newline)
	{
		tokenizer_next(tokenizer);
	}

	// First primary — the head of the application chain.
	switch (tokenizer->token->kind)
	{
		case tk_identifier:
		case tk_lambda:
		case tk_literal:
		case tk_oparen:
		{
			retval = parse_primary_expression(
				/* tokenizer: */ tokenizer,
				/* in parenthesis? */ in_parentheses);
			break;
		}

		case tk_error:
			TODO;
			break;

		case tk_EOF:
		{
			retval = new_error_expression(L"Unexpected EOF.", NULL);
			break;
		}

		default:
			TODO;
			break;
	}

	// Greedily absorb further primaries as arguments, left-associatively.
	// (A goto-based loop instead of `while` so each iteration can re-switch
	// on the current token kind.)
	again: switch (tokenizer->token->kind)
	{
		case tk_identifier:
		case tk_lambda:
		case tk_literal:
		case tk_oparen:
		{
			struct expression* left = retval;
			struct expression* right = parse_primary_expression(
				tokenizer, in_parentheses);
			retval = new_application_expression(left, right);
			// The application node holds its own references.
			free_expression(left);
			free_expression(right);
			goto again;
		}

		case tk_newline:
		{
			// Inside parentheses a newline should NOT end the
			// application — not yet implemented.
			if (in_parentheses)
			{
				TODO;
			}
			break;
		}

		case tk_semicolon:
		case tk_EOF:
		case tk_cparen:
		{
			// Natural terminators; leave them for the caller.
			break;
		}

		case tk_error:
		{
			TODO;
			break;
		}

		default:
		{
			TODO;
			break;
		}
	}

	EXIT;
	return retval;
}

// Parse one statement.  An identifier followed by '<-' (tk_arrow) is an
// assignment; anything else that can start an expression becomes an
// expression statement.  A stray ')' yields an error statement.
// Returns an owned statement node.
//
// NOTE(review): as in parse_primary_expression, the TODO paths leave
// `retval` unassigned.
struct statement* parse_statement(
	struct tokenizer* tokenizer)
{
	struct statement* retval;

	ENTER;

	switch (tokenizer->token->kind)
	{
		case tk_identifier:
		{
			// Hold the identifier token: we need one token of lookahead to
			// decide between assignment and expression statement.
			struct token* token = inc_token(tokenizer->token);

			tokenizer_next(tokenizer);

			switch (tokenizer->token->kind)
			{
				case tk_arrow:
				{
					// `name <- expression`
					tokenizer_next(tokenizer);

					struct expression* expression =
						parse_application_expression(
							/* tokenizer: */ tokenizer,
							/* in parenthesis: */ false);

					retval = new_assignment_statement(
						/* variable name: */ token->text,
						/* expression: */ expression);

					free_expression(expression);
					break;
				}

				case tk_newline:
				case tk_EOF:
				case tk_semicolon:
				{
					// Bare identifier: a lone variable reference.
					struct expression* expression =
						new_variable_expression(
							/* variable name: */ token->text);

					retval = new_expression_statement(expression);

					free_expression(expression);
					break;
				}

				case tk_oparen:
				case tk_identifier:
				case tk_literal:
				case tk_lambda:
				{
					// The identifier begins an application (`f x ...`):
					// push it back so the expression parser sees it first.
					tokenizer_put_back(tokenizer, token);

					struct expression* exp =
						parse_application_expression(
							/* tokenizer: */ tokenizer,
							/* in parenthesis? */ false);

					retval = new_expression_statement(
						/* expression: */ exp);

					free_expression(exp);
					break;
				}

				case tk_error:
					TODO;
					// create error statement ("unknown token",
					//     stragglers = [first token])
					// return what you have
					break;

				default:
					TODO;
					break;
			}

			free_token(token);
			break;
		}

		case tk_lambda:
		case tk_literal:
		case tk_oparen:
		{
			// call parse-application
			struct expression* exp = parse_application_expression(
				/* tokenizer: */ tokenizer,
				/* in parenthesis? */ false);

			retval = new_expression_statement(
				/* expression: */ exp);

			free_expression(exp);
			break;
		}

		case tk_semicolon:
			TODO;
			break;

		case tk_cparen:
		{
			retval = new_error_statement(
				L"Unexpected close parenthesis when reading statement",
				NULL);
			break;
		}

		case tk_error:
			TODO;
			// create error statement ("unknown token")
			// return what you have
			break;

		default:
			TODO;
			// create error statement ("unexpected token")
			// return what you have
			break;
	}

	EXIT;
	return retval;
}

// Parse a run of statements up to a newline or EOF, folding them into
// left-nested "substatements" nodes.  Semicolons between statements are
// consumed; stray ')' tokens wrap the accumulated result in error
// statements.  A trailing newline is consumed.  Returns an owned,
// non-NULL statement (asserted).
//
// Precondition: the current token starts a statement (not newline/EOF).
struct statement* parse_statements(
	struct tokenizer* tokenizer)
{
	ENTER;

	assert(tokenizer->token->kind != tk_newline);
	assert(tokenizer->token->kind != tk_EOF);

	struct statement* left = NULL;

	while (true
		&& tokenizer->token->kind != tk_newline
		&& tokenizer->token->kind != tk_EOF)
	{
		struct statement* right = parse_statement(tokenizer);

		if (left)
		{
			// Fold: left = (left ; right).  The new node takes its own
			// references, so release ours to the old accumulator.
			struct statement* oldleft = left;
			left = new_substatements_statement(left, right);
			free_statement(oldleft);
		}
		else
		{
			left = inc_statement(right);
		}

		// Unmatched ')' at statement level: layer error statements around
		// what we have, one per stray paren.
		// NOTE(review): `left` is passed to the constructor and then the
		// same pointer is released as `oldleft` — relies on the
		// constructor taking its own reference; confirm.
		while (tokenizer->token->kind == tk_cparen)
		{
			struct statement* oldleft = left;
			left = new_error_statement(L"Extra close parenthesis.", left);
			free_statement(oldleft);
			tokenizer_next(tokenizer);
		}

		// Semicolons separate statements; consume any run of them.
		while (tokenizer->token->kind == tk_semicolon)
		{
			tokenizer_next(tokenizer);
		}

		free_statement(right);
	}

	// Consume the statement-terminating newline, if that's what stopped us.
	if (tokenizer->token->kind == tk_newline)
	{
		tokenizer_next(tokenizer);
	}

	assert(left);

	EXIT;
	return left;
}

// Public entry point: parse the tokenizer's stream into a statement tree.
// Thin wrapper around parse_statements (tracing via ENTER/EXIT).
struct statement* parse(
	struct tokenizer* tokenizer)
{
	ENTER;

	struct statement* root = parse_statements(tokenizer);

	EXIT;
	return root;
}