651 lines
16 KiB
C
651 lines
16 KiB
C
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
|
|
#include <debug.h>
|
|
|
|
#include <string/struct.h>
|
|
#include <string/free.h>
|
|
|
|
#include <number/new.h>
|
|
#include <number/set_str.h>
|
|
#include <number/free.h>
|
|
|
|
#include <memory/srealloc.h>
|
|
|
|
#include <value/number/new.h>
|
|
#include <value/free.h>
|
|
|
|
#include <string/inc.h>
|
|
|
|
#include <expression/literal/new.h>
|
|
#include <expression/variable/new.h>
|
|
#include <expression/lambda/new.h>
|
|
#include <expression/application/new.h>
|
|
#include <expression/parenthesis/new.h>
|
|
#include <expression/error/new.h>
|
|
#include <expression/inc.h>
|
|
#include <expression/free.h>
|
|
|
|
#include <statement/expression/new.h>
|
|
#include <statement/substatements/new.h>
|
|
#include <statement/assignment/new.h>
|
|
#include <statement/error/new.h>
|
|
#include <statement/inc.h>
|
|
#include <statement/free.h>
|
|
|
|
#include "token/struct.h"
|
|
#include "token/inc.h"
|
|
#include "token/free.h"
|
|
|
|
#include "tokenizer/struct.h"
|
|
#include "tokenizer/put_back.h"
|
|
#include "tokenizer/next.h"
|
|
|
|
#include "parse.h"
|
|
|
|
// we need to gracefully handle errors
|
|
|
|
// we also need to allow for newlines ending statements only when they \
|
|
could.
|
|
|
|
// Forward declaration: lambda bodies are parsed as applications, and
// parse_application_expression (defined later in this file) recurses
// back into primary/lambda parsing.
struct expression* parse_application_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses);
|
|
|
|
struct expression* parse_lambda_expression(
|
|
struct tokenizer* tokenizer,
|
|
bool in_parentheses)
|
|
{
|
|
ENTER;
|
|
|
|
assert(tokenizer->token->kind == tk_lambda);
|
|
|
|
tokenizer_next(tokenizer);
|
|
|
|
while (tokenizer->token->kind == tk_newline)
|
|
{
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
|
|
if (tokenizer->token->kind != tk_identifier)
|
|
{
|
|
TODO;
|
|
exit(1);
|
|
}
|
|
|
|
struct {
|
|
struct string** data;
|
|
size_t n, cap;
|
|
} names = {};
|
|
|
|
while (tokenizer->token->kind == tk_identifier)
|
|
{
|
|
if (names.n == names.cap)
|
|
{
|
|
names.cap = names.cap << 1 ?: 1;
|
|
|
|
names.data = srealloc(names.data,
|
|
sizeof(*names.data) * names.cap);
|
|
}
|
|
|
|
names.data[names.n++] = inc_string(tokenizer->token->text);
|
|
|
|
tokenizer_next(tokenizer);
|
|
|
|
while (tokenizer->token->kind == tk_newline)
|
|
{
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
|
|
if (tokenizer->token->kind == tk_comma)
|
|
{
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
}
|
|
|
|
assert(names.n);
|
|
|
|
while (tokenizer->token->kind == tk_newline)
|
|
{
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
|
|
if (true
|
|
&& tokenizer->token->kind != tk_colon
|
|
&& tokenizer->token->kind != tk_dot)
|
|
{
|
|
dpvu(tokenizer->token->kind);
|
|
|
|
TODO;
|
|
exit(1);
|
|
}
|
|
|
|
tokenizer_next(tokenizer);
|
|
|
|
struct expression* body = parse_application_expression(
|
|
tokenizer, in_parentheses);
|
|
|
|
struct expression* retval = inc_expression(body);
|
|
|
|
for (size_t i = names.n; i > 0; i--)
|
|
{
|
|
struct expression* prev = retval;
|
|
|
|
retval = new_lambda_expression(
|
|
/* variable name: */ names.data[i - 1],
|
|
/* body: */ prev);
|
|
|
|
free_expression(prev);
|
|
}
|
|
|
|
for (size_t i = 0; i < names.n; i++)
|
|
{
|
|
free_string(names.data[i]);
|
|
}
|
|
|
|
free_expression(body);
|
|
|
|
free(names.data);
|
|
|
|
EXIT;
|
|
return retval;
|
|
}
|
|
|
|
// def parse-primary():
|
|
// literal
|
|
// variable
|
|
// parenthesis:
|
|
// parse-application(in-parenthesis = true)
|
|
// lambda:
|
|
// eat zero or more newlines
|
|
// if error token:
|
|
// return error expression ("unknown token when reading \
|
|
lambda expression")
|
|
// while not colon:
|
|
// if error token:
|
|
// return error expression ("unknown token when reading \
|
|
lambda expression")
|
|
// expect identifier:
|
|
// return error expression ("unexpected token when reading \
|
|
lambda expression")
|
|
// add to variable list
|
|
// eat identifier
|
|
// eat zero or more newlines
|
|
// if error token:
|
|
// return error expression ("unknown token when \
|
|
reading lambda expression")
|
|
// expect comma
|
|
// return error expression ("unexpected token when reading \
|
|
lambda expression, expected comma or colon")
|
|
// eat comma
|
|
// eat colon
|
|
// eat zero or more newlines
|
|
// if error token:
|
|
// return error expression ("unknown token when reading \
|
|
lambda expression")
|
|
// parse-application
|
|
// tk_error:
|
|
// create error expression ("unknown token")
|
|
// EOF:
|
|
// create error expression ("unexpected EOF, expected expression")
|
|
// default:
|
|
// create error expression ("unexpected token")
|
|
|
|
// Parses a single "primary" expression: a numeric literal, a variable,
// a parenthesised application, or a lambda.  The pseudocode comments
// above describe the intended (not yet implemented) error recovery.
//
// NOTE(review): in the tk_newline, tk_error, and default arms, TODO is
// hit and `retval` is never assigned before the final return — this
// assumes the TODO macro in <debug.h> aborts; confirm.
struct expression* parse_primary_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses)
{
	struct expression* retval;
	ENTER;
	
	switch (tokenizer->token->kind)
	{
		case tk_newline:
		{
			TODO;
			break;
		}
		
		case tk_literal:
		{
			// build number -> value -> literal expression; the
			// intermediates are refcounted, so release our references
			// once the expression holds its own:
			struct number* literal = new_number();
			
			number_set_str(
				/* instance: */ literal,
				/* literal string: */ tokenizer->token->text->data);
			
			struct value* value = new_number_value(literal);
			
			retval = new_literal_expression(value);
			
			tokenizer_next(tokenizer);
			
			free_value(value);
			
			free_number(literal);
			break;
		}
		
		case tk_identifier:
		{
			retval = new_variable_expression(
				/* variable name: */ tokenizer->token->text);
			
			tokenizer_next(tokenizer);
			
			break;
		}
		
		case tk_oparen:
		{
			// '(': parse the inner application with in_parentheses true
			// so newlines inside do not terminate it:
			tokenizer_next(tokenizer);
			
			struct expression* subexpression =
				parse_application_expression(tokenizer,
					/* in parenthesis: */ true);
			
			switch (tokenizer->token->kind)
			{
				case tk_cparen:
					retval = new_parenthesis_expression(subexpression);
					break;
				
				case tk_EOF:
				{
					retval = new_error_expression(
						L"Unexpected EOF when reading parenthesis. "
						L"Expecting close parenthesis.",
						subexpression);
					
					break;
				}
				
				case tk_semicolon:
					TODO;
					break;
				
				case tk_error:
					TODO;
					break;
				
				default:
					TODO;
					break;
			}
			
			// consume the ')' — NOTE(review): this also runs after the
			// tk_EOF arm above; verify tokenizer_next is safe at EOF.
			tokenizer_next(tokenizer);
			
			free_expression(subexpression);
			
			break;
		}
		
		case tk_lambda:
		{
			retval = parse_lambda_expression(tokenizer, in_parentheses);
			
			break;
		}
		
		case tk_error:
			TODO;
			break;
		
		default:
			TODO;
			break;
	}
	
	EXIT;
	return retval;
}
|
|
|
|
// # parse-application is greedy, and since it's called for parsing the \
|
|
# body of lambdas, and lambdas inside that lambda, etc. the shift/reduce \
|
|
# error will work out in our favor
|
|
|
|
// def parse-application(in-parenthesis = false):
|
|
// parse parse-primary
|
|
|
|
// # newlines can't stop us if we're in parenthesis:
|
|
// if in-parenthesis:
|
|
// while current token is newline:
|
|
// eat the newline
|
|
|
|
// match (current token):
|
|
// case (number, variable, oparen, lambda):
|
|
// parse that primary, connect the two with an 'application' \
|
|
expression
|
|
// continue as you can
|
|
|
|
// case (close paren, EOF, newline, semicolon):
|
|
// if in-parenthesis = false:
|
|
// create error expression
|
|
// otherwise:
|
|
// return what you have
|
|
|
|
// tk_error:
|
|
// create error expression ("unknown token")
|
|
// connect the two with an 'application' expression
|
|
// return what you have
|
|
|
|
// Parses a left-associative application: one primary expression
// followed by zero or more further primaries, each pair joined by an
// application node, so "f x y" parses as ((f x) y).  Greedy, as the
// design notes above explain — lambda bodies extend as far as possible.
//
// When in_parentheses is true, a newline should continue the
// application rather than end it (that continuation is still TODO).
struct expression* parse_application_expression(
	struct tokenizer* tokenizer,
	bool in_parentheses)
{
	struct expression* retval;
	ENTER;
	
	// leading newlines never begin an expression; skip them:
	while (tokenizer->token->kind == tk_newline)
	{
		tokenizer_next(tokenizer);
	}
	
	// parse the first (leftmost) primary:
	switch (tokenizer->token->kind)
	{
		case tk_identifier:
		case tk_lambda:
		case tk_literal:
		case tk_oparen:
		{
			retval = parse_primary_expression(
				/* tokenizer: */ tokenizer,
				/* in parenthesis? */ in_parentheses);
			break;
		}
		
		case tk_error:
			TODO;
			break;
		
		case tk_EOF:
		{
			retval = new_error_expression(L"Unexpected EOF.", NULL);
			
			break;
		}
		
		default:
			TODO;
			break;
	}
	
	// is the next thing a part of the application?
	
	again: switch (tokenizer->token->kind)
	{
		case tk_identifier:
		case tk_lambda:
		case tk_literal:
		case tk_oparen:
		{
			// another primary follows: fold it into an application node.
			// new_application_expression takes its own references, so
			// drop ours afterwards:
			struct expression* left = retval;
			
			struct expression* right = parse_primary_expression(
				tokenizer, in_parentheses);
			
			retval = new_application_expression(left, right);
			
			free_expression(left);
			
			free_expression(right);
			
			goto again;
		}
		
		case tk_newline:
		{
			// inside parentheses the newline should be skipped and the
			// application continued; not yet implemented:
			if (in_parentheses)
			{
				TODO;
			}
			
			break;
		}
		
		case tk_semicolon:
		case tk_EOF:
		case tk_cparen:
		{
			// legitimate terminators: the application ends here.
			break;
		}
		
		case tk_error:
		{
			TODO;
			break;
		}
		
		default:
		{
			TODO;
			break;
		}
	}
	
	EXIT;
	return retval;
}
|
|
|
|
// Parses a single statement: an assignment ("name := expr", currently
// disabled under #if 0), a bare expression, or an error statement for
// tokens that cannot begin a statement.
//
// An initial identifier is ambiguous (assignment vs. expression), so
// it is retained, the following token inspected, and the identifier
// pushed back into the tokenizer when it turns out to begin an
// application.
struct statement* parse_statement(
	struct tokenizer* tokenizer)
{
	struct statement* retval;
	ENTER;
	
	switch (tokenizer->token->kind)
	{
		case tk_identifier:
		{
			// hold a reference to the identifier while we peek ahead:
			struct token* token = inc_token(tokenizer->token);
			
			tokenizer_next(tokenizer);
			
			switch (tokenizer->token->kind)
			{
				case tk_colonequals:
				{
					// assignment support is stubbed out for now:
					TODO;
					#if 0
					tokenizer_next(tokenizer);
					
					struct expression* expression =
						parse_application_expression(
							/* tokenizer: */ tokenizer,
							/* in parenthesis: */ false);
					
					retval = new_assignment_statement(
						/* variable name: */ token->text,
						/* expression: */ expression);
					
					free_expression(expression);
					#endif
					
					break;
				}
				
				case tk_newline:
				case tk_EOF:
				case tk_semicolon:
				{
					// lone identifier: a bare variable-expression statement.
					struct expression* expression = new_variable_expression(
						/* variable name: */ token->text);
					
					retval = new_expression_statement(expression);
					
					free_expression(expression);
					
					break;
				}
				
				case tk_oparen:
				case tk_identifier:
				case tk_literal:
				case tk_lambda:
				{
					// the identifier begins an application ("f x ..."):
					// put it back so the expression parser sees it first.
					tokenizer_put_back(tokenizer, token);
					
					struct expression* exp = parse_application_expression(
						/* tokenizer: */ tokenizer,
						/* in parenthesis? */ false);
					
					retval = new_expression_statement(
						/* expression: */ exp);
					
					free_expression(exp);
					break;
				}
				
				case tk_error:
					TODO;
					// create error statement ("unknown token",
					//     stragglers = [first token])
					// return what you have
					break;
				
				default:
					TODO;
					break;
			}
			
			// release our hold on the peeked identifier token:
			free_token(token);
			
			break;
		}
		
		case tk_lambda:
		case tk_literal:
		case tk_oparen:
		{
			// call parse-application
			struct expression* exp = parse_application_expression(
				/* tokenizer: */ tokenizer,
				/* in parenthesis? */ false);
			
			retval = new_expression_statement(
				/* expression: */ exp);
			
			free_expression(exp);
			break;
		}
		
		case tk_semicolon:
			TODO;
			break;
		
		case tk_cparen:
		{
			retval = new_error_statement(
				L"Unexpected close parenthesis when reading statement", NULL);
			
			break;
		}
		
		case tk_error:
			TODO;
			// create error statement ("unknown token")
			// return what you have
			break;
		
		default:
			TODO;
			// create error statement ("unexpected token")
			// return what you have
			break;
	}
	
	EXIT;
	return retval;
}
|
|
|
|
struct statement* parse_statements(
|
|
struct tokenizer* tokenizer)
|
|
{
|
|
ENTER;
|
|
|
|
assert(tokenizer->token->kind != tk_newline);
|
|
assert(tokenizer->token->kind != tk_EOF);
|
|
|
|
struct statement* left = NULL;
|
|
|
|
while (true
|
|
&& tokenizer->token->kind != tk_newline
|
|
&& tokenizer->token->kind != tk_EOF)
|
|
{
|
|
struct statement* right = parse_statement(tokenizer);
|
|
|
|
if (left)
|
|
{
|
|
struct statement* oldleft = left;
|
|
|
|
left = new_substatements_statement(left, right);
|
|
|
|
free_statement(oldleft);
|
|
}
|
|
else
|
|
{
|
|
left = inc_statement(right);
|
|
}
|
|
|
|
while (tokenizer->token->kind == tk_cparen)
|
|
{
|
|
struct statement* oldleft = left;
|
|
|
|
left = new_error_statement(L"Extra close parenthesis.", left);
|
|
|
|
free_statement(oldleft);
|
|
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
|
|
while (tokenizer->token->kind == tk_semicolon)
|
|
{
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
|
|
free_statement(right);
|
|
}
|
|
|
|
if (tokenizer->token->kind == tk_newline)
|
|
{
|
|
tokenizer_next(tokenizer);
|
|
}
|
|
|
|
assert(left);
|
|
|
|
EXIT;
|
|
return left;
|
|
}
|
|
|
|
struct statement* parse(
|
|
struct tokenizer* tokenizer)
|
|
{
|
|
ENTER;
|
|
|
|
struct statement* root = parse_statements(tokenizer);
|
|
|
|
EXIT;
|
|
return root;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|