|
|
|
@@ -1,5 +1,8 @@ |
|
|
|
namespace Parser { |
|
|
|
|
|
|
|
// Parser position state shared by every parsing routine in this namespace.

// Source label that parse_single_expression stores into parser_file, i.e. the
// label used when the parsed text was not handed over with a file name.
String* standard_in; |
|
|
|
// Label of the text currently being parsed: set to standard_in by
// parse_single_expression and to the caller's file_name by parse_program.
String* parser_file; |
|
|
|
// Current line; both entry points reset it to 1 and step_char increments it
// for every '\n' it steps over.
int parser_line; |
|
|
|
// Current column; step_char resets it to 0 on '\n' and then increments it for
// every character stepped over.
int parser_col; |
|
|
|
|
|
|
|
proc eat_comment_line(char* text, int* index_in_text) -> void { |
|
|
|
// safety check if we are actually starting a comment here |
|
|
|
@@ -15,6 +18,17 @@ namespace Parser { |
|
|
|
text[(*index_in_text)] != '\0'); |
|
|
|
} |
|
|
|
|
|
|
|
// Advances the cursor `*index_in_text` by `steps` characters (default 1) and
// keeps the parser position globals in sync while doing so.
//
// For every character stepped over: when it is a '\n', `parser_line` is
// incremented and `parser_col` is reset to 0; afterwards `parser_col` and the
// cursor are both incremented, so the column reads 1 right behind a line
// break. A non-positive `steps` does nothing. No check for the terminating
// '\0' is made here; callers must not step past the end of `text`.
proc step_char(char* text, int* index_in_text, int steps = 1) {
    int remaining = steps;
    while (remaining > 0) {
        const bool at_line_break = (text[*index_in_text] == '\n');
        if (at_line_break) {
            parser_line += 1;
            parser_col = 0;
        }
        parser_col += 1;
        *index_in_text += 1;
        remaining -= 1;
    }
}
|
|
|
|
|
|
|
proc eat_whitespace(char* text, int* index_in_text) -> void { |
|
|
|
// skip whitespaces |
|
|
|
while (text[(*index_in_text)] == ' ' || |
|
|
|
@@ -22,12 +36,7 @@ namespace Parser { |
|
|
|
text[(*index_in_text)] == '\n' || |
|
|
|
text[(*index_in_text)] == '\r') |
|
|
|
{ |
|
|
|
if (text[(*index_in_text)] == '\n') { |
|
|
|
++parser_line; |
|
|
|
parser_col = 0; |
|
|
|
} |
|
|
|
++parser_col; |
|
|
|
++(*index_in_text); |
|
|
|
step_char(text, index_in_text); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
@@ -40,13 +49,8 @@ namespace Parser { |
|
|
|
} while (position_before != *index_in_text); |
|
|
|
} |
|
|
|
|
|
|
|
// Text-blind single step: moves the cursor `*index_in_text` and `parser_col`
// forward by one. It never inspects the character being stepped over, so
// `parser_line` is left untouched even when that character is a '\n'.
proc step_char(int* index_in_text) {
    *index_in_text += 1;
    parser_col += 1;
}
|
|
|
|
|
|
|
// Steps over exactly one character (with line/column tracking through the
// text-aware step_char) and then hands the cursor to eat_until_code, which
// advances it until it stops moving (presumably past whitespace and
// comments -- confirm against eat_until_code).
//
// text:          NUL-terminated source buffer.
// index_in_text: cursor into `text`, advanced in place.
//
// The character is stepped over once only; stepping a second time with the
// text-blind overload would silently swallow the first character of whatever
// follows.
proc step_char_and_eat_until_code(char* text, int* index_in_text) {
    step_char(text, index_in_text);
    eat_until_code(text, index_in_text);
}
|
|
|
|
|
|
|
@@ -56,57 +60,225 @@ namespace Parser { |
|
|
|
return nullptr; |
|
|
|
} |
|
|
|
|
|
|
|
Lisp_Objcet* ret; |
|
|
|
Lisp_Objcet* head; |
|
|
|
Lisp_Object* ret; |
|
|
|
Lisp_Object* head; |
|
|
|
try ret = Memory::create_lisp_object_pair(first_elem, Memory::nil); |
|
|
|
head = ret; |
|
|
|
|
|
|
|
step_char(index_in_text); |
|
|
|
step_char(text, index_in_text); |
|
|
|
|
|
|
|
eat_until_code(text, index_in_text); |
|
|
|
while (text[*index_in_text] != r_delimiter) { |
|
|
|
eat_until_code(text, index_in_text); |
|
|
|
Lisp_Object* element; |
|
|
|
try element = parse_expression(text, index_in_text); |
|
|
|
try head.value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil); |
|
|
|
head = head.value.pair.rest; |
|
|
|
try head->value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil); |
|
|
|
head = head->value.pair.rest; |
|
|
|
eat_until_code(text, index_in_text); |
|
|
|
} |
|
|
|
|
|
|
|
step_char(text, index_in_text); |
|
|
|
|
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
// Measures the atom that starts at the cursor without moving the cursor.
//
// Returns the number of characters from `text[*index_in_text]` up to (not
// including) the first delimiter. Delimiters are: space, tab, '\n', '\r',
// the brackets ( ) [ ] { } and the terminating '\0'. The result is 0 when
// the cursor already sits on a delimiter.
proc get_atom_text_length(char* text, int* index_in_text) -> int {
    const char* atom_start = text + *index_in_text;
    int atom_length = 0;

    for (;;) {
        switch (atom_start[atom_length]) {
            case ' ':
            case ')':
            case '(':
            case '[':
            case ']':
            case '{':
            case '}':
            case '\0':
            case '\n':
            case '\r':
            case '\t':
                // hit a delimiter: everything before it belongs to the atom
                return atom_length;
            default:
                ++atom_length;
                break;
        }
    }
}
|
|
|
|
|
|
|
// Parses the number atom at the cursor into a freshly created number object.
//
// The object is created with value 0 and then filled in by sscanf("%lf")
// reading from the cursor position. The cursor is afterwards advanced by the
// full atom length as reported by get_atom_text_length, regardless of how
// many characters sscanf actually consumed.
//
// Returns the new number object.
proc parse_number(char* text, int* index_in_text) -> Lisp_Object* {
    char* number_start = text + *index_in_text;

    Lisp_Object* number;
    try number = Memory::create_lisp_object_number(0);
    sscanf(number_start, "%lf", &number->value.number);

    // skip the whole atom, not just the part sscanf understood
    step_char(text, index_in_text, get_atom_text_length(text, index_in_text));

    return number;
}
|
|
|
|
|
|
|
// Parses the atom at the cursor as a keyword (when it starts with ':') or as
// a symbol (otherwise) and returns the interned object obtained from
// Memory::get_or_create_lisp_object_keyword / _symbol.
//
// text:          NUL-terminated source buffer; it is modified only
//                transiently while the atom text is copied out.
// index_in_text: cursor into `text`; on success it ends up on the first
//                character after the atom.
//
// The leading ':' of a keyword is stepped over and is not part of the stored
// name.
proc parse_symbol_or_keyword(char* text, int* index_in_text) -> Lisp_Object* {
    bool keyword = false;
    if (text[*index_in_text] == ':') {
        keyword = true;
        step_char(text, index_in_text);
    }

    int atom_length = get_atom_text_length(text, index_in_text);

    String* str_keyword;
    Lisp_Object* ret;
    // allocate first, so that a failing allocation cannot leave the source
    // buffer in its temporarily terminated state
    try str_keyword = Memory::create_string("", atom_length);

    // strcpy needs a terminator right behind the atom: plant one, copy, and
    // put the original character back at that very same position before
    // anything else can return early or move the cursor
    char orig = text[*index_in_text+atom_length];
    text[*index_in_text+atom_length] = '\0';
    strcpy(&str_keyword->data, text+*index_in_text);
    text[*index_in_text+atom_length] = orig;

    if (keyword) {
        try ret = Memory::get_or_create_lisp_object_keyword(str_keyword);
    } else {
        try ret = Memory::get_or_create_lisp_object_symbol(str_keyword);
    }

    // the cursor only ever moves through step_char so that parser_line and
    // parser_col stay correct
    step_char(text, index_in_text, atom_length);

    return ret;
}
|
|
|
|
|
|
|
// Parses the string literal whose opening '"' is at the cursor and returns a
// new string object holding the unescaped contents.
//
// text:          NUL-terminated source buffer. The literal is unescaped in
//                place (see the NOTE below), so the bytes between the quotes
//                are rewritten by this call.
// index_in_text: cursor into `text`; on success it ends up on the first
//                character after the closing '"'.
//
// Returns nullptr after reporting a parsing error when the input ends before
// the closing '"' is found.
proc parse_string(char* text, int* index_in_text) -> Lisp_Object* {
    // the first character is the '"'
    step_char(text, index_in_text);

    Lisp_Object* ret;
    String* string;

    // now we are at the first letter, if this is the closing '"' then
    // it's easy
    if (text[*index_in_text] == '"') {
        try string = Memory::create_string("", 0);
        try ret = Memory::create_lisp_object_string(string);
        // inject_scl(ret);

        // plus one because we want to go after the quotes
        step_char(text, index_in_text);

        return ret;
    }

    // okay so the first letter was not actually closing the string...
    // Look for the first '"' that is not preceded by an active backslash.
    int string_length = 0;
    bool escaping = false;
    while (escaping || text[*index_in_text+string_length] != '"') {
        if (text[*index_in_text+string_length] == '\0') {
            // ran into the end of the buffer: without this check the scan
            // would keep reading past the terminator
            create_parsing_error("the string is missing its closing quote.");
            return nullptr;
        }

        if (escaping) {
            escaping = false;
        } else if (text[*index_in_text+string_length] == '\\') {
            escaping = true;
        }

        ++string_length;
    }

    // we found the end of the string
    text[*index_in_text+string_length] = '\0';

    // NOTE(Felix): Tactic: Through unescaping the string will
    // only get shorter, so we replace it inplace and later jump
    // to the original end of the string.
    int new_len;
    try new_len = unescape_string(text+(*index_in_text));

    try string = Memory::create_string("", new_len);

    strcpy(&string->data, text+(*index_in_text));
    // printf("------ %s\n", &string->data);

    // put the closing quote back where the temporary terminator was
    text[*index_in_text+string_length] = '"';

    // plus one because we want to go after the quotes
    step_char(text, index_in_text, string_length+1);

    try ret = Memory::create_lisp_object_string(string);

    // inject_scl(ret);
    return ret;
}
|
|
|
|
|
|
|
// Parses one atom at the cursor by dispatching on how it starts:
//   * a digit, or a '+', '-' or '.' immediately followed by a digit
//                    -> parse_number
//   * a '"'          -> parse_string
//   * anything else  -> parse_symbol_or_keyword
// Returns whatever the chosen sub-parser returns.
proc parse_atom(char* text, int* index_in_text) -> Lisp_Object* {
    // '0'..'9' are the character codes 48..57
    auto is_digit = [](char c) { return c >= '0' && c <= '9'; };

    const char first = text[*index_in_text];

    bool starts_number = is_digit(first);
    if (!starts_number && (first == '+' || first == '-' || first == '.')) {
        // a sign or a leading dot only counts when a digit follows directly
        starts_number = is_digit(text[*index_in_text + 1]);
    }

    Lisp_Object* ret;
    if (starts_number) {
        try ret = parse_number(text, index_in_text);
    } else if (first == '"') {
        try ret = parse_string(text, index_in_text);
    } else {
        try ret = parse_symbol_or_keyword(text, index_in_text);
    }

    return ret;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Parses a parenthesised list starting at the cursor.
//
// text:          NUL-terminated source buffer.
// index_in_text: cursor into `text`; must sit on the opening '('. On
//                success it ends up right behind the closing ')'.
//
// Returns
//   * Memory::nil for the empty list "()",
//   * a chain of pairs, one per element, terminated by Memory::nil, or
//   * for the dotted form "(a b . c)" a chain whose last pair has the
//     expression after the dot as its rest.
// Returns nullptr after reporting a parsing error when the cursor is not on
// a '(', when a dotted tail is not directly followed by ')', or when the
// input ends before the list is closed.
proc parse_list(char* text, int* index_in_text) -> Lisp_Object* {
    if (text[*index_in_text] != '(') {
        create_parsing_error("a list cannot be parsed here");
        return nullptr;
    }
    step_char_and_eat_until_code(text, index_in_text);

    if (text[*index_in_text] == ')') {
        // empty list: consume the ')' as well
        step_char(text, index_in_text);
        return Memory::nil;
    }

    Lisp_Object* first_elem;
    Lisp_Object* ret;
    Lisp_Object* head;

    try first_elem = parse_expression(text, index_in_text);
    try ret = Memory::create_lisp_object_pair(first_elem, Memory::nil);
    // `head` always points at the last pair of the chain built so far
    head = ret;

    eat_until_code(text, index_in_text);
    while (text[*index_in_text] != ')') {
        if (text[*index_in_text] == '\0') {
            // without this check an unterminated list would keep producing
            // empty atoms at the terminator forever
            create_parsing_error("the input ended before the list was closed.");
            return nullptr;
        }

        Lisp_Object* element;

        if (text[*index_in_text+0] == '.' &&
            text[*index_in_text+1] == ' ')
        {
            // dotted tail: the next expression becomes the rest of the last
            // pair instead of a further element
            step_char(text, index_in_text, 2);
            try element = parse_expression(text, index_in_text);
            head->value.pair.rest = element;

            eat_until_code(text, index_in_text);
            if (text[*index_in_text] != ')') {
                create_parsing_error("expected the list to end after the dotted end.");
                return nullptr;
            }
            step_char(text, index_in_text);
            return ret;
        }

        try element = parse_expression(text, index_in_text);
        try head->value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil);
        head = head->value.pair.rest;
        eat_until_code(text, index_in_text);
    }

    // consume the closing ')'
    step_char(text, index_in_text);
    return ret;
}
|
|
|
|
|
|
|
proc maybe_expand_short_form(char* text, int* index_in_text) -> Lisp_Object* { |
|
|
|
@@ -118,34 +290,37 @@ namespace Parser { |
|
|
|
Lisp_Object* unquote_sym = Memory::get_or_create_lisp_object_symbol("unquote"); |
|
|
|
Lisp_Object* unquote_splicing_sym = Memory::get_or_create_lisp_object_symbol("unquote-splicing"); |
|
|
|
|
|
|
|
Lisp_Object* ret; |
|
|
|
Lisp_Object* ret = nullptr; |
|
|
|
Lisp_Object* expr; |
|
|
|
|
|
|
|
switch (text[*index_in_text]) { |
|
|
|
case '\'': { |
|
|
|
// quote |
|
|
|
step_char_and_eat_until_code(text, index_in_text); |
|
|
|
expr = parse_expresion(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(quote_sym, expr); |
|
|
|
try expr = parse_expression(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(expr, Memory::nil); |
|
|
|
try ret = Memory::create_lisp_object_pair(quote_sym, ret); |
|
|
|
} break; |
|
|
|
case '`': { |
|
|
|
// quasiquote |
|
|
|
step_char_and_eat_until_code(text, index_in_text); |
|
|
|
expr = parse_expresion(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(quasiquote_sym, expr); |
|
|
|
try expr = parse_expression(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(expr, Memory::nil); |
|
|
|
try ret = Memory::create_lisp_object_pair(quasiquote_sym, ret); |
|
|
|
} break; |
|
|
|
case ',': { |
|
|
|
if (text[*index_in_text+1] == '@') { |
|
|
|
step_char_and_eat_until_code(text, index_in_text); |
|
|
|
if (text[*index_in_text] == '@') { |
|
|
|
// unquote-splicing |
|
|
|
step_char(text, index_in_text); |
|
|
|
step_char_and_eat_until_code(itext, index_in_text); |
|
|
|
expr = parse_expresion(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(unquote_splicing_sym, expr); |
|
|
|
step_char_and_eat_until_code(text, index_in_text); |
|
|
|
try expr = parse_expression(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(expr, Memory::nil); |
|
|
|
try ret = Memory::create_lisp_object_pair(unquote_splicing_sym, ret); |
|
|
|
} else { |
|
|
|
// unquote |
|
|
|
expr = parse_expresion(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(unquote_sym, expr); |
|
|
|
step_char_and_eat_until_code(text, index_in_text); |
|
|
|
try expr = parse_expression(text, index_in_text); |
|
|
|
try ret = Memory::create_lisp_object_pair(expr, Memory::nil); |
|
|
|
try ret = Memory::create_lisp_object_pair(unquote_sym, ret); |
|
|
|
} |
|
|
|
} break; |
|
|
|
case '[': { |
|
|
|
@@ -155,7 +330,6 @@ namespace Parser { |
|
|
|
case '{': { |
|
|
|
// hashmap |
|
|
|
try ret = parse_fancy_delimiter(text, index_in_text, '{', '}', hash_map_sym); |
|
|
|
try parse_hash_map(text, index_in_text); |
|
|
|
} break; |
|
|
|
default: break; |
|
|
|
} |
|
|
|
@@ -165,13 +339,50 @@ namespace Parser { |
|
|
|
|
|
|
|
// Parses the next expression at (or after) the cursor.
//
// Order of attempts, after eat_until_code has moved the cursor forward:
//   1. maybe_expand_short_form -- a non-null result is returned as is;
//   2. parse_list, when the cursor sits on a '(';
//   3. parse_atom otherwise.
proc parse_expression(char* text, int* index_in_text) -> Lisp_Object* {
    eat_until_code(text, index_in_text);

    Lisp_Object* expression;
    try expression = maybe_expand_short_form(text, index_in_text);
    if (expression != nullptr) {
        return expression;
    }

    const bool opens_list = (text[*index_in_text] == '(');
    if (opens_list) {
        try expression = parse_list(text, index_in_text);
        return expression;
    }

    try expression = parse_atom(text, index_in_text);
    return expression;
}
|
|
|
|
|
|
|
|
|
|
|
// Parses the first expression found in `text` and returns it.
//
// The parser position globals are reset beforehand: the source label becomes
// `standard_in`, and line and column both start at 1. Anything in `text`
// after the first expression is left unread.
proc parse_single_expression(char* text) -> Lisp_Object* {
    // fresh position bookkeeping for this piece of text
    parser_col = 1;
    parser_line = 1;
    parser_file = standard_in;

    int cursor = 0;
    Lisp_Object* expression;
    try expression = parse_expression(text, &cursor);
    return expression;
}
|
|
|
|
|
|
|
|
|
|
|
// Parses every top-level expression in `text` and returns them, in source
// order, in a newly allocated Array_List.
//
// file_name: label stored in `parser_file` for the duration of the parse.
// text:      NUL-terminated source buffer.
//
// The parser position is reset to line 1, column 1 before parsing starts.
// The returned list is allocated with `new`; the caller takes ownership.
proc parse_program(String* file_name, char* text) -> Array_List<Lisp_Object*>* {
    parser_file = file_name;
    parser_line = 1;
    // columns are 1-based: step_char leaves the column at 1 right after a
    // line break, so the first line has to start at 1 as well
    parser_col = 1;

    Array_List<Lisp_Object*>* program = new Array_List<Lisp_Object*>;

    int index_in_text = 0;
    Lisp_Object* parsed;

    // move the cursor forward before the first expression, and again after
    // each one, so the loop ends cleanly on the terminating '\0'
    eat_until_code(text, &index_in_text);
    while (text[index_in_text] != '\0') {
        try parsed = parse_expression(text, &index_in_text);
        program->append(parsed);
        eat_until_code(text, &index_in_text);
    }
    return program;
}
|
|
|
|
|
|
|
} |