namespace Parser { String* standard_in; String* parser_file; int parser_line; int parser_col; proc eat_comment_line(char* text, int* index_in_text) -> void { // safety check if we are actually starting a comment here if (text[*index_in_text] != ';') return; // eat the comment line do { ++(*index_in_text); ++parser_col; } while (text[(*index_in_text)] != '\n' && text[(*index_in_text)] != '\r' && text[(*index_in_text)] != '\0'); } proc step_char(char* text, int* index_in_text, int steps = 1) { for (int i = 0; i < steps; ++i) { if (text[(*index_in_text)] == '\n') { ++parser_line; parser_col = 0; } ++parser_col; ++(*index_in_text); } } proc eat_whitespace(char* text, int* index_in_text) -> void { // skip whitespaces while (text[(*index_in_text)] == ' ' || text[(*index_in_text)] == '\t' || text[(*index_in_text)] == '\n' || text[(*index_in_text)] == '\r') { step_char(text, index_in_text); } } proc eat_until_code(char* text, int* index_in_text) -> void { profile_this(); int position_before; do { position_before = *index_in_text; eat_comment_line(text, index_in_text); eat_whitespace(text, index_in_text); } while (position_before != *index_in_text); } proc step_char_and_eat_until_code(char* text, int* index_in_text) { step_char(text, index_in_text); eat_until_code(text, index_in_text); } proc parse_fancy_delimiter(char* text, int* index_in_text, char l_delimiter, char r_delimiter, Lisp_Object* first_elem) -> Lisp_Object* { profile_this(); if (text[*index_in_text] != l_delimiter) { create_parsing_error("a fancy cannot be parsed here"); return nullptr; } Lisp_Object* ret; Lisp_Object* head; try ret = Memory::create_lisp_object_pair(first_elem, Memory::nil); head = ret; step_char(text, index_in_text); eat_until_code(text, index_in_text); while (text[*index_in_text] != r_delimiter) { Lisp_Object* element; try element = parse_expression(text, index_in_text); try head->value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil); head = head->value.pair.rest; eat_until_code(text, index_in_text); } step_char(text, index_in_text); return ret; } proc get_atom_text_length(char* text, int* index_in_text) -> int { int atom_length = 0; while (text[*index_in_text+atom_length] != ' ' && text[*index_in_text+atom_length] != ')' && text[*index_in_text+atom_length] != '(' && text[*index_in_text+atom_length] != '[' && text[*index_in_text+atom_length] != ']' && text[*index_in_text+atom_length] != '{' && text[*index_in_text+atom_length] != '}' && text[*index_in_text+atom_length] != '\0' && text[*index_in_text+atom_length] != '\n' && text[*index_in_text+atom_length] != '\r' && text[*index_in_text+atom_length] != '\t') { ++atom_length; } return atom_length; } proc parse_number(char* text, int* index_in_text) -> Lisp_Object* { Lisp_Object* ret; try ret = Memory::create_lisp_object(0.0); sscanf(text+*index_in_text, "%lf", &ret->value.number); int atom_length = get_atom_text_length(text, index_in_text); step_char(text, index_in_text, atom_length); return ret; } proc parse_symbol_or_keyword(char* text, int* index_in_text) -> Lisp_Object* { bool keyword = false; if (text[*index_in_text] == ':') { keyword = true; step_char(text, index_in_text); } int atom_length = get_atom_text_length(text, index_in_text); char orig = text[*index_in_text+atom_length]; text[*index_in_text+atom_length] = '\0'; String* str_keyword; Lisp_Object* ret; try str_keyword = Memory::create_string("", atom_length); strcpy(&str_keyword->data, text+*index_in_text); if (keyword) { try ret = Memory::get_keyword(str_keyword); } else { try ret = Memory::get_symbol(str_keyword); } text[*index_in_text+atom_length] = orig; step_char(text, index_in_text, atom_length); return ret; } proc parse_string(char* text, int* index_in_text) -> Lisp_Object* { // the first character is the '"' step_char(text, index_in_text); // now we are at the first letter, if this is the closing '"' then // it's easy if (text[*index_in_text] == '"') { Lisp_Object* ret; try ret = Memory::create_lisp_object(Memory::create_string("", 0)); // inject_scl(ret); // plus one because we want to go after the quotes step_char(text, index_in_text); return ret; } // okay so the first letter was not actually closing the string... int string_length = 0; bool escaping = false; while (escaping || text[*index_in_text+string_length] != '"') { if (escaping) { escaping = false; } else if (text[*index_in_text+string_length] == '\\') escaping = true; ++string_length; } // we found the end of the string text[*index_in_text+string_length] = '\0'; // NOTE(Felix): Tactic: Through unescaping the string will // only get shorter, so we replace it inplace and later jump // to the original end of the string. int new_len; try new_len = unescape_string(text+(*index_in_text)); String* string = Memory::create_string("", new_len); strcpy(&string->data, text+(*index_in_text)); // printf("------ %s\n", &string->data); text[*index_in_text+string_length] = '"'; // plus one because we want to go after the quotes step_char(text, index_in_text, string_length+1); Lisp_Object* ret; try ret = Memory::create_lisp_object(string); // inject_scl(ret); return ret; } proc parse_atom(char* text, int* index_in_text) -> Lisp_Object* { profile_this(); Lisp_Object* ret; // numbers if ((text[*index_in_text] <= 57 && // if number text[*index_in_text] >= 48) || ((text[*index_in_text] == '+' || // or if sign and then number text[*index_in_text] == '-') && (text[*index_in_text +1] <= 57 && text[*index_in_text +1] >= 48)) || ((text[*index_in_text] == '.') // or if . and then number && (text[*index_in_text +1] <= 57 && text[*index_in_text +1] >= 48))) { try ret = parse_number(text, index_in_text); } else if (text[*index_in_text] == '"') try ret = parse_string(text, index_in_text); else try ret = parse_symbol_or_keyword(text, index_in_text); return ret; } proc parse_list(char* text, int* index_in_text) -> Lisp_Object* { profile_this(); if (text[*index_in_text] != '(') { create_parsing_error("a list cannot be parsed here"); return nullptr; } step_char_and_eat_until_code(text, index_in_text); if (text[*index_in_text] == ')') { step_char(text, index_in_text); return Memory::nil; } Lisp_Object* first_elem; Lisp_Object* ret; Lisp_Object* head; try first_elem = parse_expression(text, index_in_text); try ret = Memory::create_lisp_object_pair(first_elem, Memory::nil); head = ret; eat_until_code(text, index_in_text); while (text[*index_in_text] != ')') { Lisp_Object* element; if (text[*index_in_text+0] == '.' && text[*index_in_text+1] == ' ') { step_char(text, index_in_text, 2); try element = parse_expression(text, index_in_text); head->value.pair.rest = element; eat_until_code(text, index_in_text); if (text[*index_in_text] != ')') { create_parsing_error("expected the list to end after the dotted end."); return nullptr; } step_char(text, index_in_text); return ret; } try element = parse_expression(text, index_in_text); try head->value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil); head = head->value.pair.rest; eat_until_code(text, index_in_text); } step_char(text, index_in_text); return ret; } proc maybe_expand_short_form(char* text, int* index_in_text) -> Lisp_Object* { profile_this(); Lisp_Object* vector_sym = Memory::get_symbol("vector"); Lisp_Object* hash_map_sym = Memory::get_symbol("hash-map"); Lisp_Object* quote_sym = Memory::get_symbol("quote"); Lisp_Object* quasiquote_sym = Memory::get_symbol("quasiquote"); Lisp_Object* unquote_sym = Memory::get_symbol("unquote"); Lisp_Object* unquote_splicing_sym = Memory::get_symbol("unquote-splicing"); Lisp_Object* ret = nullptr; Lisp_Object* expr; switch (text[*index_in_text]) { case '\'': { // quote step_char_and_eat_until_code(text, index_in_text); try expr = parse_expression(text, index_in_text); try ret = Memory::create_lisp_object_pair(expr, Memory::nil); try ret = Memory::create_lisp_object_pair(quote_sym, ret); } break; case '`': { // quasiquote step_char_and_eat_until_code(text, index_in_text); try expr = parse_expression(text, index_in_text); try ret = Memory::create_lisp_object_pair(expr, Memory::nil); try ret = Memory::create_lisp_object_pair(quasiquote_sym, ret); } break; case ',': { step_char_and_eat_until_code(text, index_in_text); if (text[*index_in_text] == '@') { // unquote-splicing step_char_and_eat_until_code(text, index_in_text); try expr = parse_expression(text, index_in_text); try ret = Memory::create_lisp_object_pair(expr, Memory::nil); try ret = Memory::create_lisp_object_pair(unquote_splicing_sym, ret); } else { // unquote try expr = parse_expression(text, index_in_text); try ret = Memory::create_lisp_object_pair(expr, Memory::nil); try ret = Memory::create_lisp_object_pair(unquote_sym, ret); } } break; case '[': { // vector try ret = parse_fancy_delimiter(text, index_in_text, '[', ']', vector_sym); } break; case '{': { // hashmap try ret = parse_fancy_delimiter(text, index_in_text, '{', '}', hash_map_sym); } break; default: break; } return ret; } proc parse_expression(char* text, int* index_in_text) -> Lisp_Object* { profile_this(); Lisp_Object* ret; eat_until_code(text, index_in_text); try ret = maybe_expand_short_form(text, index_in_text); if (ret) return ret; if (text[*index_in_text] == '(') { try ret = parse_list(text, index_in_text); } else { try ret = parse_atom(text, index_in_text); } return ret; } proc parse_single_expression(wchar_t* text) -> Lisp_Object* { char* res = wchar_to_char(text); defer {free(res);}; return parse_single_expression(res); } proc parse_single_expression(char* text) -> Lisp_Object* { parser_file = standard_in; parser_line = 1; parser_col = 1; int index_in_text = 0; Lisp_Object* ret; try ret = parse_expression(text, &index_in_text); return ret; } proc parse_program(String* file_name, char* text) -> Array_List* { profile_this(); parser_file = file_name; parser_line = 1; parser_col = 0; Array_List* program = new Array_List; int index_in_text = 0; Lisp_Object* parsed; eat_until_code(text, &index_in_text); while (text[index_in_text] != '\0') { try parsed = parse_expression(text, &index_in_text); program->append(parsed); eat_until_code(text, &index_in_text); } return program; } }