char* parser_file; int parser_line; int parser_col; #define inject_scl(_ret) \ ret->sourceCodeLocation = new(Source_Code_Location); \ ret->sourceCodeLocation->file = parser_file; \ ret->sourceCodeLocation->line = parser_line; \ ret->sourceCodeLocation->column = parser_col // TODO(Felix): use the array list macro here? Ast_Node_Array_List* create_Ast_Node_Array_List(int initial_length) { Ast_Node_Array_List* ret = new (Ast_Node_Array_List); // create one with 16 entries first ret->length = initial_length; ret->data = (struct Ast_Node**)malloc(initial_length * sizeof(struct Ast_Node)); ret->next_index = 0; return ret; } void append_to_Ast_Node_Array_List(Ast_Node_Array_List* list, struct Ast_Node* node) { if (list->next_index == list->length) { list->length *= 2; list->data = (struct Ast_Node**)realloc(list->data, list->length * sizeof(struct Ast_Node)); } list->data[list->next_index++] = node; } void eat_comment_line(char* text, int* index_in_text) { // safety check if we are actually starting a comment here if (text[*index_in_text] != ';') return; // eat the comment line do { ++(*index_in_text); ++parser_col; } while (text[(*index_in_text)] != '\n' && text[(*index_in_text)] != '\r' && text[(*index_in_text)] != '\0'); } void eat_whitespace(char* text, int* index_in_text) { // skip whitespaces while (text[(*index_in_text)] == ' ' || text[(*index_in_text)] == '\t' || text[(*index_in_text)] == '\n' || text[(*index_in_text)] == '\r') { if (text[(*index_in_text)] == '\n') { ++parser_line; parser_col = 0; } ++parser_col; ++(*index_in_text); } } void eat_until_code(char* text, int* index_in_text) { int position_before; do { position_before = *index_in_text; eat_comment_line(text, index_in_text); eat_whitespace(text, index_in_text); } while (position_before != *index_in_text); } char* read_atom(char* text, int* index_in_text) { int atom_length = 0; while (text[*index_in_text+atom_length] != ' ' && text[*index_in_text+atom_length] != ')' && text[*index_in_text+atom_length] != '(' && text[*index_in_text+atom_length] != '\0' && text[*index_in_text+atom_length] != '\n' && text[*index_in_text+atom_length] != '\r' && text[*index_in_text+atom_length] != '\t') { ++atom_length; } // let's mark the end of the atom there quickly, so the string can // be copied from there easily and then put the char that was // before there back char before = text[*index_in_text+atom_length]; text[*index_in_text+atom_length] = '\0'; // get the atom char* atom = (char*)malloc(atom_length*sizeof(char)+1); // plus null char strcpy(atom, text+(*index_in_text)); // restore the original string text[*index_in_text+atom_length] = before; // update the index to point to the character after the atom // ended *index_in_text += atom_length; return atom; } Ast_Node* parse_number(char* text, int* index_in_text) { double number; char* str_number = read_atom(text, index_in_text); sscanf(str_number, "%lf", &number); Ast_Node* ret = create_ast_node_number(number); inject_scl(ret); return ret; } Ast_Node* parse_keyword(char* text, int* index_in_text) { // we are now on the colon ++(*index_in_text); ++parser_col; char* str_keyword = read_atom(text, index_in_text); Ast_Node* ret = create_ast_node_keyword(str_keyword); inject_scl(ret); return ret; } Ast_Node* parse_symbol(char* text, int* index_in_text) { // we are now at the first char of the symbol char* str_symbol = read_atom(text, index_in_text); Ast_Node* ret = create_ast_node_symbol(str_symbol); inject_scl(ret); return ret; } Ast_Node* parse_string(char* text, int* index_in_text) { // the first character is the '"' ++(*index_in_text); ++parser_col; // now we are at the first letter, if this is the closing '"' then // it's easy if (text[*index_in_text] == '"') { char* str = new(char); *str = '\0'; Ast_Node* ret = create_ast_node_string(str, 0); inject_scl(ret); return ret; } // okay so the first letter was not actually closing the string... int string_length = 0; while (text[*index_in_text+string_length] != '"' || text[*index_in_text+string_length] == '\\') { ++string_length; } // we found the end of the string text[*index_in_text+string_length] = '\0'; char* string = (char*)malloc(string_length*sizeof(char)+1); // plus null char if (!unescape_string(text+(*index_in_text))) { create_error( Error_Type_Unknown_Error, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } strcpy(string, text+(*index_in_text)); /* manually copy to parse control sequences correctly */ /* int temp_index = 0; */ /* while (text+(temp_index+(*index_in_text)) != '\0') { */ /* string[temp_index++] = text[temp_index+(*index_in_text)]; */ /* } */ /* string[temp_index++] = '\0'; */ text[*index_in_text+string_length] = '"'; *index_in_text += string_length +1; // plus one because we want to // go after the quotes Ast_Node* ret = create_ast_node_string(string, string_length); inject_scl(ret); return ret; } Ast_Node* parse_atom(char* text, int* index_in_text) { // numbers if ((text[*index_in_text] <= 57 && // if number text[*index_in_text] >= 48) || ((text[*index_in_text] == '+' || // or if sign and then number text[*index_in_text] == '-') && (text[*index_in_text +1] <= 57 && text[*index_in_text +1] >= 48)) || ((text[*index_in_text] == '.') // or if . and then number && (text[*index_in_text +1] <= 57 && text[*index_in_text +1] >= 48))) return parse_number(text, index_in_text); // keywords if (text[*index_in_text] == ':') return parse_keyword(text, index_in_text); // strings if (text[*index_in_text] == '"') return parse_string(text, index_in_text); return parse_symbol(text, index_in_text); } Ast_Node* parse_expression(char* text, int* index_in_text) { if (text[*index_in_text] == '\'') { ++(*index_in_text); ++parser_col; Ast_Node* result; if (text[*index_in_text] == '(' || text[*index_in_text] == '\'' ) { try { result = parse_expression(text, index_in_text); } } else { try { result = parse_atom(text, index_in_text); } } return create_ast_node_pair( create_ast_node_symbol("quote"), create_ast_node_pair(result, create_ast_node_nil())); } ++(*index_in_text); ++parser_col; eat_whitespace(text, index_in_text); // if there was actually nothing in the list, return nil if (text[(*index_in_text)] == ')') { ++(*index_in_text); ++parser_col; return create_ast_node_nil(); } // okay there is something Ast_Node* head = new(Ast_Node); head->type = Ast_Node_Type_Pair; head->value.pair = new(Pair); Ast_Node* expression = head; while (true) { if (text[(*index_in_text)] == '(' || text[(*index_in_text)] == '\'' ) { try { head->value.pair->first = parse_expression(text, index_in_text); } } else { try { head->value.pair->first = parse_atom(text, index_in_text); } } eat_until_code(text, index_in_text); if (text[(*index_in_text)] == '\0') { create_error(Error_Type_Unexpected_Eof, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } if (text[(*index_in_text)] == ')') { head->value.pair->rest = create_ast_node_nil(); ++parser_col; ++(*index_in_text); break; } else if (text[(*index_in_text)] == '.') { ++parser_col; ++(*index_in_text); eat_until_code(text, index_in_text); if (text[(*index_in_text)] == '(') head->value.pair->rest = parse_expression(text, index_in_text); else head->value.pair->rest = parse_atom(text, index_in_text); eat_until_code(text, index_in_text); if (text[(*index_in_text)] != ')') create_error(Error_Type_Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col)); ++parser_col; ++(*index_in_text); break; } else { head->value.pair->rest = create_ast_node_pair(nullptr, nullptr); head = head->value.pair->rest; } } return expression; } Ast_Node* parse_single_expression(char* text) { parser_file = "stdin"; parser_line = 1; parser_col = 1; int index_in_text = 0; Ast_Node* result; eat_until_code(text, &index_in_text); if (text[(index_in_text)] == '\0') return create_ast_node_nil(); if (text[(index_in_text)] == '(' || text[(index_in_text)] == '\'' ) try { result = parse_expression(text, &index_in_text); } else try { result = parse_atom(text, &index_in_text); } eat_until_code(text, &index_in_text); if (text[(index_in_text)] == '\0') return result; create_error(Error_Type_Trailing_Garbage, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } Ast_Node_Array_List* parse_program(char* file_name, char* text) { parser_file = (char*)malloc(strlen(file_name) * sizeof(char) + 1); strcpy(parser_file, file_name); parser_line = 1; parser_col = 0; Ast_Node_Array_List* program = create_Ast_Node_Array_List(16); int index_in_text = 0; while (text[index_in_text] != '\0') { switch (text[index_in_text]) { case '(': { Ast_Node* parsed; try { parsed = parse_expression(text, &index_in_text); } append_to_Ast_Node_Array_List(program, parsed); } break; case ';': case ' ': case '\t': case '\n': case '\r': { eat_until_code(text, &index_in_text); } break; default: /* syntax error */ create_error(Error_Type_Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } } return program; } #undef inject_scl