namespace Parser { #define inject_scl(_ret) \ _ret->sourceCodeLocation = new(Source_Code_Location); \ _ret->sourceCodeLocation->file = parser_file; \ _ret->sourceCodeLocation->line = parser_line; \ _ret->sourceCodeLocation->column = parser_col const char* parser_file; int parser_line; int parser_col; // NOTE(Felix): In this environment, the build in functions will // be loaded, and the macros will be stroed in form of // special-lambdas, that get executed in this environment at // read-time. This should always be the global environment. Environment* environment_for_macros; void init(Environment* env) { // NOTE(Felix): it is important to keep the parser environment // up to date with the global environment. When donig tests, // or running a programm we have to reaload it. // NOTE(Felix): For now we just allow executing built-ins at // read-time (while creating macros). If later we want to // change that, we have to define some funcions in this // environment. environment_for_macros = env; } void eat_comment_line(char* text, int* index_in_text) { // safety check if we are actually starting a comment here if (text[*index_in_text] != ';') return; // eat the comment line do { ++(*index_in_text); ++parser_col; } while (text[(*index_in_text)] != '\n' && text[(*index_in_text)] != '\r' && text[(*index_in_text)] != '\0'); } void eat_whitespace(char* text, int* index_in_text) { // skip whitespaces while (text[(*index_in_text)] == ' ' || text[(*index_in_text)] == '\t' || text[(*index_in_text)] == '\n' || text[(*index_in_text)] == '\r') { if (text[(*index_in_text)] == '\n') { ++parser_line; parser_col = 0; } ++parser_col; ++(*index_in_text); } } void eat_until_code(char* text, int* index_in_text) { int position_before; do { position_before = *index_in_text; eat_comment_line(text, index_in_text); eat_whitespace(text, index_in_text); } while (position_before != *index_in_text); } char* read_atom(char* text, int* index_in_text) { int atom_length = 0; while (text[*index_in_text+atom_length] != ' ' && text[*index_in_text+atom_length] != ')' && text[*index_in_text+atom_length] != '(' && text[*index_in_text+atom_length] != '\0' && text[*index_in_text+atom_length] != '\n' && text[*index_in_text+atom_length] != '\r' && text[*index_in_text+atom_length] != '\t') { ++atom_length; } // let's mark the end of the atom there quickly, so the string can // be copied from there easily and then put the char that was // before there back char before = text[*index_in_text+atom_length]; text[*index_in_text+atom_length] = '\0'; // get the atom char* atom = (char*)malloc(atom_length*sizeof(char)+1); // plus null char strcpy(atom, text+(*index_in_text)); // restore the original string text[*index_in_text+atom_length] = before; // update the index to point to the character after the atom // ended *index_in_text += atom_length; return atom; } Lisp_Object* parse_number(char* text, int* index_in_text) { double number; char* str_number = read_atom(text, index_in_text); sscanf(str_number, "%lf", &number); Lisp_Object* ret = Memory::create_lisp_object_number(number); inject_scl(ret); return ret; } Lisp_Object* parse_keyword(char* text, int* index_in_text) { // we are now on the colon ++(*index_in_text); ++parser_col; char* str_keyword = read_atom(text, index_in_text); Lisp_Object* ret = Memory::create_lisp_object_keyword(str_keyword); inject_scl(ret); return ret; } Lisp_Object* parse_symbol(char* text, int* index_in_text) { // we are now at the first char of the symbol char* str_symbol = read_atom(text, index_in_text); Lisp_Object* ret = Memory::create_lisp_object_symbol(str_symbol); inject_scl(ret); return ret; } Lisp_Object* parse_string(char* text, int* index_in_text) { // the first character is the '"' ++(*index_in_text); ++parser_col; // now we are at the first letter, if this is the closing '"' then // it's easy if (text[*index_in_text] == '"') { char* str = new(char); *str = '\0'; Lisp_Object* ret = Memory::create_lisp_object_string(str, 0); inject_scl(ret); // plus one because we want to go after the quotes *index_in_text += 1; return ret; } // okay so the first letter was not actually closing the string... int string_length = 0; while (text[*index_in_text+string_length] != '"' || text[*index_in_text+string_length] == '\\') { ++string_length; } // we found the end of the string text[*index_in_text+string_length] = '\0'; char* string = (char*)malloc(string_length*sizeof(char)+1); // plus null char if (!unescape_string(text+(*index_in_text))) { create_error( Error_Type::Unknown_Error, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } strcpy(string, text+(*index_in_text)); /* manually copy to parse control sequences correctly */ /* int temp_index = 0; */ /* while (text+(temp_index+(*index_in_text)) != '\0') { */ /* string[temp_index++] = text[temp_index+(*index_in_text)]; */ /* } */ /* string[temp_index++] = '\0'; */ text[*index_in_text+string_length] = '"'; *index_in_text += string_length +1; // plus one because we want to // go after the quotes Lisp_Object* ret = Memory::create_lisp_object_string(string, string_length); inject_scl(ret); return ret; } Lisp_Object* parse_atom(char* text, int* index_in_text) { // numbers if ((text[*index_in_text] <= 57 && // if number text[*index_in_text] >= 48) || ((text[*index_in_text] == '+' || // or if sign and then number text[*index_in_text] == '-') && (text[*index_in_text +1] <= 57 && text[*index_in_text +1] >= 48)) || ((text[*index_in_text] == '.') // or if . and then number && (text[*index_in_text +1] <= 57 && text[*index_in_text +1] >= 48))) return parse_number(text, index_in_text); // keywords if (text[*index_in_text] == ':') return parse_keyword(text, index_in_text); // strings if (text[*index_in_text] == '"') return parse_string(text, index_in_text); return parse_symbol(text, index_in_text); } Lisp_Object* parse_expression(char* text, int* index_in_text) { // if it is quoted if (text[*index_in_text] == '\'' || text[*index_in_text] == '`' || text[*index_in_text] == ',') { char quoteType = text[*index_in_text]; ++(*index_in_text); ++parser_col; Lisp_Object* result; if (text[*index_in_text] == '(' || text[*index_in_text] == '\'' || text[*index_in_text] == '`' || text[*index_in_text] == ',') { try { result = parse_expression(text, index_in_text); } } else { try { result = parse_atom(text, index_in_text); } } if (quoteType == '\'') return Memory::create_lisp_object_pair( Memory::create_lisp_object_symbol("quote"), Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil())); else if (quoteType == '`') return Memory::create_lisp_object_pair( Memory::create_lisp_object_symbol("quasiquote"), Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil())); // it has to be an unquote return Memory::create_lisp_object_pair( Memory::create_lisp_object_symbol("unquote"), Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil())); } // if it is not quoted ++(*index_in_text); ++parser_col; eat_whitespace(text, index_in_text); // if there was actually nothing in the list, we define here, // that that means nil if (text[(*index_in_text)] == ')') { ++(*index_in_text); ++parser_col; return Memory::create_lisp_object_nil(); } // okay there is something Lisp_Object* head = Memory::create_lisp_object(); head->type = Lisp_Object_Type::Pair; head->value.pair = new(Pair); Lisp_Object* expression = head; while (true) { if (text[*index_in_text] == '(' || text[*index_in_text] == '\''|| text[*index_in_text] == '`' || text[*index_in_text] == ',') { try { head->value.pair->first = parse_expression(text, index_in_text); } } else { try { head->value.pair->first = parse_atom(text, index_in_text); } } eat_until_code(text, index_in_text); if (text[(*index_in_text)] == '\0') { create_error(Error_Type::Unexpected_Eof, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } if (text[(*index_in_text)] == ')') { head->value.pair->rest = Memory::create_lisp_object_nil(); ++parser_col; ++(*index_in_text); break; } else if (text[(*index_in_text)] == '.') { ++parser_col; ++(*index_in_text); eat_until_code(text, index_in_text); if (text[(*index_in_text)] == '(') head->value.pair->rest = parse_expression(text, index_in_text); else head->value.pair->rest = parse_atom(text, index_in_text); eat_until_code(text, index_in_text); if (text[(*index_in_text)] != ')') create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col)); ++parser_col; ++(*index_in_text); break; } else { head->value.pair->rest = Memory::create_lisp_object_pair(nullptr, nullptr); head = head->value.pair->rest; } } // check if we have to create or delete or run macros if (expression->value.pair->first->type == Lisp_Object_Type::Symbol) { if (string_equal("define-syntax", expression->value.pair->first->value.symbol->identifier)) { // create a new macro Lisp_Object* arguments = expression->value.pair->rest; int arguments_length; // HACK(Felix): almost code duplicate from // `built_ins.cpp`: special-lambda try { arguments_length = list_length(arguments); } // (define-syntax defun (name args :rest body) (...)) if (arguments_length < 2) { create_error(Error_Type::Wrong_Number_Of_Arguments, expression->sourceCodeLocation); return nullptr; } if (arguments->value.pair->first->type != Lisp_Object_Type::Symbol) { create_error(Error_Type::Type_Missmatch, expression->sourceCodeLocation); return nullptr; } // extract the name Lisp_Object* symbol_for_macro = arguments->value.pair->first; arguments = arguments->value.pair->rest; Function* function = new(Function); function->parent_environment = environment_for_macros; function->type = Function_Type::Macro; // if parameters were specified if (arguments->value.pair->first->type != Lisp_Object_Type::Nil) { try { assert_type(arguments->value.pair->first, Lisp_Object_Type::Pair); } try { parse_argument_list(arguments->value.pair->first, function); } } else { function->positional_arguments = create_positional_argument_list(1); function->keyword_arguments = create_keyword_argument_list(1); function->rest_argument = nullptr; } arguments = arguments->value.pair->rest; // if there is a docstring, use it if (arguments->value.pair->first->type == Lisp_Object_Type::String) { function->docstring = arguments->value.pair->first->value.string->value; arguments = arguments->value.pair->rest; } else { function->docstring = nullptr; } // we are now in the function body, just wrap it in an // implicit prog function->body = Memory::create_lisp_object_pair( Memory::create_lisp_object_symbol("prog"), arguments); Lisp_Object* macro = Memory::create_lisp_object(); macro->type = Lisp_Object_Type::Function; macro->value.function = function; define_symbol(symbol_for_macro, macro, environment_for_macros); // print_environment(environment_for_macros); return Memory::create_lisp_object_nil(); } else if (string_equal("delete-syntax", expression->value.pair->first->value.symbol->identifier)) { /* --- deleting an existing macro --- */ // TODO(Felix): this is a hard one because when // environments will be made from hashmaps, how can we // delete stuff from hashmaps? If we do probing on // collision and then delte the first colliding entry, // how can we find the second one? How many probes do // we have to do to know for sure that an elemenet is // not in the hashmap? It would be much easier if we // never deleted any elements from the hashmap, so // that, when an entry is not found immidiately, we // know for sure that it does not exist in the table. create_error(Error_Type::Not_Yet_Implemented, expression->sourceCodeLocation); return nullptr; } else { // if threre is a macro named like this, then macroexpand // if not it is regular code, dont touch. for (int i = 0; i < environment_for_macros->next_index; ++i) { if (string_equal(expression->value.pair->first->value.symbol->identifier, environment_for_macros->keys[i]) && environment_for_macros->values[i]->type == Lisp_Object_Type::Function && environment_for_macros->values[i]->value.function->type == Function_Type::Macro) { try { // if (string_equal(environment_for_macros->keys[i], "when")) { // printf("invoking macro for %s in %s:%d to:\n\t", environment_for_macros->keys[i], parser_file, parser_line); // print(environment_for_macros->values[i]->value.function->body); // } expression = eval_expr(expression, environment_for_macros); // if (string_equal(environment_for_macros->keys[i], "when")) { // printf("\nresult: \n\t"); // print(expression); // printf("\n\n"); // } } } } } } return expression; } Lisp_Object* parse_single_expression(char* text) { parser_file = "stdin"; parser_line = 1; parser_col = 1; int index_in_text = 0; Lisp_Object* result; eat_until_code(text, &index_in_text); if (text[(index_in_text)] == '\0') return Memory::create_lisp_object_nil(); if (text[index_in_text] == '(' || text[index_in_text] == '\'' || text[index_in_text] == '`' || text[index_in_text] == ',') { try { result = parse_expression(text, &index_in_text); } } else try { result = parse_atom(text, &index_in_text); } eat_until_code(text, &index_in_text); if (text[(index_in_text)] == '\0') return result; create_error(Error_Type::Trailing_Garbage, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } void write_expanded_file(const char* file_name, Lisp_Object_Array_List* program) { const char* ext = ".expanded"; char* newName = (char*)calloc(4 + strlen(file_name), sizeof(char)); strcpy(newName, file_name); strcat(newName, ext); FILE *f = fopen(newName, "w"); if (f == NULL) { printf("Error opening file!\n"); exit(1); } for (int i = 0; i < program->next_index; ++i) { // a macro will parse as nil for now, so we skip those if (program->data[i]->type == Lisp_Object_Type::Nil) continue; fprint(f, program->data[i]); fprintf(f, "\n\n"); } fclose(f); } Lisp_Object_Array_List* parse_program(const char* file_name, char* text) { parser_file = (char*)malloc(strlen(file_name) * sizeof(char) + 1); strcpy((char *)parser_file, file_name); parser_line = 1; parser_col = 0; Lisp_Object_Array_List* program = create_Lisp_Object_array_list(16); int index_in_text = 0; while (text[index_in_text] != '\0') { switch (text[index_in_text]) { case '(': { Lisp_Object* parsed; try { parsed = parse_expression(text, &index_in_text); } append_to_Lisp_Object_array_list(program, parsed); } break; case ';': case ' ': case '\t': case '\n': case '\r': { eat_until_code(text, &index_in_text); } break; default: /* syntax error */ create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col)); return nullptr; } } write_expanded_file(file_name, program); return program; } #undef inject_scl }