|
- namespace Parser {
-
- String* parser_file;
- int parser_line;
- int parser_col;
- // NOTE(Felix): In this environment, the build in functions will
- // be loaded, and the macros will be stroed in form of
- // special-lambdas, that get executed in this environment at
- // read-time. This should always be the global environment.
- Environment* environment_for_macros;
-
- proc init(Environment* env) -> void {
- // NOTE(Felix): it is important to keep the parser environment
- // up to date with the global environment. When donig tests,
- // or running a programm we have to reaload it.
-
- // NOTE(Felix): For now we just allow executing built-ins at
- // read-time (while creating macros). If later we want to
- // change that, we have to define some funcions in this
- // environment.
- environment_for_macros = env;
- }
-
- proc inject_scl(Lisp_Object* lo) -> void {
- lo->sourceCodeLocation = new(Source_Code_Location);
- lo->sourceCodeLocation->file = parser_file;
- lo->sourceCodeLocation->line = parser_line;
- lo->sourceCodeLocation->column = parser_col;
- }
-
- proc eat_comment_line(char* text, int* index_in_text) -> void {
- // safety check if we are actually starting a comment here
- if (text[*index_in_text] != ';')
- return;
-
- // eat the comment line
- do {
- ++(*index_in_text);
- ++parser_col;
- } while (text[(*index_in_text)] != '\n' &&
- text[(*index_in_text)] != '\r' &&
- text[(*index_in_text)] != '\0');
- }
-
- proc eat_whitespace(char* text, int* index_in_text) -> void {
- // skip whitespaces
- while (text[(*index_in_text)] == ' ' ||
- text[(*index_in_text)] == '\t' ||
- text[(*index_in_text)] == '\n' ||
- text[(*index_in_text)] == '\r')
- {
- if (text[(*index_in_text)] == '\n') {
- ++parser_line;
- parser_col = 0;
- }
- ++parser_col;
- ++(*index_in_text);
- }
-
- }
-
- proc eat_until_code(char* text, int* index_in_text) -> void {
- int position_before;
- do {
- position_before = *index_in_text;
- eat_comment_line(text, index_in_text);
- eat_whitespace(text, index_in_text);
- } while (position_before != *index_in_text);
- }
-
- proc read_atom(char* text, int* index_in_text) -> String* {
- int atom_length = 0;
- while (text[*index_in_text+atom_length] != ' ' &&
- text[*index_in_text+atom_length] != ')' &&
- text[*index_in_text+atom_length] != '(' &&
- text[*index_in_text+atom_length] != '\0' &&
- text[*index_in_text+atom_length] != '\n' &&
- text[*index_in_text+atom_length] != '\r' &&
- text[*index_in_text+atom_length] != '\t')
- {
- ++atom_length;
- }
-
- // let's mark the end of the atom there quickly, so the string can
- // be copied from there easily and then put the char that was
- // before there back
- char before = text[*index_in_text+atom_length];
- text[*index_in_text+atom_length] = '\0';
-
- // get the atom
- String* ret = Memory::create_string("", atom_length);
- // char* atom = (char*)malloc(atom_length*sizeof(char)+1); // plus null char
- strcpy(&ret->data, text+(*index_in_text));
-
- // restore the original string
- text[*index_in_text+atom_length] = before;
-
- // update the index to point to the character after the atom
- // ended
- *index_in_text += atom_length;
-
- return ret;
- }
-
- proc parse_number(char* text, int* index_in_text) -> Lisp_Object* {
- double number;
- // TODO(Felix): parse the number direcrly from the string and
- // dont create a String first
- String* str_number = read_atom(text, index_in_text);
- sscanf(Memory::get_c_str(str_number), "%lf", &number);
- Lisp_Object* ret = Memory::create_lisp_object_number(number);
-
- inject_scl(ret);
- return ret;
- }
-
- proc parse_keyword(char* text, int* index_in_text) -> Lisp_Object* {
- // we are now on the colon
- ++(*index_in_text);
- ++parser_col;
- String* str_keyword = read_atom(text, index_in_text);
- Lisp_Object* ret = Memory::create_lisp_object_keyword(str_keyword);
-
- inject_scl(ret);
- return ret;
- }
-
- proc parse_symbol(char* text, int* index_in_text) -> Lisp_Object* {
- // we are now at the first char of the symbol
- String* str_symbol = read_atom(text, index_in_text);
- Lisp_Object* ret = Memory::create_lisp_object_symbol(str_symbol);
- inject_scl(ret);
- return ret;
- }
-
- proc parse_string(char* text, int* index_in_text) -> Lisp_Object*{
- // the first character is the '"'
- ++(*index_in_text);
- ++parser_col;
-
- // now we are at the first letter, if this is the closing '"' then
- // it's easy
- if (text[*index_in_text] == '"') {
- Lisp_Object* ret = Memory::create_lisp_object_string(
- Memory::create_string("", 0));
- inject_scl(ret);
-
- // plus one because we want to go after the quotes
- *index_in_text += 1;
-
- return ret;
- }
-
- // okay so the first letter was not actually closing the string...
- int string_length = 0;
- while (text[*index_in_text+string_length] != '"' ||
- text[*index_in_text+string_length] == '\\')
- {
- ++string_length;
- }
-
- // we found the end of the string
- text[*index_in_text+string_length] = '\0';
-
- String* string = Memory::create_string("", string_length);
-
- if (!unescape_string(text+(*index_in_text))) {
- create_error(
- Error_Type::Unknown_Error,
- create_source_code_location(parser_file, parser_line, parser_col));
- return nullptr;
- }
- strcpy(&string->data, text+(*index_in_text));
- /* manually copy to parse control sequences correctly */
- /* int temp_index = 0; */
- /* while (text+(temp_index+(*index_in_text)) != '\0') { */
- /* string[temp_index++] = text[temp_index+(*index_in_text)]; */
- /* } */
- /* string[temp_index++] = '\0'; */
-
- text[*index_in_text+string_length] = '"';
-
- *index_in_text += string_length +1; // plus one because we want to
- // go after the quotes
-
- Lisp_Object* ret = Memory::create_lisp_object_string(string);
- inject_scl(ret);
- return ret;
- }
-
- proc parse_atom(char* text, int* index_in_text) -> Lisp_Object* {
- // numbers
- if ((text[*index_in_text] <= 57 && // if number
- text[*index_in_text] >= 48)
- ||
- ((text[*index_in_text] == '+' || // or if sign and then number
- text[*index_in_text] == '-')
- &&
- (text[*index_in_text +1] <= 57 &&
- text[*index_in_text +1] >= 48))
- ||
- ((text[*index_in_text] == '.') // or if . and then number
- &&
- (text[*index_in_text +1] <= 57 &&
- text[*index_in_text +1] >= 48)))
- return parse_number(text, index_in_text);
-
- // keywords
- if (text[*index_in_text] == ':')
- return parse_keyword(text, index_in_text);
-
- // strings
- if (text[*index_in_text] == '"')
- return parse_string(text, index_in_text);
-
- return parse_symbol(text, index_in_text);
- }
-
- proc parse_expression(char* text, int* index_in_text) -> Lisp_Object* {
-
- // if it is quoted
- if (text[*index_in_text] == '\'' ||
- text[*index_in_text] == '`' ||
- text[*index_in_text] == ',')
- {
- char quoteType = text[*index_in_text];
- ++(*index_in_text);
- ++parser_col;
- Lisp_Object* result;
- if (text[*index_in_text] == '(' ||
- text[*index_in_text] == '\'' ||
- text[*index_in_text] == '`' ||
- text[*index_in_text] == ',')
- {
- try {
- result = parse_expression(text, index_in_text);
- }
- } else {
- try {
- result = parse_atom(text, index_in_text);
- }
- }
-
- if (quoteType == '\'')
- return Memory::create_lisp_object_pair(
- Memory::create_lisp_object_symbol("quote"),
- Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
- else if (quoteType == '`')
- return Memory::create_lisp_object_pair(
- Memory::create_lisp_object_symbol("quasiquote"),
- Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
- // it has to be an unquote
- return Memory::create_lisp_object_pair(
- Memory::create_lisp_object_symbol("unquote"),
- Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
- }
-
-
- // if it is not quoted
- ++(*index_in_text);
- ++parser_col;
-
- eat_whitespace(text, index_in_text);
-
- // if there was actually nothing in the list, we define here,
- // that that means nil
- if (text[(*index_in_text)] == ')') {
- ++(*index_in_text);
- ++parser_col;
- return Memory::create_lisp_object_nil();
- }
-
- // okay there is something
- Lisp_Object* head = Memory::create_lisp_object();
- head->type = Lisp_Object_Type::Pair;
- head->value.pair = new(Pair);
- Lisp_Object* expression = head;
-
- while (true) {
- if (text[*index_in_text] == '(' ||
- text[*index_in_text] == '\''||
- text[*index_in_text] == '`' ||
- text[*index_in_text] == ',')
- {
- try {
- head->value.pair->first = parse_expression(text, index_in_text);
- }
- } else {
- try {
- head->value.pair->first = parse_atom(text, index_in_text);
- }
- }
-
- eat_until_code(text, index_in_text);
- if (text[(*index_in_text)] == '\0') {
- create_error(Error_Type::Unexpected_Eof, create_source_code_location(parser_file, parser_line, parser_col));
- return nullptr;
- }
-
-
- if (text[(*index_in_text)] == ')') {
- head->value.pair->rest = Memory::create_lisp_object_nil();
- ++parser_col;
- ++(*index_in_text);
- break;
- } else if (text[(*index_in_text)] == '.') {
- ++parser_col;
- ++(*index_in_text);
- eat_until_code(text, index_in_text);
-
- if (text[(*index_in_text)] == '(')
- head->value.pair->rest = parse_expression(text, index_in_text);
- else
- head->value.pair->rest = parse_atom(text, index_in_text);
-
- eat_until_code(text, index_in_text);
-
- if (text[(*index_in_text)] != ')')
- create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
- ++parser_col;
- ++(*index_in_text);
- break;
- } else {
- head->value.pair->rest = Memory::create_lisp_object_pair(nullptr, nullptr);
- head = head->value.pair->rest;
- }
- }
-
- // check if we have to create or delete or run macros
- if (expression->value.pair->first->type == Lisp_Object_Type::Symbol) {
- if (string_equal("define-syntax", expression->value.pair->first->value.symbol->identifier)) {
- // create a new macro
- Lisp_Object* arguments = expression->value.pair->rest;
- int arguments_length;
-
- // HACK(Felix): almost code duplicate from
- // `built_ins.cpp`: special-lambda
- try {
- arguments_length = list_length(arguments);
- }
-
- // (define-syntax defun (name args :rest body) (...))
- if (arguments_length < 2) {
- create_error(Error_Type::Wrong_Number_Of_Arguments, expression->sourceCodeLocation);
- return nullptr;
- }
-
- if (arguments->value.pair->first->type != Lisp_Object_Type::Symbol) {
- create_error(Error_Type::Type_Missmatch, expression->sourceCodeLocation);
- return nullptr;
- }
-
- // extract the name
- Lisp_Object* symbol_for_macro = arguments->value.pair->first;
- arguments = arguments->value.pair->rest;
-
- Function* function = new(Function);
- function->parent_environment = environment_for_macros;
- function->type = Function_Type::Macro;
-
- // if parameters were specified
- if (arguments->value.pair->first->type != Lisp_Object_Type::Nil) {
- try {
- assert_type(arguments->value.pair->first, Lisp_Object_Type::Pair);
- }
- try {
- parse_argument_list(arguments->value.pair->first, function);
- }
- } else {
- function->positional_arguments = create_positional_argument_list(1);
- function->keyword_arguments = create_keyword_argument_list(1);
- function->rest_argument = nullptr;
- }
-
- arguments = arguments->value.pair->rest;
- // if there is a docstring, use it
- if (arguments->value.pair->first->type == Lisp_Object_Type::String) {
- function->docstring = arguments->value.pair->first->value.string;
- arguments = arguments->value.pair->rest;
- } else {
- function->docstring = nullptr;
- }
-
- // we are now in the function body, just wrap it in an
- // implicit prog
- function->body = Memory::create_lisp_object_pair(
- Memory::create_lisp_object_symbol("prog"),
- arguments);
-
- Lisp_Object* macro = Memory::create_lisp_object();
- macro->type = Lisp_Object_Type::Function;
- macro->value.function = function;
- define_symbol(symbol_for_macro, macro, environment_for_macros);
-
- // print_environment(environment_for_macros);
- return Memory::create_lisp_object_nil();
-
- } else if (string_equal("delete-syntax", expression->value.pair->first->value.symbol->identifier)) {
- /* --- deleting an existing macro --- */
- // TODO(Felix): this is a hard one because when
- // environments will be made from hashmaps, how can we
- // delete stuff from hashmaps? If we do probing on
- // collision and then delte the first colliding entry,
- // how can we find the second one? How many probes do
- // we have to do to know for sure that an elemenet is
- // not in the hashmap? It would be much easier if we
- // never deleted any elements from the hashmap, so
- // that, when an entry is not found immidiately, we
- // know for sure that it does not exist in the table.
-
- create_error(Error_Type::Not_Yet_Implemented, expression->sourceCodeLocation);
- return nullptr;
- } else {
- // if threre is a macro named like this, then macroexpand
- // if not it is regular code, dont touch.
-
- for (int i = 0; i < environment_for_macros->next_index; ++i) {
- if (string_equal(expression->value.pair->first->value.symbol->identifier, environment_for_macros->keys[i]) &&
- environment_for_macros->values[i]->type == Lisp_Object_Type::Function &&
- environment_for_macros->values[i]->value.function->type == Function_Type::Macro)
- {
- try {
- // if (string_equal(environment_for_macros->keys[i], "when")) {
- // printf("invoking macro for %s in %s:%d to:\n\t", environment_for_macros->keys[i], parser_file, parser_line);
- // print(environment_for_macros->values[i]->value.function->body);
- // }
- expression = eval_expr(expression, environment_for_macros);
- // if (string_equal(environment_for_macros->keys[i], "when")) {
- // printf("\nresult: \n\t");
- // print(expression);
- // printf("\n\n");
- // }
- }
- }
- }
- }
- }
-
-
- return expression;
- }
-
- proc parse_single_expression(char* text) -> Lisp_Object* {
- parser_file = Memory::create_string("stdin");
- parser_line = 1;
- parser_col = 1;
-
- int index_in_text = 0;
- Lisp_Object* result;
- eat_until_code(text, &index_in_text);
- if (text[(index_in_text)] == '\0')
- return Memory::create_lisp_object_nil();
- if (text[index_in_text] == '(' ||
- text[index_in_text] == '\'' ||
- text[index_in_text] == '`' ||
- text[index_in_text] == ',')
- {
- try {
- result = parse_expression(text, &index_in_text);
- }
- }
- else
- try {
- result = parse_atom(text, &index_in_text);
- }
- eat_until_code(text, &index_in_text);
- if (text[(index_in_text)] == '\0')
- return result;
- create_error(Error_Type::Trailing_Garbage, create_source_code_location(parser_file, parser_line, parser_col));
- return nullptr;
- }
-
- proc write_expanded_file(String* file_name, Lisp_Object_Array_List* program) -> void {
- const char* ext = ".expanded";
- char* newName = (char*)calloc(10 + file_name->length, sizeof(char));
- strcpy(newName, Memory::get_c_str(file_name));
- strcat(newName, ext);
-
- FILE *f = fopen(newName, "w");
- if (f == NULL) {
- printf("Error opening file!\n");
- exit(1);
- }
-
- for (int i = 0; i < program->next_index; ++i) {
- // a macro will parse as nil for now, so we skip those
- if (program->data[i]->type == Lisp_Object_Type::Nil)
- continue;
- fprint(f, program->data[i]);
- fprintf(f, "\n\n");
- }
-
- fclose(f);
- free(newName);
- }
-
- proc parse_program(String* file_name, char* text) -> Lisp_Object_Array_List* {
- parser_file = file_name;
- parser_line = 1;
- parser_col = 0;
-
- Lisp_Object_Array_List* program = create_Lisp_Object_array_list();
-
- int index_in_text = 0;
-
- while (text[index_in_text] != '\0') {
- switch (text[index_in_text]) {
- case '(': {
- Lisp_Object* parsed;
- try {
- parsed = parse_expression(text, &index_in_text);
- }
- append_to_array_list(program, parsed);
- } break;
- case ';':
- case ' ':
- case '\t':
- case '\n':
- case '\r': {
- eat_until_code(text, &index_in_text);
- } break;
- default:
- /* syntax error */
- create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
- return nullptr;
- }
- }
-
- write_expanded_file(file_name, program);
-
- return program;
- }
- }
|