From 49f18d77712f840907c45894ed4d66a356abc599 Mon Sep 17 00:00:00 2001
From: FelixBrendel
Date: Sat, 27 Oct 2018 18:16:43 +0200
Subject: [PATCH] Unescape all strings when parsing

---
 bin/pre.slime | 34 +++++++++++++++++++---------
 src/env.c     |  1 -
 src/helpers.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.c    |  1 +
 src/parse.c   | 26 +++++++++++++++++++----
 todo.org      | 21 ++++++++++--------
 6 files changed, 134 insertions(+), 24 deletions(-)

diff --git a/bin/pre.slime b/bin/pre.slime
index 49033fa..902ad7b 100644
--- a/bin/pre.slime
+++ b/bin/pre.slime
@@ -82,26 +82,40 @@ the (rest) of the last element of the sequence."
     (mutate e (pair (first e) elem)))
   seq)
 
+(defun incr (val)
+  (+ val 1))
+
+(defun decr (val)
+  (- val 1))
+
 (defun append (seq elem)
   (extend seq (pair elem nil)))
 
+(defun length (seq)
+  (if (nil? seq)
+      0
+      (incr (length (rest seq)))))
+
 (defmacro n-times (@times @action)
   (unless (<= (eval @times) 0)
     (eval @action)
-    (macro-define @args (pair (pair - (pair (eval @times) (pair 1 nil))) (pair @action nil)))
-    ;; [o|o] --------------------------> [o|o] -> nil
-    ;;  |                                 |
-    ;;  V                                 V
-    ;; [o|o] -> [o|o] -> [o|o]-> nil    action
-    ;;  |        |        |
-    ;;  V        V        V
-    ;;  -      times      1
-    (eval (pair n-times @args))))
+    (eval (list n-times (list - @times 1) @action))))
+
+(defmacro for (@symbol @from @to :rest @for-body)
+  (if (< (eval @from) (eval @to))
+      (macro-define @op incr)
+      (if (> (eval @from) (eval @to))
+          (macro-define @op decr)
+          (macro-define @op nil)))
+  (when @op
+    (macro-define (eval @symbol) (eval @from))
+    (eval (pair prog @for-body))
+    (eval (extend (list for @symbol (@op @from) @to) @for-body))))
 
 (defun range (:keys from :defaults-to 0 to)
   "Returns a sequence of numbers starting with the number defined by the
 key 'from' and ends with the number defined in 'to'."
-  (if (<= from to)
+  (if (< from to)
       (pair from (range :from (+ 1 from) :to to))
       nil))
 
diff --git a/src/env.c b/src/env.c
index 5eba794..fbe55cd 100644
--- a/src/env.c
+++ b/src/env.c
@@ -51,7 +51,6 @@ void define_symbol(Ast_Node* symbol, Ast_Node* value, Environment* env) {
     // also searching for thesymbol from the back, so we will find the
     // latest defined one first, but a bit messy. Later we should use
     // a hashmap here. @refactor
-
     if (env->next_index == env->capacity) {
         env->capacity *= 2;
         env->keys = (char**)realloc(env->keys, env->capacity * sizeof(char*));
diff --git a/src/helpers.c b/src/helpers.c
index 013350d..5385ea7 100644
--- a/src/helpers.c
+++ b/src/helpers.c
@@ -79,6 +79,81 @@ int asprintf(char *strp[], const char *fmt, ...) {
 }
 // asprintf implementation end
 
+/* Converts one hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its value
+ * (0..15). The caller must have validated c with isxdigit() beforehand. */
+static char get_nibble(char c) {
+    if (c >= 'A' && c <= 'F')
+        return (c - 'A') + 10;
+    else if (c >= 'a' && c <= 'f')
+        return (c - 'a') + 10;
+    return (c - '0');
+}
+
+/* Replaces the backslash escape sequences in `in` by the characters they
+ * stand for. The string is rewritten in place, which is safe because the
+ * result is never longer than the input.
+ * Supported escapes: \a \b \f \n \r \t \v \" \' \\ \? and \xHH, where HH
+ * are exactly two hexadecimal digits.
+ * Returns true on success and when `in` is a null pointer. Returns false on
+ * an unknown escape, a malformed \x escape or a backslash at the very end;
+ * `in` then holds only the part that was converted before the error. */
+bool unescape_string(char* in) {
+    if (!in)
+        return true;
+
+    char *out = in, *p = in;
+    const char *int_err = nullptr;
+
+    while (*p && !int_err) {
+        if (*p != '\\') {
+            /* normal case */
+            *out++ = *p++;
+        } else {
+            /* escape sequence */
+            switch (*++p) {
+            case 'a': *out++ = '\a'; ++p; break;
+            case 'b': *out++ = '\b'; ++p; break;
+            case 'f': *out++ = '\f'; ++p; break;
+            case 'n': *out++ = '\n'; ++p; break;
+            case 'r': *out++ = '\r'; ++p; break;
+            case 't': *out++ = '\t'; ++p; break;
+            case 'v': *out++ = '\v'; ++p; break;
+
+            /* these characters stand for themselves */
+            case '"':
+            case '\'':
+            case '\\':
+            case '?':
+                *out++ = *p++;
+                break;
+
+            case 'x':
+            case 'X':
+                /* cast: <ctype.h> functions need unsigned char values */
+                if (!isxdigit((unsigned char)p[1]) ||
+                    !isxdigit((unsigned char)p[2])) {
+                    int_err = "Invalid character on hexadecimal escape.";
+                } else {
+                    *out++ = (char)(get_nibble(p[1]) * 0x10 + get_nibble(p[2]));
+                    p += 3;
+                }
+                break;
+
+            default:
+                /* unknown escape, or the string ends right after '\\' */
+                int_err = "Unexpected '\\' with no escape sequence.";
+                break;
+            }
+        }
+    }
+
+    /* Set the end of string. */
+    *out = '\0';
+    if (int_err)
+        return false;
+    return true;
+}
+
 char* read_entire_file (char* filename) {
     char *fileContent = nullptr;
     FILE *fp = fopen(filename, "r");
diff --git a/src/main.c b/src/main.c
index d6e9851..71185e8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include /* needed for va_list */
+#include <ctype.h>
 #include
 
 #include "./helpers.c"
diff --git a/src/parse.c b/src/parse.c
index e1aeb38..5867227 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -128,7 +128,17 @@ Ast_Node* parse_string(char* text, int* index_in_text) {
 
     text[*index_in_text+string_length] = '\0';
     char* string = (char*)malloc(string_length*sizeof(char)+1); // plus null char
     strcpy(string, text+(*index_in_text));
+
+    /* Unescape the private copy instead of the source text: unescaping
+     * shrinks the string, which would leave a stray '\0' in the middle of
+     * the text that is being parsed. */
+    if (!unescape_string(string)) {
+        free(string);
+        text[*index_in_text+string_length] = '"';
+        create_error(Error_Type_Unknown_Error, create_ast_node_nil());
+        return nullptr;
+    }
 
     text[*index_in_text+string_length] = '"';
 
@@ -201,9 +211,13 @@ Ast_Node* parse_expression(char* text, int* index_in_text) {
 
     while (true) {
         if (text[(*index_in_text)] == '(' || text[(*index_in_text)] == '\'' ) {
-            head->value.pair->first = parse_expression(text, index_in_text);
+            try {
+                head->value.pair->first = parse_expression(text, index_in_text);
+            }
         } else {
-            head->value.pair->first = parse_atom(text, index_in_text);
+            try {
+                head->value.pair->first = parse_atom(text, index_in_text);
+            }
         }
 
         eat_until_code(text, index_in_text);
@@ -245,9 +259,13 @@ Ast_Node* parse_single_expression(char* text) {
     Ast_Node* result;
     eat_until_code(text, &index_in_text);
     if (text[(index_in_text)] == '(' || text[(index_in_text)] == '\'' )
-        result = parse_expression(text, &index_in_text);
+        try {
+            result = parse_expression(text, &index_in_text);
+        }
     else
-        result = parse_atom(text, &index_in_text);
+        try {
+            result = parse_atom(text, &index_in_text);
+        }
     eat_until_code(text, &index_in_text);
     if (text[(index_in_text)] == '\0')
         return result;
diff --git a/todo.org b/todo.org
index 1e8002e..b1ea214 100644
--- a/todo.org
+++ b/todo.org
@@ -330,15 +330,18 @@ set to see if we are in an errornious state.
 
 * DONE use an enum for builtin identifiers
   CLOSED: [2018-10-11 Do 17:15]
-* TODO =assert_equal_type= macro in testing
-* TODO =t= ast node type, universal source of truth
-* TODO backquoting
-* TODO dont create new nils or builtins, but store one of each globally
-* TODO make keywords unique (binary tree)
-* TODO store all ast nodes in a huge arena
-* TODO source code locations for errors
-* TODO String error messages
-* TODO Rename macro to =special= or something
+* DONE Print escaped characters correctly
+  CLOSED: [2018-10-27 Sa 18:16]
+* TODO [#A] =t= ast node type, universal source of truth
+* TODO [#A] source code locations for errors
+* TODO [#A] String error messages
+* TODO [#A] Rename macro to =special= or something
+* TODO [#B] =assert_equal_type= macro in testing
+* TODO [#B] dont create new nils or builtins, but store one of each globally
+* TODO [#B] make keywords unique (binary tree)
+* TODO [#B] store all ast nodes in a huge arena
+* TODO [#B] Auto doc generation
+* TODO [#C] backquoting
 
 * Build-in forms [29/30]
 ** TODO info