Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.
 
 
 
 
 
 

359 строк
11 KiB

  // Parser cursor state shared by every function in this file.

  // Name stored in each source code location created here; assigned by
  // parse_single_expression and parse_program.
  char* parser_file;
  // Current line; the entry points start it at 1 and eat_whitespace
  // increments it on every '\n'.
  int parser_line;
  // Current column; eat_whitespace resets it on '\n', and the eat_* helpers
  // and the single-character skips advance it. read_atom does not touch it.
  int parser_col;

  // Attaches a freshly allocated Source_Code_Location, filled from the
  // parser state above, to `node`.
  //
  // The expansion uses the macro argument (the previous version ignored it
  // and always wrote to a variable literally named `ret`) and is wrapped in
  // do/while(0) so that `inject_scl(x);` is a single statement everywhere.
  // `node` is evaluated several times, so pass a plain variable.
  #define inject_scl(node) \
      do { \
          (node)->sourceCodeLocation = new(Source_Code_Location); \
          (node)->sourceCodeLocation->file = parser_file; \
          (node)->sourceCodeLocation->line = parser_line; \
          (node)->sourceCodeLocation->column = parser_col; \
      } while (0)
  9. // TODO(Felix): use the array list macro here?
  10. Ast_Node_Array_List* create_Ast_Node_Array_List(int initial_length) {
  11. Ast_Node_Array_List* ret = new (Ast_Node_Array_List);
  12. // create one with 16 entries first
  13. ret->length = initial_length;
  14. ret->data = (struct Ast_Node**)malloc(initial_length * sizeof(struct Ast_Node));
  15. ret->next_index = 0;
  16. return ret;
  17. }
  18. void append_to_Ast_Node_Array_List(Ast_Node_Array_List* list, struct Ast_Node* node) {
  19. if (list->next_index == list->length) {
  20. list->length *= 2;
  21. list->data = (struct Ast_Node**)realloc(list->data, list->length * sizeof(struct Ast_Node));
  22. }
  23. list->data[list->next_index++] = node;
  24. }
  25. void eat_comment_line(char* text, int* index_in_text) {
  26. // safety check if we are actually starting a comment here
  27. if (text[*index_in_text] != ';')
  28. return;
  29. // eat the comment line
  30. do {
  31. ++(*index_in_text);
  32. ++parser_col;
  33. } while (text[(*index_in_text)] != '\n' &&
  34. text[(*index_in_text)] != '\r' &&
  35. text[(*index_in_text)] != '\0');
  36. }
  37. void eat_whitespace(char* text, int* index_in_text) {
  38. // skip whitespaces
  39. while (text[(*index_in_text)] == ' ' ||
  40. text[(*index_in_text)] == '\t' ||
  41. text[(*index_in_text)] == '\n' ||
  42. text[(*index_in_text)] == '\r')
  43. {
  44. if (text[(*index_in_text)] == '\n') {
  45. ++parser_line;
  46. parser_col = 0;
  47. }
  48. ++parser_col;
  49. ++(*index_in_text);
  50. }
  51. }
  52. void eat_until_code(char* text, int* index_in_text) {
  53. int position_before;
  54. do {
  55. position_before = *index_in_text;
  56. eat_comment_line(text, index_in_text);
  57. eat_whitespace(text, index_in_text);
  58. } while (position_before != *index_in_text);
  59. }
  // Copies the atom that starts at text[*index_in_text] into a new buffer.
  //
  // The atom ends at the first space, tab, '\n', '\r', '(' or ')' or at the
  // end of the text. `text` is only read: the previous implementation
  // patched a temporary '\0' into it, which fails on read-only input.
  //
  // text:          NUL-terminated source text.
  // index_in_text: in: start of the atom; out: first character after it.
  // Returns a malloc'ed NUL-terminated copy of the atom (possibly empty)
  // that the caller owns, or NULL if the allocation failed, in which case
  // the index is left unchanged.
  char* read_atom(char* text, int* index_in_text) {
      const char* start = text + *index_in_text;
      // strcspn stops at the first delimiter or at the terminating '\0'
      const size_t atom_length = strcspn(start, " \t\n\r()");

      char* atom = (char*)malloc(atom_length + 1); // plus null char
      if (!atom)
          return NULL;
      memcpy(atom, start, atom_length);
      atom[atom_length] = '\0';

      // leave the index on the character right after the atom
      *index_in_text += (int)atom_length;
      return atom;
  }
  87. Ast_Node* parse_number(char* text, int* index_in_text) {
  88. double number;
  89. char* str_number = read_atom(text, index_in_text);
  90. sscanf(str_number, "%lf", &number);
  91. Ast_Node* ret = create_ast_node_number(number);
  92. inject_scl(ret);
  93. return ret;
  94. }
  95. Ast_Node* parse_keyword(char* text, int* index_in_text) {
  96. // we are now on the colon
  97. ++(*index_in_text);
  98. ++parser_col;
  99. char* str_keyword = read_atom(text, index_in_text);
  100. Ast_Node* ret = create_ast_node_keyword(str_keyword);
  101. inject_scl(ret);
  102. return ret;
  103. }
  104. Ast_Node* parse_symbol(char* text, int* index_in_text) {
  105. // we are now at the first char of the symbol
  106. char* str_symbol = read_atom(text, index_in_text);
  107. Ast_Node* ret = create_ast_node_symbol(str_symbol);
  108. inject_scl(ret);
  109. return ret;
  110. }
  111. Ast_Node* parse_string(char* text, int* index_in_text) {
  112. // the first character is the '"'
  113. ++(*index_in_text);
  114. ++parser_col;
  115. // now we are at the first letter, if this is the closing '"' then
  116. // it's easy
  117. if (text[*index_in_text] == '"') {
  118. char* str = new(char);
  119. *str = '\0';
  120. Ast_Node* ret = create_ast_node_string(str, 0);
  121. inject_scl(ret);
  122. return ret;
  123. }
  124. // okay so the first letter was not actually closing the string...
  125. int string_length = 0;
  126. while (text[*index_in_text+string_length] != '"' ||
  127. text[*index_in_text+string_length] == '\\')
  128. {
  129. ++string_length;
  130. }
  131. // we found the end of the string
  132. text[*index_in_text+string_length] = '\0';
  133. char* string = (char*)malloc(string_length*sizeof(char)+1); // plus null char
  134. if (!unescape_string(text+(*index_in_text))) {
  135. create_error(
  136. Error_Type_Unknown_Error,
  137. create_source_code_location(parser_file, parser_line, parser_col));
  138. return nullptr;
  139. }
  140. strcpy(string, text+(*index_in_text));
  141. /* manually copy to parse control sequences correctly */
  142. /* int temp_index = 0; */
  143. /* while (text+(temp_index+(*index_in_text)) != '\0') { */
  144. /* string[temp_index++] = text[temp_index+(*index_in_text)]; */
  145. /* } */
  146. /* string[temp_index++] = '\0'; */
  147. text[*index_in_text+string_length] = '"';
  148. *index_in_text += string_length +1; // plus one because we want to
  149. // go after the quotes
  150. Ast_Node* ret = create_ast_node_string(string, string_length);
  151. inject_scl(ret);
  152. return ret;
  153. }
  154. Ast_Node* parse_atom(char* text, int* index_in_text) {
  155. // numbers
  156. if ((text[*index_in_text] <= 57 && // if number
  157. text[*index_in_text] >= 48)
  158. ||
  159. ((text[*index_in_text] == '+' || // or if sign and then number
  160. text[*index_in_text] == '-')
  161. &&
  162. (text[*index_in_text +1] <= 57 &&
  163. text[*index_in_text +1] >= 48))
  164. ||
  165. ((text[*index_in_text] == '.') // or if . and then number
  166. &&
  167. (text[*index_in_text +1] <= 57 &&
  168. text[*index_in_text +1] >= 48)))
  169. return parse_number(text, index_in_text);
  170. // keywords
  171. if (text[*index_in_text] == ':')
  172. return parse_keyword(text, index_in_text);
  173. // strings
  174. if (text[*index_in_text] == '"')
  175. return parse_string(text, index_in_text);
  176. return parse_symbol(text, index_in_text);
  177. }
  178. Ast_Node* parse_expression(char* text, int* index_in_text) {
  179. if (text[*index_in_text] == '\'') {
  180. ++(*index_in_text);
  181. ++parser_col;
  182. Ast_Node* result;
  183. if (text[*index_in_text] == '(' || text[*index_in_text] == '\'' ) {
  184. try {
  185. result = parse_expression(text, index_in_text);
  186. }
  187. } else {
  188. try {
  189. result = parse_atom(text, index_in_text);
  190. }
  191. }
  192. return create_ast_node_pair(
  193. create_ast_node_symbol("quote"),
  194. create_ast_node_pair(result, create_ast_node_nil()));
  195. }
  196. ++(*index_in_text);
  197. ++parser_col;
  198. eat_whitespace(text, index_in_text);
  199. // if there was actually nothing in the list, return nil
  200. if (text[(*index_in_text)] == ')') {
  201. ++(*index_in_text);
  202. ++parser_col;
  203. return create_ast_node_nil();
  204. }
  205. // okay there is something
  206. Ast_Node* head = new(Ast_Node);
  207. head->type = Ast_Node_Type_Pair;
  208. head->value.pair = new(Pair);
  209. Ast_Node* expression = head;
  210. while (true) {
  211. if (text[(*index_in_text)] == '(' || text[(*index_in_text)] == '\'' ) {
  212. try {
  213. head->value.pair->first = parse_expression(text, index_in_text);
  214. }
  215. } else {
  216. try {
  217. head->value.pair->first = parse_atom(text, index_in_text);
  218. }
  219. }
  220. eat_until_code(text, index_in_text);
  221. if (text[(*index_in_text)] == '\0') {
  222. create_error(Error_Type_Unexpected_Eof, create_source_code_location(parser_file, parser_line, parser_col));
  223. return nullptr;
  224. }
  225. if (text[(*index_in_text)] == ')') {
  226. head->value.pair->rest = create_ast_node_nil();
  227. ++parser_col;
  228. ++(*index_in_text);
  229. break;
  230. } else if (text[(*index_in_text)] == '.') {
  231. ++parser_col;
  232. ++(*index_in_text);
  233. eat_until_code(text, index_in_text);
  234. if (text[(*index_in_text)] == '(')
  235. head->value.pair->rest = parse_expression(text, index_in_text);
  236. else
  237. head->value.pair->rest = parse_atom(text, index_in_text);
  238. eat_until_code(text, index_in_text);
  239. if (text[(*index_in_text)] != ')')
  240. create_error(Error_Type_Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
  241. ++parser_col;
  242. ++(*index_in_text);
  243. break;
  244. } else {
  245. head->value.pair->rest = create_ast_node_pair(nullptr, nullptr);
  246. head = head->value.pair->rest;
  247. }
  248. }
  249. return expression;
  250. }
  251. Ast_Node* parse_single_expression(char* text) {
  252. parser_file = "stdin";
  253. parser_line = 1;
  254. parser_col = 1;
  255. int index_in_text = 0;
  256. Ast_Node* result;
  257. eat_until_code(text, &index_in_text);
  258. if (text[(index_in_text)] == '\0')
  259. return create_ast_node_nil();
  260. if (text[(index_in_text)] == '(' || text[(index_in_text)] == '\'' )
  261. try {
  262. result = parse_expression(text, &index_in_text);
  263. }
  264. else
  265. try {
  266. result = parse_atom(text, &index_in_text);
  267. }
  268. eat_until_code(text, &index_in_text);
  269. if (text[(index_in_text)] == '\0')
  270. return result;
  271. create_error(Error_Type_Trailing_Garbage, create_source_code_location(parser_file, parser_line, parser_col));
  272. return nullptr;
  273. }
  274. Ast_Node_Array_List* parse_program(char* file_name, char* text) {
  275. parser_file = (char*)malloc(strlen(file_name) * sizeof(char) + 1);
  276. strcpy(parser_file, file_name);
  277. parser_line = 1;
  278. parser_col = 0;
  279. Ast_Node_Array_List* program = create_Ast_Node_Array_List(16);
  280. int index_in_text = 0;
  281. while (text[index_in_text] != '\0') {
  282. switch (text[index_in_text]) {
  283. case '(': {
  284. Ast_Node* parsed;
  285. try {
  286. parsed = parse_expression(text, &index_in_text);
  287. }
  288. append_to_Ast_Node_Array_List(program, parsed);
  289. } break;
  290. case ';':
  291. case ' ':
  292. case '\t':
  293. case '\n':
  294. case '\r': {
  295. eat_until_code(text, &index_in_text);
  296. } break;
  297. default:
  298. /* syntax error */
  299. create_error(Error_Type_Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
  300. return nullptr;
  301. }
  302. }
  303. return program;
  304. }
  305. #undef inject_scl