Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
 
 
 
 
 
 

534 строки
20 KiB

  1. namespace Parser {
  2. String* parser_file;
  3. int parser_line;
  4. int parser_col;
  5. // NOTE(Felix): In this environment, the build in functions will
  6. // be loaded, and the macros will be stroed in form of
  7. // special-lambdas, that get executed in this environment at
  8. // read-time. This should always be the global environment.
  9. Environment* environment_for_macros;
  10. proc init(Environment* env) -> void {
  11. // NOTE(Felix): it is important to keep the parser environment
  12. // up to date with the global environment. When donig tests,
  13. // or running a programm we have to reaload it.
  14. // NOTE(Felix): For now we just allow executing built-ins at
  15. // read-time (while creating macros). If later we want to
  16. // change that, we have to define some funcions in this
  17. // environment.
  18. environment_for_macros = env;
  19. }
  20. proc inject_scl(Lisp_Object* lo) -> void {
  21. lo->sourceCodeLocation = new(Source_Code_Location);
  22. lo->sourceCodeLocation->file = parser_file;
  23. lo->sourceCodeLocation->line = parser_line;
  24. lo->sourceCodeLocation->column = parser_col;
  25. }
  26. proc eat_comment_line(char* text, int* index_in_text) -> void {
  27. // safety check if we are actually starting a comment here
  28. if (text[*index_in_text] != ';')
  29. return;
  30. // eat the comment line
  31. do {
  32. ++(*index_in_text);
  33. ++parser_col;
  34. } while (text[(*index_in_text)] != '\n' &&
  35. text[(*index_in_text)] != '\r' &&
  36. text[(*index_in_text)] != '\0');
  37. }
  38. proc eat_whitespace(char* text, int* index_in_text) -> void {
  39. // skip whitespaces
  40. while (text[(*index_in_text)] == ' ' ||
  41. text[(*index_in_text)] == '\t' ||
  42. text[(*index_in_text)] == '\n' ||
  43. text[(*index_in_text)] == '\r')
  44. {
  45. if (text[(*index_in_text)] == '\n') {
  46. ++parser_line;
  47. parser_col = 0;
  48. }
  49. ++parser_col;
  50. ++(*index_in_text);
  51. }
  52. }
  53. proc eat_until_code(char* text, int* index_in_text) -> void {
  54. int position_before;
  55. do {
  56. position_before = *index_in_text;
  57. eat_comment_line(text, index_in_text);
  58. eat_whitespace(text, index_in_text);
  59. } while (position_before != *index_in_text);
  60. }
  61. proc read_atom(char* text, int* index_in_text) -> String* {
  62. int atom_length = 0;
  63. while (text[*index_in_text+atom_length] != ' ' &&
  64. text[*index_in_text+atom_length] != ')' &&
  65. text[*index_in_text+atom_length] != '(' &&
  66. text[*index_in_text+atom_length] != '\0' &&
  67. text[*index_in_text+atom_length] != '\n' &&
  68. text[*index_in_text+atom_length] != '\r' &&
  69. text[*index_in_text+atom_length] != '\t')
  70. {
  71. ++atom_length;
  72. }
  73. // let's mark the end of the atom there quickly, so the string can
  74. // be copied from there easily and then put the char that was
  75. // before there back
  76. char before = text[*index_in_text+atom_length];
  77. text[*index_in_text+atom_length] = '\0';
  78. // get the atom
  79. String* ret = Memory::create_string("", atom_length);
  80. // char* atom = (char*)malloc(atom_length*sizeof(char)+1); // plus null char
  81. strcpy(&ret->data, text+(*index_in_text));
  82. // restore the original string
  83. text[*index_in_text+atom_length] = before;
  84. // update the index to point to the character after the atom
  85. // ended
  86. *index_in_text += atom_length;
  87. return ret;
  88. }
  89. proc parse_number(char* text, int* index_in_text) -> Lisp_Object* {
  90. double number;
  91. // TODO(Felix): parse the number direcrly from the string and
  92. // dont create a String first
  93. String* str_number = read_atom(text, index_in_text);
  94. sscanf(Memory::get_c_str(str_number), "%lf", &number);
  95. Lisp_Object* ret = Memory::create_lisp_object_number(number);
  96. inject_scl(ret);
  97. return ret;
  98. }
  99. proc parse_keyword(char* text, int* index_in_text) -> Lisp_Object* {
  100. // we are now on the colon
  101. ++(*index_in_text);
  102. ++parser_col;
  103. String* str_keyword = read_atom(text, index_in_text);
  104. Lisp_Object* ret = Memory::create_lisp_object_keyword(str_keyword);
  105. inject_scl(ret);
  106. return ret;
  107. }
  108. proc parse_symbol(char* text, int* index_in_text) -> Lisp_Object* {
  109. // we are now at the first char of the symbol
  110. String* str_symbol = read_atom(text, index_in_text);
  111. Lisp_Object* ret = Memory::create_lisp_object_symbol(str_symbol);
  112. inject_scl(ret);
  113. return ret;
  114. }
  115. proc parse_string(char* text, int* index_in_text) -> Lisp_Object*{
  116. // the first character is the '"'
  117. ++(*index_in_text);
  118. ++parser_col;
  119. // now we are at the first letter, if this is the closing '"' then
  120. // it's easy
  121. if (text[*index_in_text] == '"') {
  122. Lisp_Object* ret = Memory::create_lisp_object_string(
  123. Memory::create_string("", 0));
  124. inject_scl(ret);
  125. // plus one because we want to go after the quotes
  126. *index_in_text += 1;
  127. return ret;
  128. }
  129. // okay so the first letter was not actually closing the string...
  130. int string_length = 0;
  131. while (text[*index_in_text+string_length] != '"' ||
  132. text[*index_in_text+string_length] == '\\')
  133. {
  134. ++string_length;
  135. }
  136. // we found the end of the string
  137. text[*index_in_text+string_length] = '\0';
  138. String* string = Memory::create_string("", string_length);
  139. if (!unescape_string(text+(*index_in_text))) {
  140. create_error(
  141. Error_Type::Unknown_Error,
  142. create_source_code_location(parser_file, parser_line, parser_col));
  143. return nullptr;
  144. }
  145. strcpy(&string->data, text+(*index_in_text));
  146. /* manually copy to parse control sequences correctly */
  147. /* int temp_index = 0; */
  148. /* while (text+(temp_index+(*index_in_text)) != '\0') { */
  149. /* string[temp_index++] = text[temp_index+(*index_in_text)]; */
  150. /* } */
  151. /* string[temp_index++] = '\0'; */
  152. text[*index_in_text+string_length] = '"';
  153. *index_in_text += string_length +1; // plus one because we want to
  154. // go after the quotes
  155. Lisp_Object* ret = Memory::create_lisp_object_string(string);
  156. inject_scl(ret);
  157. return ret;
  158. }
  159. proc parse_atom(char* text, int* index_in_text) -> Lisp_Object* {
  160. // numbers
  161. if ((text[*index_in_text] <= 57 && // if number
  162. text[*index_in_text] >= 48)
  163. ||
  164. ((text[*index_in_text] == '+' || // or if sign and then number
  165. text[*index_in_text] == '-')
  166. &&
  167. (text[*index_in_text +1] <= 57 &&
  168. text[*index_in_text +1] >= 48))
  169. ||
  170. ((text[*index_in_text] == '.') // or if . and then number
  171. &&
  172. (text[*index_in_text +1] <= 57 &&
  173. text[*index_in_text +1] >= 48)))
  174. return parse_number(text, index_in_text);
  175. // keywords
  176. if (text[*index_in_text] == ':')
  177. return parse_keyword(text, index_in_text);
  178. // strings
  179. if (text[*index_in_text] == '"')
  180. return parse_string(text, index_in_text);
  181. return parse_symbol(text, index_in_text);
  182. }
  183. proc parse_expression(char* text, int* index_in_text) -> Lisp_Object* {
  184. // if it is quoted
  185. if (text[*index_in_text] == '\'' ||
  186. text[*index_in_text] == '`' ||
  187. text[*index_in_text] == ',')
  188. {
  189. char quoteType = text[*index_in_text];
  190. ++(*index_in_text);
  191. ++parser_col;
  192. Lisp_Object* result;
  193. if (text[*index_in_text] == '(' ||
  194. text[*index_in_text] == '\'' ||
  195. text[*index_in_text] == '`' ||
  196. text[*index_in_text] == ',')
  197. {
  198. try {
  199. result = parse_expression(text, index_in_text);
  200. }
  201. } else {
  202. try {
  203. result = parse_atom(text, index_in_text);
  204. }
  205. }
  206. if (quoteType == '\'')
  207. return Memory::create_lisp_object_pair(
  208. Memory::create_lisp_object_symbol("quote"),
  209. Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
  210. else if (quoteType == '`')
  211. return Memory::create_lisp_object_pair(
  212. Memory::create_lisp_object_symbol("quasiquote"),
  213. Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
  214. // it has to be an unquote
  215. return Memory::create_lisp_object_pair(
  216. Memory::create_lisp_object_symbol("unquote"),
  217. Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
  218. }
  219. // if it is not quoted
  220. ++(*index_in_text);
  221. ++parser_col;
  222. eat_whitespace(text, index_in_text);
  223. // if there was actually nothing in the list, we define here,
  224. // that that means nil
  225. if (text[(*index_in_text)] == ')') {
  226. ++(*index_in_text);
  227. ++parser_col;
  228. return Memory::create_lisp_object_nil();
  229. }
  230. // okay there is something
  231. Lisp_Object* head = Memory::create_lisp_object();
  232. head->type = Lisp_Object_Type::Pair;
  233. head->value.pair = new(Pair);
  234. Lisp_Object* expression = head;
  235. while (true) {
  236. if (text[*index_in_text] == '(' ||
  237. text[*index_in_text] == '\''||
  238. text[*index_in_text] == '`' ||
  239. text[*index_in_text] == ',')
  240. {
  241. try {
  242. head->value.pair->first = parse_expression(text, index_in_text);
  243. }
  244. } else {
  245. try {
  246. head->value.pair->first = parse_atom(text, index_in_text);
  247. }
  248. }
  249. eat_until_code(text, index_in_text);
  250. if (text[(*index_in_text)] == '\0') {
  251. create_error(Error_Type::Unexpected_Eof, create_source_code_location(parser_file, parser_line, parser_col));
  252. return nullptr;
  253. }
  254. if (text[(*index_in_text)] == ')') {
  255. head->value.pair->rest = Memory::create_lisp_object_nil();
  256. ++parser_col;
  257. ++(*index_in_text);
  258. break;
  259. } else if (text[(*index_in_text)] == '.') {
  260. ++parser_col;
  261. ++(*index_in_text);
  262. eat_until_code(text, index_in_text);
  263. if (text[(*index_in_text)] == '(')
  264. head->value.pair->rest = parse_expression(text, index_in_text);
  265. else
  266. head->value.pair->rest = parse_atom(text, index_in_text);
  267. eat_until_code(text, index_in_text);
  268. if (text[(*index_in_text)] != ')')
  269. create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
  270. ++parser_col;
  271. ++(*index_in_text);
  272. break;
  273. } else {
  274. head->value.pair->rest = Memory::create_lisp_object_pair(nullptr, nullptr);
  275. head = head->value.pair->rest;
  276. }
  277. }
  278. // check if we have to create or delete or run macros
  279. if (expression->value.pair->first->type == Lisp_Object_Type::Symbol) {
  280. if (string_equal("define-syntax", expression->value.pair->first->value.symbol->identifier)) {
  281. // create a new macro
  282. Lisp_Object* arguments = expression->value.pair->rest;
  283. int arguments_length;
  284. // HACK(Felix): almost code duplicate from
  285. // `built_ins.cpp`: special-lambda
  286. try {
  287. arguments_length = list_length(arguments);
  288. }
  289. // (define-syntax defun (name args :rest body) (...))
  290. if (arguments_length < 2) {
  291. create_error(Error_Type::Wrong_Number_Of_Arguments, expression->sourceCodeLocation);
  292. return nullptr;
  293. }
  294. if (arguments->value.pair->first->type != Lisp_Object_Type::Symbol) {
  295. create_error(Error_Type::Type_Missmatch, expression->sourceCodeLocation);
  296. return nullptr;
  297. }
  298. // extract the name
  299. Lisp_Object* symbol_for_macro = arguments->value.pair->first;
  300. arguments = arguments->value.pair->rest;
  301. Function* function = new(Function);
  302. function->parent_environment = environment_for_macros;
  303. function->type = Function_Type::Macro;
  304. // if parameters were specified
  305. if (arguments->value.pair->first->type != Lisp_Object_Type::Nil) {
  306. try {
  307. assert_type(arguments->value.pair->first, Lisp_Object_Type::Pair);
  308. }
  309. try {
  310. parse_argument_list(arguments->value.pair->first, function);
  311. }
  312. } else {
  313. function->positional_arguments = create_positional_argument_list(1);
  314. function->keyword_arguments = create_keyword_argument_list(1);
  315. function->rest_argument = nullptr;
  316. }
  317. arguments = arguments->value.pair->rest;
  318. // if there is a docstring, use it
  319. if (arguments->value.pair->first->type == Lisp_Object_Type::String) {
  320. function->docstring = arguments->value.pair->first->value.string;
  321. arguments = arguments->value.pair->rest;
  322. } else {
  323. function->docstring = nullptr;
  324. }
  325. // we are now in the function body, just wrap it in an
  326. // implicit prog
  327. function->body = Memory::create_lisp_object_pair(
  328. Memory::create_lisp_object_symbol("prog"),
  329. arguments);
  330. Lisp_Object* macro = Memory::create_lisp_object();
  331. macro->type = Lisp_Object_Type::Function;
  332. macro->value.function = function;
  333. define_symbol(symbol_for_macro, macro, environment_for_macros);
  334. // print_environment(environment_for_macros);
  335. return Memory::create_lisp_object_nil();
  336. } else if (string_equal("delete-syntax", expression->value.pair->first->value.symbol->identifier)) {
  337. /* --- deleting an existing macro --- */
  338. // TODO(Felix): this is a hard one because when
  339. // environments will be made from hashmaps, how can we
  340. // delete stuff from hashmaps? If we do probing on
  341. // collision and then delte the first colliding entry,
  342. // how can we find the second one? How many probes do
  343. // we have to do to know for sure that an elemenet is
  344. // not in the hashmap? It would be much easier if we
  345. // never deleted any elements from the hashmap, so
  346. // that, when an entry is not found immidiately, we
  347. // know for sure that it does not exist in the table.
  348. create_error(Error_Type::Not_Yet_Implemented, expression->sourceCodeLocation);
  349. return nullptr;
  350. } else {
  351. // if threre is a macro named like this, then macroexpand
  352. // if not it is regular code, dont touch.
  353. for (int i = 0; i < environment_for_macros->next_index; ++i) {
  354. if (string_equal(expression->value.pair->first->value.symbol->identifier, environment_for_macros->keys[i]) &&
  355. environment_for_macros->values[i]->type == Lisp_Object_Type::Function &&
  356. environment_for_macros->values[i]->value.function->type == Function_Type::Macro)
  357. {
  358. try {
  359. // if (string_equal(environment_for_macros->keys[i], "when")) {
  360. // printf("invoking macro for %s in %s:%d to:\n\t", environment_for_macros->keys[i], parser_file, parser_line);
  361. // print(environment_for_macros->values[i]->value.function->body);
  362. // }
  363. expression = eval_expr(expression, environment_for_macros);
  364. // if (string_equal(environment_for_macros->keys[i], "when")) {
  365. // printf("\nresult: \n\t");
  366. // print(expression);
  367. // printf("\n\n");
  368. // }
  369. }
  370. }
  371. }
  372. }
  373. }
  374. return expression;
  375. }
  376. proc parse_single_expression(char* text) -> Lisp_Object* {
  377. parser_file = Memory::create_string("stdin");
  378. parser_line = 1;
  379. parser_col = 1;
  380. int index_in_text = 0;
  381. Lisp_Object* result;
  382. eat_until_code(text, &index_in_text);
  383. if (text[(index_in_text)] == '\0')
  384. return Memory::create_lisp_object_nil();
  385. if (text[index_in_text] == '(' ||
  386. text[index_in_text] == '\'' ||
  387. text[index_in_text] == '`' ||
  388. text[index_in_text] == ',')
  389. {
  390. try {
  391. result = parse_expression(text, &index_in_text);
  392. }
  393. }
  394. else
  395. try {
  396. result = parse_atom(text, &index_in_text);
  397. }
  398. eat_until_code(text, &index_in_text);
  399. if (text[(index_in_text)] == '\0')
  400. return result;
  401. create_error(Error_Type::Trailing_Garbage, create_source_code_location(parser_file, parser_line, parser_col));
  402. return nullptr;
  403. }
  404. proc write_expanded_file(String* file_name, Lisp_Object_Array_List* program) -> void {
  405. const char* ext = ".expanded";
  406. char* newName = (char*)calloc(10 + file_name->length, sizeof(char));
  407. strcpy(newName, Memory::get_c_str(file_name));
  408. strcat(newName, ext);
  409. FILE *f = fopen(newName, "w");
  410. if (f == NULL) {
  411. printf("Error opening file!\n");
  412. exit(1);
  413. }
  414. for (int i = 0; i < program->next_index; ++i) {
  415. // a macro will parse as nil for now, so we skip those
  416. if (program->data[i]->type == Lisp_Object_Type::Nil)
  417. continue;
  418. fprint(f, program->data[i]);
  419. fprintf(f, "\n\n");
  420. }
  421. fclose(f);
  422. free(newName);
  423. }
  424. proc parse_program(String* file_name, char* text) -> Lisp_Object_Array_List* {
  425. parser_file = file_name;
  426. parser_line = 1;
  427. parser_col = 0;
  428. Lisp_Object_Array_List* program = create_Lisp_Object_array_list();
  429. int index_in_text = 0;
  430. while (text[index_in_text] != '\0') {
  431. switch (text[index_in_text]) {
  432. case '(': {
  433. Lisp_Object* parsed;
  434. try {
  435. parsed = parse_expression(text, &index_in_text);
  436. }
  437. append_to_array_list(program, parsed);
  438. } break;
  439. case ';':
  440. case ' ':
  441. case '\t':
  442. case '\n':
  443. case '\r': {
  444. eat_until_code(text, &index_in_text);
  445. } break;
  446. default:
  447. /* syntax error */
  448. create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
  449. return nullptr;
  450. }
  451. }
  452. write_expanded_file(file_name, program);
  453. return program;
  454. }
  455. }