Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
 
 
 
 
 
 

532 wiersze
20 KiB

  1. namespace Parser {
  // Attaches a freshly allocated Source_Code_Location to `_ret`, filled from
  // the parser's current position (parser_file / parser_line / parser_col).
  //
  // The body is wrapped in `do { ... } while (0)` so that the macro expands
  // to exactly one statement: `if (x) inject_scl(obj);` then guards all
  // four assignments instead of only the first one. The argument is
  // parenthesized so that any pointer-valued expression can be passed.
  // Use it like a function call, terminated by a semicolon.
  #define inject_scl(_ret) \
      do { \
          (_ret)->sourceCodeLocation = new(Source_Code_Location); \
          (_ret)->sourceCodeLocation->file = parser_file; \
          (_ret)->sourceCodeLocation->line = parser_line; \
          (_ret)->sourceCodeLocation->column = parser_col; \
      } while (0)
  // Current position of the parser; read by inject_scl and by the error
  // reporting calls to stamp objects and errors with a source location.
  //
  // parser_file: name of the input being parsed ("stdin" for
  //              parse_single_expression, a heap copy of the file name
  //              for parse_program).
  // parser_line: current line, reset to 1 at the start of a parse.
  // parser_col:  current column; advanced while skipping whitespace,
  //              comments and single punctuation characters.
  const char* parser_file = nullptr;
  int parser_line = 0;
  int parser_col = 0;
  10. // NOTE(Felix): In this environment, the build in functions will
  11. // be loaded, and the macros will be stroed in form of
  12. // special-lambdas, that get executed in this environment at
  13. // read-time. This should always be the global environment.
  14. Environment* environment_for_macros;
  15. void init(Environment* env) {
  16. // NOTE(Felix): it is important to keep the parser environment
  17. // up to date with the global environment. When donig tests,
  18. // or running a programm we have to reaload it.
  19. // NOTE(Felix): For now we just allow executing built-ins at
  20. // read-time (while creating macros). If later we want to
  21. // change that, we have to define some funcions in this
  22. // environment.
  23. environment_for_macros = env;
  24. }
  25. void eat_comment_line(char* text, int* index_in_text) {
  26. // safety check if we are actually starting a comment here
  27. if (text[*index_in_text] != ';')
  28. return;
  29. // eat the comment line
  30. do {
  31. ++(*index_in_text);
  32. ++parser_col;
  33. } while (text[(*index_in_text)] != '\n' &&
  34. text[(*index_in_text)] != '\r' &&
  35. text[(*index_in_text)] != '\0');
  36. }
  37. void eat_whitespace(char* text, int* index_in_text) {
  38. // skip whitespaces
  39. while (text[(*index_in_text)] == ' ' ||
  40. text[(*index_in_text)] == '\t' ||
  41. text[(*index_in_text)] == '\n' ||
  42. text[(*index_in_text)] == '\r')
  43. {
  44. if (text[(*index_in_text)] == '\n') {
  45. ++parser_line;
  46. parser_col = 0;
  47. }
  48. ++parser_col;
  49. ++(*index_in_text);
  50. }
  51. }
  52. void eat_until_code(char* text, int* index_in_text) {
  53. int position_before;
  54. do {
  55. position_before = *index_in_text;
  56. eat_comment_line(text, index_in_text);
  57. eat_whitespace(text, index_in_text);
  58. } while (position_before != *index_in_text);
  59. }
  /**
   * Copies the atom starting at the current position into a new buffer.
   *
   * An atom extends up to (not including) the next ' ', '\t', '\n', '\r',
   * '(', ')' or the terminating '\0'.
   *
   * The input is only read: the atom is copied out with a length-bounded
   * memcpy instead of temporarily writing a NUL into `text`, so this also
   * works when `text` lives in read-only memory.
   *
   * @param text           NUL-terminated source text.
   * @param index_in_text  in/out cursor; advanced to the first character
   *                       after the atom.
   * @return malloc'ed, NUL-terminated copy of the atom (possibly empty);
   *         the caller owns the buffer.
   */
  char* read_atom(char* text, int* index_in_text) {
      const char* start = text + *index_in_text;

      // measure the atom
      int atom_length = 0;
      for (;;) {
          const char c = start[atom_length];
          if (c == ' '  || c == ')'  || c == '('  || c == '\0' ||
              c == '\n' || c == '\r' || c == '\t')
              break;
          ++atom_length;
      }

      // copy it out; plus one for the null char
      char* atom = (char*)malloc(atom_length*sizeof(char)+1);
      memcpy(atom, start, atom_length);
      atom[atom_length] = '\0';

      // move the cursor to the character after the atom
      *index_in_text += atom_length;
      return atom;
  }
  87. Lisp_Object* parse_number(char* text, int* index_in_text) {
  88. double number;
  89. char* str_number = read_atom(text, index_in_text);
  90. sscanf(str_number, "%lf", &number);
  91. Lisp_Object* ret = Memory::create_lisp_object_number(number);
  92. inject_scl(ret);
  93. return ret;
  94. }
  95. Lisp_Object* parse_keyword(char* text, int* index_in_text) {
  96. // we are now on the colon
  97. ++(*index_in_text);
  98. ++parser_col;
  99. char* str_keyword = read_atom(text, index_in_text);
  100. Lisp_Object* ret = Memory::create_lisp_object_keyword(str_keyword);
  101. inject_scl(ret);
  102. return ret;
  103. }
  104. Lisp_Object* parse_symbol(char* text, int* index_in_text) {
  105. // we are now at the first char of the symbol
  106. char* str_symbol = read_atom(text, index_in_text);
  107. Lisp_Object* ret = Memory::create_lisp_object_symbol(str_symbol);
  108. inject_scl(ret);
  109. return ret;
  110. }
  111. Lisp_Object* parse_string(char* text, int* index_in_text) {
  112. // the first character is the '"'
  113. ++(*index_in_text);
  114. ++parser_col;
  115. // now we are at the first letter, if this is the closing '"' then
  116. // it's easy
  117. if (text[*index_in_text] == '"') {
  118. char* str = new(char);
  119. *str = '\0';
  120. Lisp_Object* ret = Memory::create_lisp_object_string(str, 0);
  121. inject_scl(ret);
  122. // plus one because we want to go after the quotes
  123. *index_in_text += 1;
  124. return ret;
  125. }
  126. // okay so the first letter was not actually closing the string...
  127. int string_length = 0;
  128. while (text[*index_in_text+string_length] != '"' ||
  129. text[*index_in_text+string_length] == '\\')
  130. {
  131. ++string_length;
  132. }
  133. // we found the end of the string
  134. text[*index_in_text+string_length] = '\0';
  135. char* string = (char*)malloc(string_length*sizeof(char)+1); // plus null char
  136. if (!unescape_string(text+(*index_in_text))) {
  137. create_error(
  138. Error_Type::Unknown_Error,
  139. create_source_code_location(parser_file, parser_line, parser_col));
  140. return nullptr;
  141. }
  142. strcpy(string, text+(*index_in_text));
  143. /* manually copy to parse control sequences correctly */
  144. /* int temp_index = 0; */
  145. /* while (text+(temp_index+(*index_in_text)) != '\0') { */
  146. /* string[temp_index++] = text[temp_index+(*index_in_text)]; */
  147. /* } */
  148. /* string[temp_index++] = '\0'; */
  149. text[*index_in_text+string_length] = '"';
  150. *index_in_text += string_length +1; // plus one because we want to
  151. // go after the quotes
  152. Lisp_Object* ret = Memory::create_lisp_object_string(string, string_length);
  153. inject_scl(ret);
  154. return ret;
  155. }
  156. Lisp_Object* parse_atom(char* text, int* index_in_text) {
  157. // numbers
  158. if ((text[*index_in_text] <= 57 && // if number
  159. text[*index_in_text] >= 48)
  160. ||
  161. ((text[*index_in_text] == '+' || // or if sign and then number
  162. text[*index_in_text] == '-')
  163. &&
  164. (text[*index_in_text +1] <= 57 &&
  165. text[*index_in_text +1] >= 48))
  166. ||
  167. ((text[*index_in_text] == '.') // or if . and then number
  168. &&
  169. (text[*index_in_text +1] <= 57 &&
  170. text[*index_in_text +1] >= 48)))
  171. return parse_number(text, index_in_text);
  172. // keywords
  173. if (text[*index_in_text] == ':')
  174. return parse_keyword(text, index_in_text);
  175. // strings
  176. if (text[*index_in_text] == '"')
  177. return parse_string(text, index_in_text);
  178. return parse_symbol(text, index_in_text);
  179. }
  180. Lisp_Object* parse_expression(char* text, int* index_in_text) {
  181. // if it is quoted
  182. if (text[*index_in_text] == '\'' ||
  183. text[*index_in_text] == '`' ||
  184. text[*index_in_text] == ',')
  185. {
  186. char quoteType = text[*index_in_text];
  187. ++(*index_in_text);
  188. ++parser_col;
  189. Lisp_Object* result;
  190. if (text[*index_in_text] == '(' ||
  191. text[*index_in_text] == '\'' ||
  192. text[*index_in_text] == '`' ||
  193. text[*index_in_text] == ',')
  194. {
  195. try {
  196. result = parse_expression(text, index_in_text);
  197. }
  198. } else {
  199. try {
  200. result = parse_atom(text, index_in_text);
  201. }
  202. }
  203. if (quoteType == '\'')
  204. return Memory::create_lisp_object_pair(
  205. Memory::create_lisp_object_symbol("quote"),
  206. Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
  207. else if (quoteType == '`')
  208. return Memory::create_lisp_object_pair(
  209. Memory::create_lisp_object_symbol("quasiquote"),
  210. Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
  211. // it has to be an unquote
  212. return Memory::create_lisp_object_pair(
  213. Memory::create_lisp_object_symbol("unquote"),
  214. Memory::create_lisp_object_pair(result, Memory::create_lisp_object_nil()));
  215. }
  216. // if it is not quoted
  217. ++(*index_in_text);
  218. ++parser_col;
  219. eat_whitespace(text, index_in_text);
  220. // if there was actually nothing in the list, we define here,
  221. // that that means nil
  222. if (text[(*index_in_text)] == ')') {
  223. ++(*index_in_text);
  224. ++parser_col;
  225. return Memory::create_lisp_object_nil();
  226. }
  227. // okay there is something
  228. Lisp_Object* head = Memory::create_lisp_object();
  229. head->type = Lisp_Object_Type::Pair;
  230. head->value.pair = new(Pair);
  231. Lisp_Object* expression = head;
  232. while (true) {
  233. if (text[*index_in_text] == '(' ||
  234. text[*index_in_text] == '\''||
  235. text[*index_in_text] == '`' ||
  236. text[*index_in_text] == ',')
  237. {
  238. try {
  239. head->value.pair->first = parse_expression(text, index_in_text);
  240. }
  241. } else {
  242. try {
  243. head->value.pair->first = parse_atom(text, index_in_text);
  244. }
  245. }
  246. eat_until_code(text, index_in_text);
  247. if (text[(*index_in_text)] == '\0') {
  248. create_error(Error_Type::Unexpected_Eof, create_source_code_location(parser_file, parser_line, parser_col));
  249. return nullptr;
  250. }
  251. if (text[(*index_in_text)] == ')') {
  252. head->value.pair->rest = Memory::create_lisp_object_nil();
  253. ++parser_col;
  254. ++(*index_in_text);
  255. break;
  256. } else if (text[(*index_in_text)] == '.') {
  257. ++parser_col;
  258. ++(*index_in_text);
  259. eat_until_code(text, index_in_text);
  260. if (text[(*index_in_text)] == '(')
  261. head->value.pair->rest = parse_expression(text, index_in_text);
  262. else
  263. head->value.pair->rest = parse_atom(text, index_in_text);
  264. eat_until_code(text, index_in_text);
  265. if (text[(*index_in_text)] != ')')
  266. create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
  267. ++parser_col;
  268. ++(*index_in_text);
  269. break;
  270. } else {
  271. head->value.pair->rest = Memory::create_lisp_object_pair(nullptr, nullptr);
  272. head = head->value.pair->rest;
  273. }
  274. }
  275. // check if we have to create or delete or run macros
  276. if (expression->value.pair->first->type == Lisp_Object_Type::Symbol) {
  277. if (string_equal("define-syntax", expression->value.pair->first->value.symbol->identifier)) {
  278. // create a new macro
  279. Lisp_Object* arguments = expression->value.pair->rest;
  280. int arguments_length;
  281. // HACK(Felix): almost code duplicate from
  282. // `built_ins.cpp`: special-lambda
  283. try {
  284. arguments_length = list_length(arguments);
  285. }
  286. // (define-syntax defun (name args :rest body) (...))
  287. if (arguments_length < 2) {
  288. create_error(Error_Type::Wrong_Number_Of_Arguments, expression->sourceCodeLocation);
  289. return nullptr;
  290. }
  291. if (arguments->value.pair->first->type != Lisp_Object_Type::Symbol) {
  292. create_error(Error_Type::Type_Missmatch, expression->sourceCodeLocation);
  293. return nullptr;
  294. }
  295. // extract the name
  296. Lisp_Object* symbol_for_macro = arguments->value.pair->first;
  297. arguments = arguments->value.pair->rest;
  298. Function* function = new(Function);
  299. function->parent_environment = environment_for_macros;
  300. function->type = Function_Type::Macro;
  301. // if parameters were specified
  302. if (arguments->value.pair->first->type != Lisp_Object_Type::Nil) {
  303. try {
  304. assert_type(arguments->value.pair->first, Lisp_Object_Type::Pair);
  305. }
  306. try {
  307. parse_argument_list(arguments->value.pair->first, function);
  308. }
  309. } else {
  310. function->positional_arguments = create_positional_argument_list(1);
  311. function->keyword_arguments = create_keyword_argument_list(1);
  312. function->rest_argument = nullptr;
  313. }
  314. arguments = arguments->value.pair->rest;
  315. // if there is a docstring, use it
  316. if (arguments->value.pair->first->type == Lisp_Object_Type::String) {
  317. function->docstring = arguments->value.pair->first->value.string->value;
  318. arguments = arguments->value.pair->rest;
  319. } else {
  320. function->docstring = nullptr;
  321. }
  322. // we are now in the function body, just wrap it in an
  323. // implicit prog
  324. function->body = Memory::create_lisp_object_pair(
  325. Memory::create_lisp_object_symbol("prog"),
  326. arguments);
  327. Lisp_Object* macro = Memory::create_lisp_object();
  328. macro->type = Lisp_Object_Type::Function;
  329. macro->value.function = function;
  330. define_symbol(symbol_for_macro, macro, environment_for_macros);
  331. // print_environment(environment_for_macros);
  332. return Memory::create_lisp_object_nil();
  333. } else if (string_equal("delete-syntax", expression->value.pair->first->value.symbol->identifier)) {
  334. /* --- deleting an existing macro --- */
  335. // TODO(Felix): this is a hard one because when
  336. // environments will be made from hashmaps, how can we
  337. // delete stuff from hashmaps? If we do probing on
  338. // collision and then delte the first colliding entry,
  339. // how can we find the second one? How many probes do
  340. // we have to do to know for sure that an elemenet is
  341. // not in the hashmap? It would be much easier if we
  342. // never deleted any elements from the hashmap, so
  343. // that, when an entry is not found immidiately, we
  344. // know for sure that it does not exist in the table.
  345. create_error(Error_Type::Not_Yet_Implemented, expression->sourceCodeLocation);
  346. return nullptr;
  347. } else {
  348. // if threre is a macro named like this, then macroexpand
  349. // if not it is regular code, dont touch.
  350. for (int i = 0; i < environment_for_macros->next_index; ++i) {
  351. if (string_equal(expression->value.pair->first->value.symbol->identifier, environment_for_macros->keys[i]) &&
  352. environment_for_macros->values[i]->type == Lisp_Object_Type::Function &&
  353. environment_for_macros->values[i]->value.function->type == Function_Type::Macro)
  354. {
  355. try {
  356. // if (string_equal(environment_for_macros->keys[i], "when")) {
  357. // printf("invoking macro for %s in %s:%d to:\n\t", environment_for_macros->keys[i], parser_file, parser_line);
  358. // print(environment_for_macros->values[i]->value.function->body);
  359. // }
  360. expression = eval_expr(expression, environment_for_macros);
  361. // if (string_equal(environment_for_macros->keys[i], "when")) {
  362. // printf("\nresult: \n\t");
  363. // print(expression);
  364. // printf("\n\n");
  365. // }
  366. }
  367. }
  368. }
  369. }
  370. }
  371. return expression;
  372. }
  373. Lisp_Object* parse_single_expression(char* text) {
  374. parser_file = "stdin";
  375. parser_line = 1;
  376. parser_col = 1;
  377. int index_in_text = 0;
  378. Lisp_Object* result;
  379. eat_until_code(text, &index_in_text);
  380. if (text[(index_in_text)] == '\0')
  381. return Memory::create_lisp_object_nil();
  382. if (text[index_in_text] == '(' ||
  383. text[index_in_text] == '\'' ||
  384. text[index_in_text] == '`' ||
  385. text[index_in_text] == ',')
  386. {
  387. try {
  388. result = parse_expression(text, &index_in_text);
  389. }
  390. }
  391. else
  392. try {
  393. result = parse_atom(text, &index_in_text);
  394. }
  395. eat_until_code(text, &index_in_text);
  396. if (text[(index_in_text)] == '\0')
  397. return result;
  398. create_error(Error_Type::Trailing_Garbage, create_source_code_location(parser_file, parser_line, parser_col));
  399. return nullptr;
  400. }
  401. void write_expanded_file(const char* file_name, Lisp_Object_Array_List* program) {
  402. const char* ext = ".expanded";
  403. char* newName = (char*)calloc(4 + strlen(file_name), sizeof(char));
  404. strcpy(newName, file_name);
  405. strcat(newName, ext);
  406. FILE *f = fopen(newName, "w");
  407. if (f == NULL) {
  408. printf("Error opening file!\n");
  409. exit(1);
  410. }
  411. for (int i = 0; i < program->next_index; ++i) {
  412. // a macro will parse as nil for now, so we skip those
  413. if (program->data[i]->type == Lisp_Object_Type::Nil)
  414. continue;
  415. fprint(f, program->data[i]);
  416. fprintf(f, "\n\n");
  417. }
  418. fclose(f);
  419. }
  420. Lisp_Object_Array_List* parse_program(const char* file_name, char* text) {
  421. parser_file = (char*)malloc(strlen(file_name) * sizeof(char) + 1);
  422. strcpy((char *)parser_file, file_name);
  423. parser_line = 1;
  424. parser_col = 0;
  425. Lisp_Object_Array_List* program = create_Lisp_Object_array_list(16);
  426. int index_in_text = 0;
  427. while (text[index_in_text] != '\0') {
  428. switch (text[index_in_text]) {
  429. case '(': {
  430. Lisp_Object* parsed;
  431. try {
  432. parsed = parse_expression(text, &index_in_text);
  433. }
  434. append_to_Lisp_Object_array_list(program, parsed);
  435. } break;
  436. case ';':
  437. case ' ':
  438. case '\t':
  439. case '\n':
  440. case '\r': {
  441. eat_until_code(text, &index_in_text);
  442. } break;
  443. default:
  444. /* syntax error */
  445. create_error(Error_Type::Syntax_Error, create_source_code_location(parser_file, parser_line, parser_col));
  446. return nullptr;
  447. }
  448. }
  449. write_expanded_file(file_name, program);
  450. return program;
  451. }
  452. #undef inject_scl
  453. }