You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

399 line
14 KiB

  1. namespace Parser {
  2. String* standard_in;
  3. String* parser_file;
  4. int parser_line;
  5. int parser_col;
  6. proc eat_comment_line(char* text, int* index_in_text) -> void {
  7. // safety check if we are actually starting a comment here
  8. if (text[*index_in_text] != ';')
  9. return;
  10. // eat the comment line
  11. do {
  12. ++(*index_in_text);
  13. ++parser_col;
  14. } while (text[(*index_in_text)] != '\n' &&
  15. text[(*index_in_text)] != '\r' &&
  16. text[(*index_in_text)] != '\0');
  17. }
  18. proc step_char(char* text, int* index_in_text, int steps = 1) {
  19. for (int i = 0; i < steps; ++i) {
  20. if (text[(*index_in_text)] == '\n') {
  21. ++parser_line;
  22. parser_col = 0;
  23. }
  24. ++parser_col;
  25. ++(*index_in_text);
  26. }
  27. }
  28. proc eat_whitespace(char* text, int* index_in_text) -> void {
  29. // skip whitespaces
  30. while (text[(*index_in_text)] == ' ' ||
  31. text[(*index_in_text)] == '\t' ||
  32. text[(*index_in_text)] == '\n' ||
  33. text[(*index_in_text)] == '\r')
  34. {
  35. step_char(text, index_in_text);
  36. }
  37. }
  38. proc eat_until_code(char* text, int* index_in_text) -> void {
  39. profile_this();
  40. int position_before;
  41. do {
  42. position_before = *index_in_text;
  43. eat_comment_line(text, index_in_text);
  44. eat_whitespace(text, index_in_text);
  45. } while (position_before != *index_in_text);
  46. }
  47. proc step_char_and_eat_until_code(char* text, int* index_in_text) {
  48. step_char(text, index_in_text);
  49. eat_until_code(text, index_in_text);
  50. }
  51. proc parse_fancy_delimiter(char* text, int* index_in_text, char l_delimiter, char r_delimiter, Lisp_Object* first_elem) -> Lisp_Object* {
  52. profile_this();
  53. if (text[*index_in_text] != l_delimiter) {
  54. create_parsing_error("a fancy cannot be parsed here");
  55. return nullptr;
  56. }
  57. Lisp_Object* ret;
  58. Lisp_Object* head;
  59. try ret = Memory::create_lisp_object_pair(first_elem, Memory::nil);
  60. head = ret;
  61. step_char(text, index_in_text);
  62. eat_until_code(text, index_in_text);
  63. while (text[*index_in_text] != r_delimiter) {
  64. Lisp_Object* element;
  65. try element = parse_expression(text, index_in_text);
  66. try head->value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil);
  67. head = head->value.pair.rest;
  68. eat_until_code(text, index_in_text);
  69. }
  70. step_char(text, index_in_text);
  71. return ret;
  72. }
  73. proc get_atom_text_length(char* text, int* index_in_text) -> int {
  74. int atom_length = 0;
  75. while (text[*index_in_text+atom_length] != ' ' &&
  76. text[*index_in_text+atom_length] != ')' &&
  77. text[*index_in_text+atom_length] != '(' &&
  78. text[*index_in_text+atom_length] != '[' &&
  79. text[*index_in_text+atom_length] != ']' &&
  80. text[*index_in_text+atom_length] != '{' &&
  81. text[*index_in_text+atom_length] != '}' &&
  82. text[*index_in_text+atom_length] != '\0' &&
  83. text[*index_in_text+atom_length] != '\n' &&
  84. text[*index_in_text+atom_length] != '\r' &&
  85. text[*index_in_text+atom_length] != '\t')
  86. {
  87. ++atom_length;
  88. }
  89. return atom_length;
  90. }
  91. proc parse_number(char* text, int* index_in_text) -> Lisp_Object* {
  92. Lisp_Object* ret;
  93. try ret = Memory::create_lisp_object(0.0);
  94. sscanf(text+*index_in_text, "%lf", &ret->value.number);
  95. int atom_length = get_atom_text_length(text, index_in_text);
  96. step_char(text, index_in_text, atom_length);
  97. return ret;
  98. }
  99. proc parse_symbol_or_keyword(char* text, int* index_in_text) -> Lisp_Object* {
  100. bool keyword = false;
  101. if (text[*index_in_text] == ':') {
  102. keyword = true;
  103. step_char(text, index_in_text);
  104. }
  105. int atom_length = get_atom_text_length(text, index_in_text);
  106. char orig = text[*index_in_text+atom_length];
  107. text[*index_in_text+atom_length] = '\0';
  108. String* str_keyword;
  109. Lisp_Object* ret;
  110. try str_keyword = Memory::create_string("", atom_length);
  111. strcpy(&str_keyword->data, text+*index_in_text);
  112. if (keyword) {
  113. try ret = Memory::get_keyword(str_keyword);
  114. } else {
  115. try ret = Memory::get_symbol(str_keyword);
  116. }
  117. text[*index_in_text+atom_length] = orig;
  118. step_char(text, index_in_text, atom_length);
  119. return ret;
  120. }
  121. proc parse_string(char* text, int* index_in_text) -> Lisp_Object* {
  122. // the first character is the '"'
  123. step_char(text, index_in_text);
  124. // now we are at the first letter, if this is the closing '"' then
  125. // it's easy
  126. if (text[*index_in_text] == '"') {
  127. Lisp_Object* ret;
  128. try ret = Memory::create_lisp_object(Memory::create_string("", 0));
  129. // inject_scl(ret);
  130. // plus one because we want to go after the quotes
  131. step_char(text, index_in_text);
  132. return ret;
  133. }
  134. // okay so the first letter was not actually closing the string...
  135. int string_length = 0;
  136. bool escaping = false;
  137. while (escaping || text[*index_in_text+string_length] != '"') {
  138. if (escaping) {
  139. escaping = false;
  140. }
  141. else
  142. if (text[*index_in_text+string_length] == '\\')
  143. escaping = true;
  144. ++string_length;
  145. }
  146. // we found the end of the string
  147. text[*index_in_text+string_length] = '\0';
  148. // NOTE(Felix): Tactic: Through unescaping the string will
  149. // only get shorter, so we replace it inplace and later jump
  150. // to the original end of the string.
  151. int new_len;
  152. try new_len = unescape_string(text+(*index_in_text));
  153. String* string = Memory::create_string("", new_len);
  154. strcpy(&string->data, text+(*index_in_text));
  155. // printf("------ %s\n", &string->data);
  156. text[*index_in_text+string_length] = '"';
  157. // plus one because we want to go after the quotes
  158. step_char(text, index_in_text, string_length+1);
  159. Lisp_Object* ret;
  160. try ret = Memory::create_lisp_object(string);
  161. // inject_scl(ret);
  162. return ret;
  163. }
  164. proc parse_atom(char* text, int* index_in_text) -> Lisp_Object* {
  165. profile_this();
  166. Lisp_Object* ret;
  167. // numbers
  168. if ((text[*index_in_text] <= 57 && // if number
  169. text[*index_in_text] >= 48)
  170. ||
  171. ((text[*index_in_text] == '+' || // or if sign and then number
  172. text[*index_in_text] == '-')
  173. &&
  174. (text[*index_in_text +1] <= 57 &&
  175. text[*index_in_text +1] >= 48))
  176. ||
  177. ((text[*index_in_text] == '.') // or if . and then number
  178. &&
  179. (text[*index_in_text +1] <= 57 &&
  180. text[*index_in_text +1] >= 48)))
  181. {
  182. try ret = parse_number(text, index_in_text);
  183. }
  184. else if (text[*index_in_text] == '"')
  185. try ret = parse_string(text, index_in_text);
  186. else
  187. try ret = parse_symbol_or_keyword(text, index_in_text);
  188. return ret;
  189. }
  190. proc parse_list(char* text, int* index_in_text) -> Lisp_Object* {
  191. profile_this();
  192. if (text[*index_in_text] != '(') {
  193. create_parsing_error("a list cannot be parsed here");
  194. return nullptr;
  195. }
  196. step_char_and_eat_until_code(text, index_in_text);
  197. if (text[*index_in_text] == ')') {
  198. step_char(text, index_in_text);
  199. return Memory::nil;
  200. }
  201. Lisp_Object* first_elem;
  202. Lisp_Object* ret;
  203. Lisp_Object* head;
  204. try first_elem = parse_expression(text, index_in_text);
  205. try ret = Memory::create_lisp_object_pair(first_elem, Memory::nil);
  206. head = ret;
  207. eat_until_code(text, index_in_text);
  208. while (text[*index_in_text] != ')') {
  209. Lisp_Object* element;
  210. if (text[*index_in_text+0] == '.' &&
  211. text[*index_in_text+1] == ' ')
  212. {
  213. step_char(text, index_in_text, 2);
  214. try element = parse_expression(text, index_in_text);
  215. head->value.pair.rest = element;
  216. eat_until_code(text, index_in_text);
  217. if (text[*index_in_text] != ')') {
  218. create_parsing_error("expected the list to end after the dotted end.");
  219. return nullptr;
  220. }
  221. step_char(text, index_in_text);
  222. return ret;
  223. }
  224. try element = parse_expression(text, index_in_text);
  225. try head->value.pair.rest = Memory::create_lisp_object_pair(element, Memory::nil);
  226. head = head->value.pair.rest;
  227. eat_until_code(text, index_in_text);
  228. }
  229. step_char(text, index_in_text);
  230. return ret;
  231. }
  232. proc maybe_expand_short_form(char* text, int* index_in_text) -> Lisp_Object* {
  233. profile_this();
  234. Lisp_Object* vector_sym = Memory::get_symbol("vector");
  235. Lisp_Object* hash_map_sym = Memory::get_symbol("hash-map");
  236. Lisp_Object* quote_sym = Memory::get_symbol("quote");
  237. Lisp_Object* quasiquote_sym = Memory::get_symbol("quasiquote");
  238. Lisp_Object* unquote_sym = Memory::get_symbol("unquote");
  239. Lisp_Object* unquote_splicing_sym = Memory::get_symbol("unquote-splicing");
  240. Lisp_Object* ret = nullptr;
  241. Lisp_Object* expr;
  242. switch (text[*index_in_text]) {
  243. case '\'': {
  244. // quote
  245. step_char_and_eat_until_code(text, index_in_text);
  246. try expr = parse_expression(text, index_in_text);
  247. try ret = Memory::create_lisp_object_pair(expr, Memory::nil);
  248. try ret = Memory::create_lisp_object_pair(quote_sym, ret);
  249. } break;
  250. case '`': {
  251. // quasiquote
  252. step_char_and_eat_until_code(text, index_in_text);
  253. try expr = parse_expression(text, index_in_text);
  254. try ret = Memory::create_lisp_object_pair(expr, Memory::nil);
  255. try ret = Memory::create_lisp_object_pair(quasiquote_sym, ret);
  256. } break;
  257. case ',': {
  258. step_char_and_eat_until_code(text, index_in_text);
  259. if (text[*index_in_text] == '@') {
  260. // unquote-splicing
  261. step_char_and_eat_until_code(text, index_in_text);
  262. try expr = parse_expression(text, index_in_text);
  263. try ret = Memory::create_lisp_object_pair(expr, Memory::nil);
  264. try ret = Memory::create_lisp_object_pair(unquote_splicing_sym, ret);
  265. } else {
  266. // unquote
  267. try expr = parse_expression(text, index_in_text);
  268. try ret = Memory::create_lisp_object_pair(expr, Memory::nil);
  269. try ret = Memory::create_lisp_object_pair(unquote_sym, ret);
  270. }
  271. } break;
  272. case '[': {
  273. // vector
  274. try ret = parse_fancy_delimiter(text, index_in_text, '[', ']', vector_sym);
  275. } break;
  276. case '{': {
  277. // hashmap
  278. try ret = parse_fancy_delimiter(text, index_in_text, '{', '}', hash_map_sym);
  279. } break;
  280. default: break;
  281. }
  282. return ret;
  283. }
  284. proc parse_expression(char* text, int* index_in_text) -> Lisp_Object* {
  285. profile_this();
  286. Lisp_Object* ret;
  287. eat_until_code(text, index_in_text);
  288. try ret = maybe_expand_short_form(text, index_in_text);
  289. if (ret)
  290. return ret;
  291. if (text[*index_in_text] == '(') {
  292. try ret = parse_list(text, index_in_text);
  293. } else {
  294. try ret = parse_atom(text, index_in_text);
  295. }
  296. return ret;
  297. }
  298. proc parse_single_expression(wchar_t* text) -> Lisp_Object* {
  299. char* res = wchar_to_char(text);
  300. defer {free(res);};
  301. return parse_single_expression(res);
  302. }
  303. proc parse_single_expression(char* text) -> Lisp_Object* {
  304. parser_file = standard_in;
  305. parser_line = 1;
  306. parser_col = 1;
  307. int index_in_text = 0;
  308. Lisp_Object* ret;
  309. try ret = parse_expression(text, &index_in_text);
  310. return ret;
  311. }
  312. proc parse_program(String* file_name, char* text) -> Array_List<Lisp_Object*>* {
  313. profile_this();
  314. parser_file = file_name;
  315. parser_line = 1;
  316. parser_col = 0;
  317. Array_List<Lisp_Object*>* program = new Array_List<Lisp_Object*>;
  318. int index_in_text = 0;
  319. Lisp_Object* parsed;
  320. eat_until_code(text, &index_in_text);
  321. while (text[index_in_text] != '\0') {
  322. try parsed = parse_expression(text, &index_in_text);
  323. program->append(parsed);
  324. eat_until_code(text, &index_in_text);
  325. }
  326. return program;
  327. }
  328. }