Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.
 
 
 
 
 
 

306 строк
9.3 KiB

  1. Ast_Node_Array_List* create_Ast_Node_Array_List(int initial_length) {
  2. Ast_Node_Array_List* ret = new (Ast_Node_Array_List);
  3. // create one with 16 entries first
  4. ret->length = initial_length;
  5. ret->data = (struct Ast_Node**)malloc(initial_length * sizeof(struct Ast_Node));
  6. ret->next_index = 0;
  7. return ret;
  8. }
  9. void append_to_Ast_Node_Array_List(Ast_Node_Array_List* list, struct Ast_Node* node) {
  10. if (list->next_index == list->length) {
  11. list->length *= 2;
  12. list->data = (struct Ast_Node**)realloc(list->data, list->length * sizeof(struct Ast_Node));
  13. }
  14. list->data[list->next_index++] = node;
  15. }
  // Skips a ';' comment.
  //
  // If text[*index_in_text] is ';', the index is moved forward to the first
  // '\n', '\r' or '\0' that follows (that terminator itself is not consumed).
  // If the current character is not ';', nothing happens.
  void eat_comment_line(char* text, int* index_in_text) {
      if (text[*index_in_text] == ';') {
          // start looking right behind the ';'
          int pos = *index_in_text + 1;
          while (text[pos] != '\0' && text[pos] != '\n' && text[pos] != '\r')
              ++pos;
          *index_in_text = pos;
      }
  }
  // Advances *index_in_text past any run of spaces, tabs, line feeds and
  // carriage returns; stops at the first character that is none of those.
  void eat_whitespace(char* text, int* index_in_text) {
      for (;;) {
          switch (text[*index_in_text]) {
          case ' ':
          case '\t':
          case '\n':
          case '\r':
              ++(*index_in_text);
              continue;
          default:
              return;
          }
      }
  }
  // Skips any mix of comment lines and whitespace.
  //
  // Alternates eat_comment_line and eat_whitespace until a full round leaves
  // the index where it was, i.e. nothing more could be skipped.
  void eat_until_code(char* text, int* index_in_text) {
      for (;;) {
          const int round_start = *index_in_text;
          eat_comment_line(text, index_in_text);
          eat_whitespace(text, index_in_text);
          if (*index_in_text == round_start)
              break;
      }
  }
  // Reads one atom starting at text[*index_in_text].
  //
  // The atom ends before the first ' ', '(', ')', '\n', '\r', '\t' or the end
  // of the string; it may be empty. The input buffer is only read, never
  // written, so read-only text is fine.
  //
  // Returns a freshly malloc'ed, NUL-terminated copy of the atom that the
  // caller owns, and advances *index_in_text to the character right behind
  // the atom. If the allocation fails, returns a null pointer and leaves
  // *index_in_text unchanged.
  char* read_atom(char* text, int* index_in_text) {
      const char* start = text + *index_in_text;
      // strcspn also stops at the terminating '\0'
      int atom_length = (int)strcspn(start, " ()\n\r\t");

      char* atom = (char*)malloc(atom_length * sizeof(char) + 1); // plus null char
      if (!atom)
          return atom;

      memcpy(atom, start, atom_length);
      atom[atom_length] = '\0';

      // point to the character after the atom
      *index_in_text += atom_length;
      return atom;
  }
  71. Ast_Node* parse_number(char* text, int* index_in_text) {
  72. double number;
  73. char* str_number = read_atom(text, index_in_text);
  74. sscanf(str_number, "%lf", &number);
  75. return create_ast_node_number(number);
  76. }
  77. Ast_Node* parse_keyword(char* text, int* index_in_text) {
  78. // we are now on the colon
  79. ++(*index_in_text);
  80. char* str_keyword = read_atom(text, index_in_text);
  81. return create_ast_node_keyword(str_keyword);
  82. }
  83. Ast_Node* parse_symbol(char* text, int* index_in_text) {
  84. // we are now at the first char of the symbol
  85. char* str_symbol = read_atom(text, index_in_text);
  86. return create_ast_node_symbol(str_symbol);
  87. }
  88. Ast_Node* parse_string(char* text, int* index_in_text) {
  89. // the first character is the '"'
  90. ++(*index_in_text);
  91. // now we are at the first letter, if this is the closing '"' then
  92. // it's easy
  93. if (text[*index_in_text] == '"') {
  94. char* str = new(char);
  95. *str = '\0';
  96. return create_ast_node_string(str, 0);
  97. }
  98. // okay so the first letter was not actually closing the string...
  99. int string_length = 0;
  100. while (text[*index_in_text+string_length] != '"' ||
  101. text[*index_in_text+string_length] == '\\')
  102. {
  103. ++string_length;
  104. }
  105. // we found the end of the string
  106. text[*index_in_text+string_length] = '\0';
  107. char* string = (char*)malloc(string_length*sizeof(char)+1); // plus null char
  108. if (!unescape_string(text+(*index_in_text))) {
  109. create_error(Error_Type_Unknown_Error, create_ast_node_nil());
  110. return nullptr;
  111. }
  112. strcpy(string, text+(*index_in_text));
  113. /* manually copy to parse control sequences correctly */
  114. /* int temp_index = 0; */
  115. /* while (text+(temp_index+(*index_in_text)) != '\0') { */
  116. /* string[temp_index++] = text[temp_index+(*index_in_text)]; */
  117. /* } */
  118. /* string[temp_index++] = '\0'; */
  119. text[*index_in_text+string_length] = '"';
  120. *index_in_text += string_length +1; // plus one because we want to
  121. // go after the quotes
  122. return create_ast_node_string(string, string_length);
  123. }
  124. Ast_Node* parse_atom(char* text, int* index_in_text) {
  125. // numbers
  126. if ((text[*index_in_text] <= 57 && // if number
  127. text[*index_in_text] >= 48)
  128. ||
  129. ((text[*index_in_text] == '+' || // or if sign and then number
  130. text[*index_in_text] == '-')
  131. &&
  132. (text[*index_in_text +1] <= 57 &&
  133. text[*index_in_text +1] >= 48))
  134. ||
  135. ((text[*index_in_text] == '.') // or if . and then number
  136. &&
  137. (text[*index_in_text +1] <= 57 &&
  138. text[*index_in_text +1] >= 48)))
  139. return parse_number(text, index_in_text);
  140. // keywords
  141. if (text[*index_in_text] == ':')
  142. return parse_keyword(text, index_in_text);
  143. // strings
  144. if (text[*index_in_text] == '"')
  145. return parse_string(text, index_in_text);
  146. return parse_symbol(text, index_in_text);
  147. }
  148. Ast_Node* parse_expression(char* text, int* index_in_text) {
  149. if (text[*index_in_text] == '\'') {
  150. ++(*index_in_text);
  151. Ast_Node* result;
  152. if (text[*index_in_text] == '(' || text[*index_in_text] == '\'' ) {
  153. try {
  154. result = parse_expression(text, index_in_text);
  155. }
  156. } else {
  157. try {
  158. result = parse_atom(text, index_in_text);
  159. }
  160. }
  161. return create_ast_node_pair(
  162. create_ast_node_symbol("quote"),
  163. create_ast_node_pair(result, create_ast_node_nil()));
  164. }
  165. ++(*index_in_text);
  166. eat_whitespace(text, index_in_text);
  167. // if there was actually nothing in the list, return nil
  168. if (text[(*index_in_text)] == ')') {
  169. ++(*index_in_text);
  170. return create_ast_node_nil();
  171. }
  172. // okay there is something
  173. Ast_Node* head = new(Ast_Node);
  174. head->type = Ast_Node_Type_Pair;
  175. head->value.pair = new(Pair);
  176. Ast_Node* expression = head;
  177. while (true) {
  178. if (text[(*index_in_text)] == '(' || text[(*index_in_text)] == '\'' ) {
  179. try {
  180. head->value.pair->first = parse_expression(text, index_in_text);
  181. }
  182. } else {
  183. try {
  184. head->value.pair->first = parse_atom(text, index_in_text);
  185. }
  186. }
  187. eat_until_code(text, index_in_text);
  188. if (text[(*index_in_text)] == '\0') {
  189. create_error(Error_Type_Unexpected_Eof, expression);
  190. return nullptr;
  191. }
  192. if (text[(*index_in_text)] == ')') {
  193. head->value.pair->rest = create_ast_node_nil();
  194. ++(*index_in_text);
  195. break;
  196. } else if (text[(*index_in_text)] == '.') {
  197. ++(*index_in_text);
  198. eat_until_code(text, index_in_text);
  199. if (text[(*index_in_text)] == '(')
  200. head->value.pair->rest = parse_expression(text, index_in_text);
  201. else
  202. head->value.pair->rest = parse_atom(text, index_in_text);
  203. eat_until_code(text, index_in_text);
  204. if (text[(*index_in_text)] != ')')
  205. create_error(Error_Type_Syntax_Error, nullptr);
  206. ++(*index_in_text);
  207. break;
  208. } else {
  209. head->value.pair->rest = create_ast_node_pair(nullptr, nullptr);
  210. head = head->value.pair->rest;
  211. }
  212. }
  213. return expression;
  214. }
  215. Ast_Node* parse_single_expression(char* text) {
  216. int index_in_text = 0;
  217. Ast_Node* result;
  218. eat_until_code(text, &index_in_text);
  219. if (text[(index_in_text)] == '(' || text[(index_in_text)] == '\'' )
  220. try {
  221. result = parse_expression(text, &index_in_text);
  222. }
  223. else
  224. try {
  225. result = parse_atom(text, &index_in_text);
  226. }
  227. eat_until_code(text, &index_in_text);
  228. if (text[(index_in_text)] == '\0')
  229. return result;
  230. create_error(Error_Type_Trailing_Garbage, create_ast_node_nil());
  231. return nullptr;
  232. }
  233. Ast_Node_Array_List* parse_program(char* text) {
  234. Ast_Node_Array_List* program = create_Ast_Node_Array_List(16);
  235. int index_in_text = 0;
  236. while (text[index_in_text] != '\0') {
  237. switch (text[index_in_text]) {
  238. case '(': {
  239. Ast_Node* parsed;
  240. try {
  241. parsed = parse_expression(text, &index_in_text);
  242. }
  243. append_to_Ast_Node_Array_List(program, parsed);
  244. } break;
  245. case ';':
  246. case ' ':
  247. case '\t':
  248. case '\n':
  249. case '\r': {
  250. eat_until_code(text, &index_in_text);
  251. } break;
  252. default:
  253. /* syntax error */
  254. create_error(Error_Type_Syntax_Error, create_ast_node_nil());
  255. return nullptr;
  256. }
  257. }
  258. return program;
  259. }