src/debugger/parser.c (view raw)
1#include "parser.h"
2
3#include "util/string.h"
4
5static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
6 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
7 lvNext->token.type = TOKEN_OPERATOR_TYPE;
8 switch (operator) {
9 case '+':
10 lvNext->token.operatorValue = OP_ADD;
11 break;
12 case '-':
13 lvNext->token.operatorValue = OP_SUBTRACT;
14 break;
15 case '*':
16 lvNext->token.operatorValue = OP_MULTIPLY;
17 break;
18 case '/':
19 lvNext->token.operatorValue = OP_DIVIDE;
20 break;
21 default:
22 lvNext->token.type = TOKEN_ERROR_TYPE;
23 break;
24 }
25 lvNext->next = lv->next;
26 lv->next = lvNext;
27 lv = lvNext;
28 lvNext = malloc(sizeof(struct LexVector));
29 lvNext->next = lv->next;
30 lvNext->token.type = TOKEN_ERROR_TYPE;
31 lv->next = lvNext;
32 return lvNext;
33}
34
35size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
36 if (!string || length < 1) {
37 return 0;
38 }
39
40 uint32_t next = 0;
41 size_t adjusted = 0;
42
43 enum LexState state = LEX_ROOT;
44 const char* tokenStart = 0;
45 struct LexVector* lvNext;
46
47 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
48 char token = string[0];
49 ++string;
50 ++adjusted;
51 --length;
52 switch (state) {
53 case LEX_ROOT:
54 tokenStart = string - 1;
55 switch (token) {
56 case '1':
57 case '2':
58 case '3':
59 case '4':
60 case '5':
61 case '6':
62 case '7':
63 case '8':
64 case '9':
65 state = LEX_EXPECT_DECIMAL;
66 next = token - '0';
67 break;
68 case '0':
69 state = LEX_EXPECT_PREFIX;
70 next = 0;
71 break;
72 case '$':
73 state = LEX_EXPECT_HEX;
74 next = 0;
75 break;
76 case '(':
77 state = LEX_ROOT;
78 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
79 lvNext = malloc(sizeof(struct LexVector));
80 lvNext->next = lv->next;
81 lvNext->token.type = TOKEN_ERROR_TYPE;
82 lv->next = lvNext;
83 lv = lvNext;
84 break;
85 default:
86 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
87 state = LEX_EXPECT_IDENTIFIER;
88 } else {
89 state = LEX_ERROR;
90 }
91 break;
92 };
93 break;
94 case LEX_EXPECT_IDENTIFIER:
95 switch (token) {
96 case '+':
97 case '-':
98 case '*':
99 case '/':
100 lv->token.type = TOKEN_IDENTIFIER_TYPE;
101 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
102 lv = _lexOperator(lv, token);
103 state = LEX_ROOT;
104 break;
105 case ')':
106 lv->token.type = TOKEN_IDENTIFIER_TYPE;
107 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
108 state = LEX_EXPECT_OPERATOR;
109 break;
110 default:
111 break;
112 }
113 break;
114 case LEX_EXPECT_BINARY:
115 switch (token) {
116 case '0':
117 case '1':
118 // TODO: handle overflow
119 next <<= 1;
120 next += token - '0';
121 break;
122 case '+':
123 case '-':
124 case '*':
125 case '/':
126 lv->token.type = TOKEN_UINT_TYPE;
127 lv->token.uintValue = next;
128 lv = _lexOperator(lv, token);
129 state = LEX_ROOT;
130 break;
131 case ')':
132 lv->token.type = TOKEN_UINT_TYPE;
133 lv->token.uintValue = next;
134 state = LEX_EXPECT_OPERATOR;
135 break;
136 default:
137 state = LEX_ERROR;
138 break;
139 }
140 break;
141 case LEX_EXPECT_DECIMAL:
142 switch (token) {
143 case '0':
144 case '1':
145 case '2':
146 case '3':
147 case '4':
148 case '5':
149 case '6':
150 case '7':
151 case '8':
152 case '9':
153 // TODO: handle overflow
154 next *= 10;
155 next += token - '0';
156 break;
157 case '+':
158 case '-':
159 case '*':
160 case '/':
161 lv->token.type = TOKEN_UINT_TYPE;
162 lv->token.uintValue = next;
163 lv = _lexOperator(lv, token);
164 state = LEX_ROOT;
165 break;
166 case ')':
167 lv->token.type = TOKEN_UINT_TYPE;
168 lv->token.uintValue = next;
169 state = LEX_EXPECT_OPERATOR;
170 break;
171 default:
172 state = LEX_ERROR;
173 }
174 break;
175 case LEX_EXPECT_HEX:
176 switch (token) {
177 case '0':
178 case '1':
179 case '2':
180 case '3':
181 case '4':
182 case '5':
183 case '6':
184 case '7':
185 case '8':
186 case '9':
187 // TODO: handle overflow
188 next *= 16;
189 next += token - '0';
190 break;
191 case 'A':
192 case 'B':
193 case 'C':
194 case 'D':
195 case 'E':
196 case 'F':
197 // TODO: handle overflow
198 next *= 16;
199 next += token - 'A' + 10;
200 break;
201 case 'a':
202 case 'b':
203 case 'c':
204 case 'd':
205 case 'e':
206 case 'f':
207 // TODO: handle overflow
208 next *= 16;
209 next += token - 'a' + 10;
210 break;
211 case '+':
212 case '-':
213 case '*':
214 case '/':
215 lv->token.type = TOKEN_UINT_TYPE;
216 lv->token.uintValue = next;
217 lv = _lexOperator(lv, token);
218 state = LEX_ROOT;
219 break;
220 case ')':
221 lv->token.type = TOKEN_UINT_TYPE;
222 lv->token.uintValue = next;
223 state = LEX_EXPECT_OPERATOR;
224 break;
225 default:
226 state = LEX_ERROR;
227 break;
228 }
229 break;
230 case LEX_EXPECT_PREFIX:
231 switch (token) {
232 case 'X':
233 case 'x':
234 next = 0;
235 state = LEX_EXPECT_HEX;
236 break;
237 case 'B':
238 case 'b':
239 next = 0;
240 state = LEX_EXPECT_BINARY;
241 break;
242 case '+':
243 case '-':
244 case '*':
245 case '/':
246 lv->token.type = TOKEN_UINT_TYPE;
247 lv->token.uintValue = next;
248 lv = _lexOperator(lv, token);
249 state = LEX_ROOT;
250 break;
251 case ')':
252 lv->token.type = TOKEN_UINT_TYPE;
253 lv->token.uintValue = next;
254 state = LEX_EXPECT_OPERATOR;
255 break;
256 case '0':
257 case '1':
258 case '2':
259 case '3':
260 case '4':
261 case '5':
262 case '6':
263 case '7':
264 case '8':
265 case '9':
266 next = token - '0';
267 state = LEX_EXPECT_DECIMAL;
268 break;
269 default:
270 state = LEX_ERROR;
271 }
272 break;
273 case LEX_EXPECT_OPERATOR:
274 switch (token) {
275 case '+':
276 case '-':
277 case '*':
278 case '/':
279 lvNext = malloc(sizeof(struct LexVector));
280 lvNext->next = lv->next;
281 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
282 lv->next = lvNext;
283 lv = _lexOperator(lv->next, token);
284 state = LEX_ROOT;
285 break;
286 default:
287 state = LEX_ERROR;
288 }
289 break;
290 case LEX_ERROR:
291 // This shouldn't be reached
292 break;
293 }
294 }
295
296 switch (state) {
297 case LEX_EXPECT_BINARY:
298 case LEX_EXPECT_DECIMAL:
299 case LEX_EXPECT_HEX:
300 case LEX_EXPECT_PREFIX:
301 lv->token.type = TOKEN_UINT_TYPE;
302 lv->token.uintValue = next;
303 break;
304 case LEX_EXPECT_IDENTIFIER:
305 lv->token.type = TOKEN_IDENTIFIER_TYPE;
306 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
307 break;
308 case LEX_EXPECT_OPERATOR:
309 lvNext = malloc(sizeof(struct LexVector));
310 lvNext->next = lv->next;
311 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
312 lv->next = lvNext;
313 break;
314 case LEX_ERROR:
315 default:
316 lv->token.type = TOKEN_ERROR_TYPE;
317 break;
318 }
319 return adjusted;
320}
321
322static const int _operatorPrecedence[] = {
323 2,
324 1,
325 1,
326 0,
327 0
328};
329
330static struct ParseTree* _parseTreeCreate() {
331 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
332 tree->token.type = TOKEN_ERROR_TYPE;
333 tree->rhs = 0;
334 tree->lhs = 0;
335 return tree;
336}
337
338static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
339 struct ParseTree* newTree = 0;
340 while (lv) {
341 int newPrecedence;
342 switch (lv->token.type) {
343 case TOKEN_IDENTIFIER_TYPE:
344 case TOKEN_UINT_TYPE:
345 if (tree->token.type == TOKEN_ERROR_TYPE) {
346 tree->token = lv->token;
347 lv = lv->next;
348 } else {
349 tree->token.type = TOKEN_ERROR_TYPE;
350 return 0;
351 }
352 break;
353 case TOKEN_OPEN_PAREN_TYPE:
354 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
355 break;
356 case TOKEN_CLOSE_PAREN_TYPE:
357 if (openParens <= 0) {
358 tree->token.type = TOKEN_ERROR_TYPE;
359 return 0;
360 }
361 return lv->next;
362 break;
363 case TOKEN_OPERATOR_TYPE:
364 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
365 if (newPrecedence < precedence) {
366 newTree = _parseTreeCreate();
367 *newTree = *tree;
368 tree->lhs = newTree;
369 tree->rhs = _parseTreeCreate();
370 tree->token = lv->token;
371 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
372 if (tree->token.type == TOKEN_ERROR_TYPE) {
373 tree->token.type = TOKEN_ERROR_TYPE;
374 }
375 } else {
376 return lv;
377 }
378 break;
379 case TOKEN_ERROR_TYPE:
380 tree->token.type = TOKEN_ERROR_TYPE;
381 return 0;
382 }
383 }
384
385 return 0;
386}
387
388void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
389 if (!tree) {
390 return;
391 }
392
393 tree->token.type = TOKEN_ERROR_TYPE;
394 tree->lhs = 0;
395 tree->rhs = 0;
396
397 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
398}
399
400void lexFree(struct LexVector* lv) {
401 while (lv) {
402 struct LexVector* lvNext = lv->next;
403 free(lv);
404 lv = lvNext;
405 }
406}
407
408void parseFree(struct ParseTree* tree) {
409 if (!tree) {
410 return;
411 }
412
413 parseFree(tree->lhs);
414 parseFree(tree->rhs);
415
416 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
417 free(tree->token.identifierValue);
418 }
419 free(tree);
420}