src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba/internal/debugger/parser.h>
7
8#include <mgba-util/string.h>
9
10enum LexState {
11 LEX_ERROR = -1,
12 LEX_ROOT = 0,
13 LEX_EXPECT_IDENTIFIER,
14 LEX_EXPECT_BINARY,
15 LEX_EXPECT_DECIMAL,
16 LEX_EXPECT_HEX,
17 LEX_EXPECT_PREFIX,
18 LEX_EXPECT_OPERATOR
19};
20
21static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
22 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
23 lvNext->token.type = TOKEN_OPERATOR_TYPE;
24 switch (operator) {
25 case '+':
26 lvNext->token.operatorValue = OP_ADD;
27 break;
28 case '-':
29 lvNext->token.operatorValue = OP_SUBTRACT;
30 break;
31 case '*':
32 lvNext->token.operatorValue = OP_MULTIPLY;
33 break;
34 case '/':
35 lvNext->token.operatorValue = OP_DIVIDE;
36 break;
37 case '&':
38 lvNext->token.operatorValue = OP_AND;
39 break;
40 case '|':
41 lvNext->token.operatorValue = OP_OR;
42 break;
43 case '^':
44 lvNext->token.operatorValue = OP_XOR;
45 break;
46 default:
47 lvNext->token.type = TOKEN_ERROR_TYPE;
48 break;
49 }
50 lvNext->next = lv->next;
51 lv->next = lvNext;
52 lv = lvNext;
53 lvNext = malloc(sizeof(struct LexVector));
54 lvNext->next = lv->next;
55 lvNext->token.type = TOKEN_ERROR_TYPE;
56 lv->next = lvNext;
57 return lvNext;
58}
59
60size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
61 if (!string || length < 1) {
62 return 0;
63 }
64
65 uint32_t next = 0;
66 size_t adjusted = 0;
67
68 enum LexState state = LEX_ROOT;
69 const char* tokenStart = 0;
70 struct LexVector* lvNext;
71
72 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
73 char token = string[0];
74 ++string;
75 ++adjusted;
76 --length;
77 switch (state) {
78 case LEX_ROOT:
79 tokenStart = string - 1;
80 switch (token) {
81 case '1':
82 case '2':
83 case '3':
84 case '4':
85 case '5':
86 case '6':
87 case '7':
88 case '8':
89 case '9':
90 state = LEX_EXPECT_DECIMAL;
91 next = token - '0';
92 break;
93 case '0':
94 state = LEX_EXPECT_PREFIX;
95 next = 0;
96 break;
97 case '$':
98 state = LEX_EXPECT_HEX;
99 next = 0;
100 break;
101 case '(':
102 state = LEX_ROOT;
103 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
104 lvNext = malloc(sizeof(struct LexVector));
105 lvNext->next = lv->next;
106 lvNext->token.type = TOKEN_ERROR_TYPE;
107 lv->next = lvNext;
108 lv = lvNext;
109 break;
110 default:
111 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
112 state = LEX_EXPECT_IDENTIFIER;
113 } else {
114 state = LEX_ERROR;
115 }
116 break;
117 };
118 break;
119 case LEX_EXPECT_IDENTIFIER:
120 switch (token) {
121 case '+':
122 case '-':
123 case '*':
124 case '/':
125 case '&':
126 case '|':
127 case '^':
128 lv->token.type = TOKEN_IDENTIFIER_TYPE;
129 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
130 lv = _lexOperator(lv, token);
131 state = LEX_ROOT;
132 break;
133 case ')':
134 lv->token.type = TOKEN_IDENTIFIER_TYPE;
135 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
136 state = LEX_EXPECT_OPERATOR;
137 break;
138 default:
139 break;
140 }
141 break;
142 case LEX_EXPECT_BINARY:
143 switch (token) {
144 case '0':
145 case '1':
146 // TODO: handle overflow
147 next <<= 1;
148 next += token - '0';
149 break;
150 case '+':
151 case '-':
152 case '*':
153 case '/':
154 lv->token.type = TOKEN_UINT_TYPE;
155 lv->token.uintValue = next;
156 lv = _lexOperator(lv, token);
157 state = LEX_ROOT;
158 break;
159 case ')':
160 lv->token.type = TOKEN_UINT_TYPE;
161 lv->token.uintValue = next;
162 state = LEX_EXPECT_OPERATOR;
163 break;
164 default:
165 state = LEX_ERROR;
166 break;
167 }
168 break;
169 case LEX_EXPECT_DECIMAL:
170 switch (token) {
171 case '0':
172 case '1':
173 case '2':
174 case '3':
175 case '4':
176 case '5':
177 case '6':
178 case '7':
179 case '8':
180 case '9':
181 // TODO: handle overflow
182 next *= 10;
183 next += token - '0';
184 break;
185 case '+':
186 case '-':
187 case '*':
188 case '/':
189 case '&':
190 case '|':
191 case '^':
192 lv->token.type = TOKEN_UINT_TYPE;
193 lv->token.uintValue = next;
194 lv = _lexOperator(lv, token);
195 state = LEX_ROOT;
196 break;
197 case ')':
198 lv->token.type = TOKEN_UINT_TYPE;
199 lv->token.uintValue = next;
200 state = LEX_EXPECT_OPERATOR;
201 break;
202 default:
203 state = LEX_ERROR;
204 }
205 break;
206 case LEX_EXPECT_HEX:
207 switch (token) {
208 case '0':
209 case '1':
210 case '2':
211 case '3':
212 case '4':
213 case '5':
214 case '6':
215 case '7':
216 case '8':
217 case '9':
218 // TODO: handle overflow
219 next *= 16;
220 next += token - '0';
221 break;
222 case 'A':
223 case 'B':
224 case 'C':
225 case 'D':
226 case 'E':
227 case 'F':
228 // TODO: handle overflow
229 next *= 16;
230 next += token - 'A' + 10;
231 break;
232 case 'a':
233 case 'b':
234 case 'c':
235 case 'd':
236 case 'e':
237 case 'f':
238 // TODO: handle overflow
239 next *= 16;
240 next += token - 'a' + 10;
241 break;
242 case '+':
243 case '-':
244 case '*':
245 case '/':
246 case '&':
247 case '|':
248 case '^':
249 lv->token.type = TOKEN_UINT_TYPE;
250 lv->token.uintValue = next;
251 lv = _lexOperator(lv, token);
252 state = LEX_ROOT;
253 break;
254 case ':':
255 lv->token.type = TOKEN_SEGMENT_TYPE;
256 lv->token.uintValue = next;
257 lvNext = malloc(sizeof(struct LexVector));
258 lvNext->next = lv->next;
259 lvNext->token.type = TOKEN_UINT_TYPE;
260 lv->next = lvNext;
261 lv = lvNext;
262 next = 0;
263 state = LEX_EXPECT_HEX;
264 break;
265 case ')':
266 lv->token.type = TOKEN_UINT_TYPE;
267 lv->token.uintValue = next;
268 state = LEX_EXPECT_OPERATOR;
269 break;
270 default:
271 state = LEX_ERROR;
272 break;
273 }
274 break;
275 case LEX_EXPECT_PREFIX:
276 switch (token) {
277 case 'X':
278 case 'x':
279 next = 0;
280 state = LEX_EXPECT_HEX;
281 break;
282 case 'B':
283 case 'b':
284 next = 0;
285 state = LEX_EXPECT_BINARY;
286 break;
287 case '+':
288 case '-':
289 case '*':
290 case '/':
291 case '&':
292 case '|':
293 case '^':
294 lv->token.type = TOKEN_UINT_TYPE;
295 lv->token.uintValue = next;
296 lv = _lexOperator(lv, token);
297 state = LEX_ROOT;
298 break;
299 case ')':
300 lv->token.type = TOKEN_UINT_TYPE;
301 lv->token.uintValue = next;
302 state = LEX_EXPECT_OPERATOR;
303 break;
304 case '0':
305 case '1':
306 case '2':
307 case '3':
308 case '4':
309 case '5':
310 case '6':
311 case '7':
312 case '8':
313 case '9':
314 next = token - '0';
315 state = LEX_EXPECT_DECIMAL;
316 break;
317 default:
318 state = LEX_ERROR;
319 }
320 break;
321 case LEX_EXPECT_OPERATOR:
322 switch (token) {
323 case '+':
324 case '-':
325 case '*':
326 case '/':
327 case '&':
328 case '|':
329 case '^':
330 lvNext = malloc(sizeof(struct LexVector));
331 lvNext->next = lv->next;
332 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
333 lv->next = lvNext;
334 lv = _lexOperator(lv->next, token);
335 state = LEX_ROOT;
336 break;
337 default:
338 state = LEX_ERROR;
339 }
340 break;
341 case LEX_ERROR:
342 // This shouldn't be reached
343 break;
344 }
345 }
346
347 switch (state) {
348 case LEX_EXPECT_BINARY:
349 case LEX_EXPECT_DECIMAL:
350 case LEX_EXPECT_HEX:
351 case LEX_EXPECT_PREFIX:
352 lv->token.type = TOKEN_UINT_TYPE;
353 lv->token.uintValue = next;
354 break;
355 case LEX_EXPECT_IDENTIFIER:
356 lv->token.type = TOKEN_IDENTIFIER_TYPE;
357 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
358 break;
359 case LEX_EXPECT_OPERATOR:
360 lvNext = malloc(sizeof(struct LexVector));
361 lvNext->next = lv->next;
362 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
363 lv->next = lvNext;
364 break;
365 case LEX_ERROR:
366 default:
367 lv->token.type = TOKEN_ERROR_TYPE;
368 break;
369 }
370 return adjusted;
371}
372
373static const int _operatorPrecedence[] = {
374 14,
375 4,
376 4,
377 3,
378 3,
379 8,
380 10,
381 9,
382 6,
383 6,
384 7,
385 6,
386 6
387};
388
389static struct ParseTree* _parseTreeCreate() {
390 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
391 tree->token.type = TOKEN_ERROR_TYPE;
392 tree->rhs = 0;
393 tree->lhs = 0;
394 return tree;
395}
396
397static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
398 struct ParseTree* newTree = 0;
399 while (lv) {
400 int newPrecedence;
401 switch (lv->token.type) {
402 case TOKEN_IDENTIFIER_TYPE:
403 case TOKEN_UINT_TYPE:
404 if (tree->token.type == TOKEN_ERROR_TYPE) {
405 tree->token = lv->token;
406 lv = lv->next;
407 } else {
408 tree->token.type = TOKEN_ERROR_TYPE;
409 return 0;
410 }
411 break;
412 case TOKEN_SEGMENT_TYPE:
413 tree->lhs = _parseTreeCreate();
414 tree->lhs->token.type = TOKEN_UINT_TYPE;
415 tree->lhs->token.uintValue = lv->token.uintValue;
416 tree->rhs = _parseTreeCreate();
417 tree->token.type = TOKEN_SEGMENT_TYPE;
418 lv = _parseExpression(tree->rhs, lv->next, precedence, openParens);
419 if (tree->token.type == TOKEN_ERROR_TYPE) {
420 tree->token.type = TOKEN_ERROR_TYPE;
421 }
422 break;
423 case TOKEN_OPEN_PAREN_TYPE:
424 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
425 break;
426 case TOKEN_CLOSE_PAREN_TYPE:
427 if (openParens <= 0) {
428 tree->token.type = TOKEN_ERROR_TYPE;
429 return 0;
430 }
431 return lv->next;
432 break;
433 case TOKEN_OPERATOR_TYPE:
434 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
435 if (newPrecedence < precedence) {
436 newTree = _parseTreeCreate();
437 *newTree = *tree;
438 tree->lhs = newTree;
439 tree->rhs = _parseTreeCreate();
440 tree->token = lv->token;
441 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
442 if (tree->token.type == TOKEN_ERROR_TYPE) {
443 tree->token.type = TOKEN_ERROR_TYPE;
444 }
445 } else {
446 return lv;
447 }
448 break;
449 case TOKEN_ERROR_TYPE:
450 tree->token.type = TOKEN_ERROR_TYPE;
451 return 0;
452 }
453 }
454
455 return 0;
456}
457
458void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
459 if (!tree) {
460 return;
461 }
462
463 tree->token.type = TOKEN_ERROR_TYPE;
464 tree->lhs = 0;
465 tree->rhs = 0;
466
467 _parseExpression(tree, lv, INT_MAX, 0);
468}
469
470void lexFree(struct LexVector* lv) {
471 while (lv) {
472 struct LexVector* lvNext = lv->next;
473 free(lv);
474 lv = lvNext;
475 }
476}
477
478void parseFree(struct ParseTree* tree) {
479 if (!tree) {
480 return;
481 }
482
483 parseFree(tree->lhs);
484 parseFree(tree->rhs);
485
486 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
487 free(tree->token.identifierValue);
488 }
489 free(tree);
490}