src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba/internal/debugger/parser.h>
7
8#include <mgba/debugger/debugger.h>
9#include <mgba-util/string.h>
10
11DEFINE_VECTOR(LexVector, struct Token);
12
13enum LexState {
14 LEX_ERROR = -1,
15 LEX_ROOT = 0,
16 LEX_EXPECT_IDENTIFIER,
17 LEX_EXPECT_BINARY_FIRST,
18 LEX_EXPECT_BINARY,
19 LEX_EXPECT_DECIMAL,
20 LEX_EXPECT_HEX_FIRST,
21 LEX_EXPECT_HEX,
22 LEX_EXPECT_PREFIX,
23 LEX_EXPECT_OPERATOR,
24 LEX_EXPECT_OPERATOR2,
25};
26
27static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
28 if (*state == LEX_EXPECT_OPERATOR2) {
29 struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
30 if (lvNext->type != TOKEN_OPERATOR_TYPE) {
31 lvNext->type = TOKEN_ERROR_TYPE;
32 *state = LEX_ERROR;
33 return;
34 }
35 switch (lvNext->operatorValue) {
36 case OP_AND:
37 if (operator == '&') {
38 lvNext->operatorValue = OP_LOGICAL_AND;
39 *state = LEX_ROOT;
40 return;
41 }
42 break;
43 case OP_OR:
44 if (operator == '|') {
45 lvNext->operatorValue = OP_LOGICAL_OR;
46 *state = LEX_ROOT;
47 return;
48 }
49 break;
50 case OP_LESS:
51 if (operator == '=') {
52 lvNext->operatorValue = OP_LE;
53 *state = LEX_ROOT;
54 return;
55 }
56 if (operator == '<') {
57 lvNext->operatorValue = OP_SHIFT_L;
58 *state = LEX_ROOT;
59 return;
60 }
61 break;
62 case OP_GREATER:
63 if (operator == '=') {
64 lvNext->operatorValue = OP_GE;
65 *state = LEX_ROOT;
66 return;
67 }
68 if (operator == '>') {
69 lvNext->operatorValue = OP_SHIFT_R;
70 *state = LEX_ROOT;
71 return;
72 }
73 break;
74 case OP_ASSIGN:
75 if (operator == '=') {
76 lvNext->operatorValue = OP_EQUAL;
77 *state = LEX_ROOT;
78 return;
79 }
80 break;
81 case OP_NOT:
82 if (operator == '=') {
83 lvNext->operatorValue = OP_NOT_EQUAL;
84 *state = LEX_ROOT;
85 return;
86 }
87 break;
88 default:
89 break;
90 }
91 *state = LEX_ERROR;
92 return;
93 }
94 struct Token* lvNext = LexVectorAppend(lv);
95 lvNext->type = TOKEN_OPERATOR_TYPE;
96 *state = LEX_EXPECT_OPERATOR2;
97 switch (operator) {
98 case '=':
99 lvNext->operatorValue = OP_ASSIGN;
100 break;
101 case '+':
102 lvNext->operatorValue = OP_ADD;
103 break;
104 case '-':
105 lvNext->operatorValue = OP_SUBTRACT;
106 break;
107 case '*':
108 lvNext->operatorValue = OP_MULTIPLY;
109 break;
110 case '/':
111 lvNext->operatorValue = OP_DIVIDE;
112 break;
113 case '%':
114 lvNext->operatorValue = OP_MODULO;
115 break;
116 case '&':
117 lvNext->operatorValue = OP_AND;
118 break;
119 case '|':
120 lvNext->operatorValue = OP_OR;
121 break;
122 case '^':
123 lvNext->operatorValue = OP_XOR;
124 break;
125 case '<':
126 lvNext->operatorValue = OP_LESS;
127 break;
128 case '>':
129 lvNext->operatorValue = OP_GREATER;
130 break;
131 case '!':
132 lvNext->operatorValue = OP_NOT;
133 break;
134 default:
135 lvNext->type = TOKEN_ERROR_TYPE;
136 break;
137 }
138}
139
140static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
141 struct Token* lvNext;
142
143 switch (token) {
144 case '=':
145 case '+':
146 case '-':
147 case '*':
148 case '/':
149 case '%':
150 case '&':
151 case '|':
152 case '^':
153 case '<':
154 case '>':
155 case '!':
156 lvNext = LexVectorAppend(lv);
157 lvNext->type = TOKEN_UINT_TYPE;
158 lvNext->uintValue = next;
159 _lexOperator(lv, token, state);
160 break;
161 case ')':
162 lvNext = LexVectorAppend(lv);
163 lvNext->type = TOKEN_UINT_TYPE;
164 lvNext->uintValue = next;
165 lvNext = LexVectorAppend(lv);
166 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
167 *state = LEX_EXPECT_OPERATOR;
168 break;
169 case ' ':
170 case '\t':
171 lvNext = LexVectorAppend(lv);
172 lvNext->type = TOKEN_UINT_TYPE;
173 lvNext->uintValue = next;
174 *state = LEX_EXPECT_OPERATOR;
175 break;
176 default:
177 *state = LEX_ERROR;
178 break;
179 }
180}
181
182size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
183 if (!string || length < 1) {
184 return 0;
185 }
186
187 uint32_t next = 0;
188 size_t adjusted = 0;
189
190 enum LexState state = LEX_ROOT;
191 const char* tokenStart = 0;
192 struct Token* lvNext;
193
194 if (!eol) {
195 eol = " \r\n";
196 }
197
198 while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
199 char token = string[0];
200 ++string;
201 ++adjusted;
202 --length;
203 switch (state) {
204 case LEX_EXPECT_OPERATOR2:
205 switch (token) {
206 case '&':
207 case '|':
208 case '=':
209 case '<':
210 case '>':
211 _lexOperator(lv, token, &state);
212 break;
213 }
214 if (state != LEX_EXPECT_OPERATOR2) {
215 break;
216 }
217 // Fall through
218 case LEX_ROOT:
219 tokenStart = string - 1;
220 switch (token) {
221 case '1':
222 case '2':
223 case '3':
224 case '4':
225 case '5':
226 case '6':
227 case '7':
228 case '8':
229 case '9':
230 state = LEX_EXPECT_DECIMAL;
231 next = token - '0';
232 break;
233 case '0':
234 state = LEX_EXPECT_PREFIX;
235 next = 0;
236 break;
237 case '$':
238 state = LEX_EXPECT_HEX_FIRST;
239 next = 0;
240 break;
241 case '%':
242 state = LEX_EXPECT_BINARY_FIRST;
243 next = 0;
244 break;
245 case '(':
246 state = LEX_ROOT;
247 lvNext = LexVectorAppend(lv);
248 lvNext->type = TOKEN_OPEN_PAREN_TYPE;
249 break;
250 case ' ':
251 case '\t':
252 break;
253 default:
254 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
255 state = LEX_EXPECT_IDENTIFIER;
256 } else {
257 state = LEX_ERROR;
258 }
259 break;
260 };
261 break;
262 case LEX_EXPECT_IDENTIFIER:
263 switch (token) {
264 case '=':
265 case '+':
266 case '-':
267 case '*':
268 case '/':
269 case '%':
270 case '&':
271 case '|':
272 case '^':
273 case '<':
274 case '>':
275 case '!':
276 lvNext = LexVectorAppend(lv);
277 lvNext->type = TOKEN_IDENTIFIER_TYPE;
278 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
279 _lexOperator(lv, token, &state);
280 break;
281 case ')':
282 lvNext = LexVectorAppend(lv);
283 lvNext->type = TOKEN_IDENTIFIER_TYPE;
284 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
285 lvNext = LexVectorAppend(lv);
286 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
287 state = LEX_EXPECT_OPERATOR;
288 break;
289 case ' ':
290 case '\t':
291 lvNext = LexVectorAppend(lv);
292 lvNext->type = TOKEN_IDENTIFIER_TYPE;
293 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
294 state = LEX_EXPECT_OPERATOR;
295 break;
296 default:
297 break;
298 }
299 break;
300 case LEX_EXPECT_BINARY_FIRST:
301 state = LEX_EXPECT_BINARY;
302 // Fall through
303 case LEX_EXPECT_BINARY:
304 switch (token) {
305 case '0':
306 case '1':
307 // TODO: handle overflow
308 next <<= 1;
309 next += token - '0';
310 break;
311 default:
312 _lexValue(lv, token, next, &state);
313 break;
314 }
315 break;
316 case LEX_EXPECT_DECIMAL:
317 switch (token) {
318 case '0':
319 case '1':
320 case '2':
321 case '3':
322 case '4':
323 case '5':
324 case '6':
325 case '7':
326 case '8':
327 case '9':
328 // TODO: handle overflow
329 next *= 10;
330 next += token - '0';
331 break;
332 default:
333 _lexValue(lv, token, next, &state);
334 break;
335 }
336 break;
337 case LEX_EXPECT_HEX_FIRST:
338 state = LEX_EXPECT_HEX;
339 // Fall through
340 case LEX_EXPECT_HEX:
341 switch (token) {
342 case '0':
343 case '1':
344 case '2':
345 case '3':
346 case '4':
347 case '5':
348 case '6':
349 case '7':
350 case '8':
351 case '9':
352 // TODO: handle overflow
353 next *= 16;
354 next += token - '0';
355 break;
356 case 'A':
357 case 'B':
358 case 'C':
359 case 'D':
360 case 'E':
361 case 'F':
362 // TODO: handle overflow
363 next *= 16;
364 next += token - 'A' + 10;
365 break;
366 case 'a':
367 case 'b':
368 case 'c':
369 case 'd':
370 case 'e':
371 case 'f':
372 // TODO: handle overflow
373 next *= 16;
374 next += token - 'a' + 10;
375 break;
376 case ':':
377 lvNext = LexVectorAppend(lv);
378 lvNext->type = TOKEN_SEGMENT_TYPE;
379 lvNext->uintValue = next;
380 next = 0;
381 break;
382 default:
383 _lexValue(lv, token, next, &state);
384 break;
385 }
386 break;
387 case LEX_EXPECT_PREFIX:
388 switch (token) {
389 case 'X':
390 case 'x':
391 next = 0;
392 state = LEX_EXPECT_HEX_FIRST;
393 break;
394 case 'B':
395 case 'b':
396 next = 0;
397 state = LEX_EXPECT_BINARY_FIRST;
398 break;
399 case '0':
400 case '1':
401 case '2':
402 case '3':
403 case '4':
404 case '5':
405 case '6':
406 case '7':
407 case '8':
408 case '9':
409 next = token - '0';
410 state = LEX_EXPECT_DECIMAL;
411 break;
412 default:
413 _lexValue(lv, token, next, &state);
414 break;
415 }
416 break;
417 case LEX_EXPECT_OPERATOR:
418 switch (token) {
419 case '=':
420 case '+':
421 case '-':
422 case '*':
423 case '/':
424 case '%':
425 case '&':
426 case '|':
427 case '^':
428 case '<':
429 case '>':
430 case '!':
431 _lexOperator(lv, token, &state);
432 break;
433 case ')':
434 lvNext = LexVectorAppend(lv);
435 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
436 break;
437 case ' ':
438 case '\t':
439 break;
440 default:
441 state = LEX_ERROR;
442 }
443 break;
444 case LEX_ERROR:
445 // This shouldn't be reached
446 break;
447 }
448 }
449
450 switch (state) {
451 case LEX_EXPECT_BINARY:
452 case LEX_EXPECT_DECIMAL:
453 case LEX_EXPECT_HEX:
454 case LEX_EXPECT_PREFIX:
455 lvNext = LexVectorAppend(lv);
456 lvNext->type = TOKEN_UINT_TYPE;
457 lvNext->uintValue = next;
458 break;
459 case LEX_EXPECT_IDENTIFIER:
460 lvNext = LexVectorAppend(lv);
461 lvNext->type = TOKEN_IDENTIFIER_TYPE;
462 lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
463 break;
464 case LEX_ROOT:
465 case LEX_EXPECT_OPERATOR:
466 case LEX_EXPECT_OPERATOR2:
467 break;
468 case LEX_EXPECT_BINARY_FIRST:
469 case LEX_EXPECT_HEX_FIRST:
470 case LEX_ERROR:
471 default:
472 lvNext = LexVectorAppend(lv);
473 lvNext->type = TOKEN_ERROR_TYPE;
474 break;
475 }
476 return adjusted;
477}
478
479static const int _operatorPrecedence[] = {
480 [OP_ASSIGN] = 14,
481 [OP_ADD] = 4,
482 [OP_SUBTRACT] = 4,
483 [OP_MULTIPLY] = 3,
484 [OP_DIVIDE] = 3,
485 [OP_MODULO] = 3,
486 [OP_AND] = 8,
487 [OP_OR] = 10,
488 [OP_XOR] = 9,
489 [OP_LESS] = 6,
490 [OP_GREATER] = 6,
491 [OP_EQUAL] = 7,
492 [OP_NOT_EQUAL] = 7,
493 [OP_LE] = 6,
494 [OP_GE] = 6,
495 [OP_LOGICAL_AND] = 11,
496 [OP_LOGICAL_OR] = 12,
497 [OP_NEGATE] = 2,
498 [OP_FLIP] = 2,
499 [OP_NOT] = 2,
500 [OP_SHIFT_L] = 5,
501 [OP_SHIFT_R] = 5,
502};
503
504static struct ParseTree* _parseTreeCreate() {
505 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
506 tree->token.type = TOKEN_ERROR_TYPE;
507 tree->rhs = 0;
508 tree->lhs = 0;
509 return tree;
510}
511
512static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
513 struct ParseTree* newTree = 0;
514 while (i < LexVectorSize(lv)) {
515 struct Token* token = LexVectorGetPointer(lv, i);
516 int newPrecedence;
517 switch (token->type) {
518 case TOKEN_IDENTIFIER_TYPE:
519 case TOKEN_UINT_TYPE:
520 if (tree->token.type == TOKEN_ERROR_TYPE) {
521 tree->token = *token;
522 if (token->type == TOKEN_IDENTIFIER_TYPE) {
523 tree->token.identifierValue = strdup(token->identifierValue);
524 }
525 ++i;
526 } else {
527 tree->token.type = TOKEN_ERROR_TYPE;
528 return i + 1;
529 }
530 break;
531 case TOKEN_SEGMENT_TYPE:
532 tree->lhs = _parseTreeCreate();
533 tree->lhs->token.type = TOKEN_UINT_TYPE;
534 tree->lhs->token.uintValue = token->uintValue;
535 tree->rhs = _parseTreeCreate();
536 tree->token.type = TOKEN_SEGMENT_TYPE;
537 i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
538 if (tree->token.type == TOKEN_ERROR_TYPE) {
539 tree->token.type = TOKEN_ERROR_TYPE;
540 }
541 break;
542 case TOKEN_OPEN_PAREN_TYPE:
543 ++*openParens;
544 i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
545 break;
546 case TOKEN_CLOSE_PAREN_TYPE:
547 if (*openParens <= 0) {
548 tree->token.type = TOKEN_ERROR_TYPE;
549 }
550 --*openParens;
551 return i + 1;
552 case TOKEN_OPERATOR_TYPE:
553 newPrecedence = _operatorPrecedence[token->operatorValue];
554 if (newPrecedence < precedence) {
555 newTree = _parseTreeCreate();
556 *newTree = *tree;
557 tree->lhs = newTree;
558 tree->rhs = _parseTreeCreate();
559 tree->token = *token;
560 i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
561 if (tree->token.type == TOKEN_ERROR_TYPE) {
562 tree->token.type = TOKEN_ERROR_TYPE;
563 }
564 } else {
565 return i;
566 }
567 break;
568 case TOKEN_ERROR_TYPE:
569 tree->token.type = TOKEN_ERROR_TYPE;
570 return i + 1;
571 }
572 }
573
574 return i;
575}
576
577void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
578 if (!tree) {
579 return;
580 }
581
582 tree->token.type = TOKEN_ERROR_TYPE;
583 tree->lhs = 0;
584 tree->rhs = 0;
585
586 int openParens = 0;
587 _parseExpression(tree, lv, 0, INT_MAX, &openParens);
588 if (openParens) {
589 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
590 free(tree->token.identifierValue);
591 }
592 tree->token.type = TOKEN_ERROR_TYPE;
593 }
594}
595
596void lexFree(struct LexVector* lv) {
597 size_t i;
598 for (i = 0; i < LexVectorSize(lv); ++i) {
599 struct Token* token = LexVectorGetPointer(lv, i);
600 if (token->type == TOKEN_IDENTIFIER_TYPE) {
601 free(token->identifierValue);
602 }
603 }
604}
605
606void parseFree(struct ParseTree* tree) {
607 if (!tree) {
608 return;
609 }
610
611 if (tree->lhs) {
612 parseFree(tree->lhs);
613 free(tree->lhs);
614 }
615 if (tree->rhs) {
616 parseFree(tree->rhs);
617 free(tree->rhs);
618 }
619
620 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
621 free(tree->token.identifierValue);
622 }
623}
624
625static bool _performOperation(enum Operation operation, int32_t current, int32_t next, int32_t* value) {
626 switch (operation) {
627 case OP_ASSIGN:
628 current = next;
629 break;
630 case OP_ADD:
631 current += next;
632 break;
633 case OP_SUBTRACT:
634 current -= next;
635 break;
636 case OP_MULTIPLY:
637 current *= next;
638 break;
639 case OP_DIVIDE:
640 if (next != 0) {
641 current /= next;
642 } else {
643 return false;
644 }
645 break;
646 case OP_MODULO:
647 if (next != 0) {
648 current %= next;
649 } else {
650 return false;
651 }
652 break;
653 case OP_AND:
654 current &= next;
655 break;
656 case OP_OR:
657 current |= next;
658 break;
659 case OP_XOR:
660 current ^= next;
661 break;
662 case OP_LESS:
663 current = current < next;
664 break;
665 case OP_GREATER:
666 current = current > next;
667 break;
668 case OP_EQUAL:
669 current = current == next;
670 break;
671 case OP_NOT_EQUAL:
672 current = current != next;
673 break;
674 case OP_LOGICAL_AND:
675 current = current && next;
676 break;
677 case OP_LOGICAL_OR:
678 current = current || next;
679 break;
680 case OP_LE:
681 current = current <= next;
682 break;
683 case OP_GE:
684 current = current >= next;
685 break;
686 case OP_SHIFT_L:
687 current <<= next;
688 break;
689 case OP_SHIFT_R:
690 current >>= next;
691 break;
692 default:
693 return false;
694 }
695 *value = current;
696 return true;
697}
698
699bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
700 if (!value) {
701 return false;
702 }
703 int32_t lhs, rhs;
704 switch (tree->token.type) {
705 case TOKEN_UINT_TYPE:
706 if (segment) {
707 *segment = -1;
708 }
709 *value = tree->token.uintValue;
710 return true;
711 case TOKEN_SEGMENT_TYPE:
712 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
713 return false;
714 }
715 return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
716 case TOKEN_OPERATOR_TYPE:
717 if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
718 return false;
719 }
720 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
721 return false;
722 }
723 return _performOperation(tree->token.operatorValue, lhs, rhs, value);
724 case TOKEN_IDENTIFIER_TYPE:
725 return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
726 case TOKEN_ERROR_TYPE:
727 default:
728 break;
729 }
730 return false;
731}