src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba/internal/debugger/parser.h>
7
8#include <mgba/core/core.h>
9#include <mgba/debugger/debugger.h>
10#include <mgba-util/string.h>
11
12DEFINE_VECTOR(LexVector, struct Token);
13
14enum LexState {
15 LEX_ERROR = -1,
16 LEX_ROOT = 0,
17 LEX_EXPECT_IDENTIFIER,
18 LEX_EXPECT_BINARY_FIRST,
19 LEX_EXPECT_BINARY,
20 LEX_EXPECT_DECIMAL,
21 LEX_EXPECT_HEX_FIRST,
22 LEX_EXPECT_HEX,
23 LEX_EXPECT_PREFIX,
24 LEX_EXPECT_OPERATOR,
25 LEX_EXPECT_OPERATOR2,
26};
27
28static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
29 if (*state == LEX_EXPECT_OPERATOR2) {
30 struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
31 if (lvNext->type != TOKEN_OPERATOR_TYPE) {
32 lvNext->type = TOKEN_ERROR_TYPE;
33 *state = LEX_ERROR;
34 return;
35 }
36 switch (lvNext->operatorValue) {
37 case OP_AND:
38 if (operator == '&') {
39 lvNext->operatorValue = OP_LOGICAL_AND;
40 *state = LEX_ROOT;
41 return;
42 }
43 break;
44 case OP_OR:
45 if (operator == '|') {
46 lvNext->operatorValue = OP_LOGICAL_OR;
47 *state = LEX_ROOT;
48 return;
49 }
50 break;
51 case OP_LESS:
52 if (operator == '=') {
53 lvNext->operatorValue = OP_LE;
54 *state = LEX_ROOT;
55 return;
56 }
57 if (operator == '<') {
58 lvNext->operatorValue = OP_SHIFT_L;
59 *state = LEX_ROOT;
60 return;
61 }
62 break;
63 case OP_GREATER:
64 if (operator == '=') {
65 lvNext->operatorValue = OP_GE;
66 *state = LEX_ROOT;
67 return;
68 }
69 if (operator == '>') {
70 lvNext->operatorValue = OP_SHIFT_R;
71 *state = LEX_ROOT;
72 return;
73 }
74 break;
75 case OP_ASSIGN:
76 if (operator == '=') {
77 lvNext->operatorValue = OP_EQUAL;
78 *state = LEX_ROOT;
79 return;
80 }
81 break;
82 case OP_NOT:
83 if (operator == '=') {
84 lvNext->operatorValue = OP_NOT_EQUAL;
85 *state = LEX_ROOT;
86 return;
87 }
88 break;
89 default:
90 break;
91 }
92 *state = LEX_ERROR;
93 }
94 if (*state == LEX_ROOT || *state == LEX_ERROR) {
95 struct Token* lvNext = LexVectorAppend(lv);
96 lvNext->type = TOKEN_OPERATOR_TYPE;
97 *state = LEX_ROOT;
98 switch (operator) {
99 case '-':
100 lvNext->operatorValue = OP_NEGATE;
101 break;
102 case '~':
103 lvNext->operatorValue = OP_FLIP;
104 break;
105 case '!':
106 lvNext->operatorValue = OP_NOT;
107 break;
108 case '*':
109 lvNext->operatorValue = OP_DEREFERENCE;
110 break;
111 default:
112 lvNext->type = TOKEN_ERROR_TYPE;
113 *state = LEX_ERROR;
114 break;
115 }
116 return;
117 }
118 struct Token* lvNext = LexVectorAppend(lv);
119 lvNext->type = TOKEN_OPERATOR_TYPE;
120 *state = LEX_EXPECT_OPERATOR2;
121 switch (operator) {
122 case '=':
123 lvNext->operatorValue = OP_ASSIGN;
124 break;
125 case '+':
126 lvNext->operatorValue = OP_ADD;
127 break;
128 case '-':
129 lvNext->operatorValue = OP_SUBTRACT;
130 break;
131 case '*':
132 lvNext->operatorValue = OP_MULTIPLY;
133 break;
134 case '/':
135 lvNext->operatorValue = OP_DIVIDE;
136 break;
137 case '%':
138 lvNext->operatorValue = OP_MODULO;
139 break;
140 case '&':
141 lvNext->operatorValue = OP_AND;
142 break;
143 case '|':
144 lvNext->operatorValue = OP_OR;
145 break;
146 case '^':
147 lvNext->operatorValue = OP_XOR;
148 break;
149 case '<':
150 lvNext->operatorValue = OP_LESS;
151 break;
152 case '>':
153 lvNext->operatorValue = OP_GREATER;
154 break;
155 case '!':
156 lvNext->operatorValue = OP_NOT;
157 break;
158 default:
159 lvNext->type = TOKEN_ERROR_TYPE;
160 break;
161 }
162}
163
164static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
165 struct Token* lvNext;
166
167 switch (token) {
168 case '=':
169 case '+':
170 case '-':
171 case '*':
172 case '/':
173 case '%':
174 case '&':
175 case '|':
176 case '^':
177 case '<':
178 case '>':
179 case '!':
180 lvNext = LexVectorAppend(lv);
181 lvNext->type = TOKEN_UINT_TYPE;
182 lvNext->uintValue = next;
183 _lexOperator(lv, token, state);
184 break;
185 case ')':
186 lvNext = LexVectorAppend(lv);
187 lvNext->type = TOKEN_UINT_TYPE;
188 lvNext->uintValue = next;
189 lvNext = LexVectorAppend(lv);
190 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
191 *state = LEX_EXPECT_OPERATOR;
192 break;
193 case ' ':
194 case '\t':
195 lvNext = LexVectorAppend(lv);
196 lvNext->type = TOKEN_UINT_TYPE;
197 lvNext->uintValue = next;
198 *state = LEX_EXPECT_OPERATOR;
199 break;
200 default:
201 *state = LEX_ERROR;
202 break;
203 }
204}
205
206size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
207 if (!string || length < 1) {
208 return 0;
209 }
210
211 uint32_t next = 0;
212 size_t adjusted = 0;
213
214 enum LexState state = LEX_ROOT;
215 const char* tokenStart = 0;
216 struct Token* lvNext;
217
218 if (!eol) {
219 eol = " \r\n";
220 }
221
222 while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
223 char token = string[0];
224 ++string;
225 ++adjusted;
226 --length;
227 switch (state) {
228 case LEX_EXPECT_OPERATOR2:
229 switch (token) {
230 case '&':
231 case '|':
232 case '=':
233 case '<':
234 case '>':
235 _lexOperator(lv, token, &state);
236 break;
237 }
238 if (state != LEX_EXPECT_OPERATOR2) {
239 break;
240 }
241 // Fall through
242 case LEX_ROOT:
243 tokenStart = string - 1;
244 switch (token) {
245 case '1':
246 case '2':
247 case '3':
248 case '4':
249 case '5':
250 case '6':
251 case '7':
252 case '8':
253 case '9':
254 state = LEX_EXPECT_DECIMAL;
255 next = token - '0';
256 break;
257 case '0':
258 state = LEX_EXPECT_PREFIX;
259 next = 0;
260 break;
261 case '$':
262 state = LEX_EXPECT_HEX_FIRST;
263 next = 0;
264 break;
265 case '%':
266 state = LEX_EXPECT_BINARY_FIRST;
267 next = 0;
268 break;
269 case '(':
270 state = LEX_ROOT;
271 lvNext = LexVectorAppend(lv);
272 lvNext->type = TOKEN_OPEN_PAREN_TYPE;
273 break;
274 case '!':
275 case '-':
276 case '~':
277 case '*':
278 _lexOperator(lv, token, &state);
279 break;
280 case ' ':
281 case '\t':
282 break;
283 default:
284 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
285 state = LEX_EXPECT_IDENTIFIER;
286 } else {
287 state = LEX_ERROR;
288 }
289 break;
290 };
291 break;
292 case LEX_EXPECT_IDENTIFIER:
293 switch (token) {
294 case '=':
295 case '+':
296 case '-':
297 case '*':
298 case '/':
299 case '%':
300 case '&':
301 case '|':
302 case '^':
303 case '<':
304 case '>':
305 case '!':
306 lvNext = LexVectorAppend(lv);
307 lvNext->type = TOKEN_IDENTIFIER_TYPE;
308 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
309 _lexOperator(lv, token, &state);
310 break;
311 case ')':
312 lvNext = LexVectorAppend(lv);
313 lvNext->type = TOKEN_IDENTIFIER_TYPE;
314 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
315 lvNext = LexVectorAppend(lv);
316 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
317 state = LEX_EXPECT_OPERATOR;
318 break;
319 case ' ':
320 case '\t':
321 lvNext = LexVectorAppend(lv);
322 lvNext->type = TOKEN_IDENTIFIER_TYPE;
323 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
324 state = LEX_EXPECT_OPERATOR;
325 break;
326 default:
327 break;
328 }
329 break;
330 case LEX_EXPECT_BINARY_FIRST:
331 state = LEX_EXPECT_BINARY;
332 // Fall through
333 case LEX_EXPECT_BINARY:
334 switch (token) {
335 case '0':
336 case '1':
337 // TODO: handle overflow
338 next <<= 1;
339 next += token - '0';
340 break;
341 default:
342 _lexValue(lv, token, next, &state);
343 break;
344 }
345 break;
346 case LEX_EXPECT_DECIMAL:
347 switch (token) {
348 case '0':
349 case '1':
350 case '2':
351 case '3':
352 case '4':
353 case '5':
354 case '6':
355 case '7':
356 case '8':
357 case '9':
358 // TODO: handle overflow
359 next *= 10;
360 next += token - '0';
361 break;
362 default:
363 _lexValue(lv, token, next, &state);
364 break;
365 }
366 break;
367 case LEX_EXPECT_HEX_FIRST:
368 state = LEX_EXPECT_HEX;
369 // Fall through
370 case LEX_EXPECT_HEX:
371 switch (token) {
372 case '0':
373 case '1':
374 case '2':
375 case '3':
376 case '4':
377 case '5':
378 case '6':
379 case '7':
380 case '8':
381 case '9':
382 // TODO: handle overflow
383 next *= 16;
384 next += token - '0';
385 break;
386 case 'A':
387 case 'B':
388 case 'C':
389 case 'D':
390 case 'E':
391 case 'F':
392 // TODO: handle overflow
393 next *= 16;
394 next += token - 'A' + 10;
395 break;
396 case 'a':
397 case 'b':
398 case 'c':
399 case 'd':
400 case 'e':
401 case 'f':
402 // TODO: handle overflow
403 next *= 16;
404 next += token - 'a' + 10;
405 break;
406 case ':':
407 lvNext = LexVectorAppend(lv);
408 lvNext->type = TOKEN_SEGMENT_TYPE;
409 lvNext->uintValue = next;
410 next = 0;
411 break;
412 default:
413 _lexValue(lv, token, next, &state);
414 break;
415 }
416 break;
417 case LEX_EXPECT_PREFIX:
418 switch (token) {
419 case 'X':
420 case 'x':
421 next = 0;
422 state = LEX_EXPECT_HEX_FIRST;
423 break;
424 case 'B':
425 case 'b':
426 next = 0;
427 state = LEX_EXPECT_BINARY_FIRST;
428 break;
429 case '0':
430 case '1':
431 case '2':
432 case '3':
433 case '4':
434 case '5':
435 case '6':
436 case '7':
437 case '8':
438 case '9':
439 next = token - '0';
440 state = LEX_EXPECT_DECIMAL;
441 break;
442 default:
443 _lexValue(lv, token, next, &state);
444 break;
445 }
446 break;
447 case LEX_EXPECT_OPERATOR:
448 switch (token) {
449 case '=':
450 case '+':
451 case '-':
452 case '*':
453 case '/':
454 case '%':
455 case '&':
456 case '|':
457 case '^':
458 case '<':
459 case '>':
460 case '!':
461 _lexOperator(lv, token, &state);
462 break;
463 case ')':
464 lvNext = LexVectorAppend(lv);
465 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
466 break;
467 case ' ':
468 case '\t':
469 break;
470 default:
471 state = LEX_ERROR;
472 }
473 break;
474 case LEX_ERROR:
475 // This shouldn't be reached
476 break;
477 }
478 }
479
480 switch (state) {
481 case LEX_EXPECT_BINARY:
482 case LEX_EXPECT_DECIMAL:
483 case LEX_EXPECT_HEX:
484 case LEX_EXPECT_PREFIX:
485 lvNext = LexVectorAppend(lv);
486 lvNext->type = TOKEN_UINT_TYPE;
487 lvNext->uintValue = next;
488 break;
489 case LEX_EXPECT_IDENTIFIER:
490 lvNext = LexVectorAppend(lv);
491 lvNext->type = TOKEN_IDENTIFIER_TYPE;
492 lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
493 break;
494 case LEX_ROOT:
495 case LEX_EXPECT_OPERATOR:
496 case LEX_EXPECT_OPERATOR2:
497 break;
498 case LEX_EXPECT_BINARY_FIRST:
499 case LEX_EXPECT_HEX_FIRST:
500 case LEX_ERROR:
501 default:
502 lvNext = LexVectorAppend(lv);
503 lvNext->type = TOKEN_ERROR_TYPE;
504 break;
505 }
506 return adjusted;
507}
508
509static const int _operatorPrecedence[] = {
510 [OP_ASSIGN] = 14,
511 [OP_ADD] = 4,
512 [OP_SUBTRACT] = 4,
513 [OP_MULTIPLY] = 3,
514 [OP_DIVIDE] = 3,
515 [OP_MODULO] = 3,
516 [OP_AND] = 8,
517 [OP_OR] = 10,
518 [OP_XOR] = 9,
519 [OP_LESS] = 6,
520 [OP_GREATER] = 6,
521 [OP_EQUAL] = 7,
522 [OP_NOT_EQUAL] = 7,
523 [OP_LE] = 6,
524 [OP_GE] = 6,
525 [OP_LOGICAL_AND] = 11,
526 [OP_LOGICAL_OR] = 12,
527 [OP_NEGATE] = 2,
528 [OP_FLIP] = 2,
529 [OP_NOT] = 2,
530 [OP_SHIFT_L] = 5,
531 [OP_SHIFT_R] = 5,
532 [OP_DEREFERENCE] = 2,
533};
534
535static struct ParseTree* _parseTreeCreate() {
536 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
537 tree->token.type = TOKEN_ERROR_TYPE;
538 tree->rhs = 0;
539 tree->lhs = 0;
540 return tree;
541}
542
543static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
544 struct ParseTree* newTree = 0;
545 while (i < LexVectorSize(lv)) {
546 struct Token* token = LexVectorGetPointer(lv, i);
547 int newPrecedence;
548 switch (token->type) {
549 case TOKEN_IDENTIFIER_TYPE:
550 case TOKEN_UINT_TYPE:
551 if (tree->token.type == TOKEN_ERROR_TYPE) {
552 tree->token = *token;
553 if (token->type == TOKEN_IDENTIFIER_TYPE) {
554 tree->token.identifierValue = strdup(token->identifierValue);
555 }
556 ++i;
557 } else {
558 tree->token.type = TOKEN_ERROR_TYPE;
559 return i + 1;
560 }
561 break;
562 case TOKEN_SEGMENT_TYPE:
563 tree->lhs = _parseTreeCreate();
564 tree->lhs->token.type = TOKEN_UINT_TYPE;
565 tree->lhs->token.uintValue = token->uintValue;
566 tree->rhs = _parseTreeCreate();
567 tree->token.type = TOKEN_SEGMENT_TYPE;
568 i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
569 if (tree->token.type == TOKEN_ERROR_TYPE) {
570 tree->token.type = TOKEN_ERROR_TYPE;
571 }
572 break;
573 case TOKEN_OPEN_PAREN_TYPE:
574 ++*openParens;
575 i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
576 break;
577 case TOKEN_CLOSE_PAREN_TYPE:
578 if (*openParens <= 0) {
579 tree->token.type = TOKEN_ERROR_TYPE;
580 }
581 --*openParens;
582 return i + 1;
583 case TOKEN_OPERATOR_TYPE:
584 newPrecedence = _operatorPrecedence[token->operatorValue];
585 if (newPrecedence < precedence) {
586 newTree = _parseTreeCreate();
587 *newTree = *tree;
588 tree->lhs = newTree;
589 tree->rhs = _parseTreeCreate();
590 tree->token = *token;
591 i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
592 if (tree->token.type == TOKEN_ERROR_TYPE) {
593 tree->token.type = TOKEN_ERROR_TYPE;
594 }
595 } else {
596 return i;
597 }
598 break;
599 case TOKEN_ERROR_TYPE:
600 tree->token.type = TOKEN_ERROR_TYPE;
601 return i + 1;
602 }
603 }
604
605 return i;
606}
607
608void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
609 if (!tree) {
610 return;
611 }
612
613 tree->token.type = TOKEN_ERROR_TYPE;
614 tree->lhs = 0;
615 tree->rhs = 0;
616
617 int openParens = 0;
618 _parseExpression(tree, lv, 0, INT_MAX, &openParens);
619 if (openParens) {
620 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
621 free(tree->token.identifierValue);
622 }
623 tree->token.type = TOKEN_ERROR_TYPE;
624 }
625}
626
627void lexFree(struct LexVector* lv) {
628 size_t i;
629 for (i = 0; i < LexVectorSize(lv); ++i) {
630 struct Token* token = LexVectorGetPointer(lv, i);
631 if (token->type == TOKEN_IDENTIFIER_TYPE) {
632 free(token->identifierValue);
633 }
634 }
635}
636
637void parseFree(struct ParseTree* tree) {
638 if (!tree) {
639 return;
640 }
641
642 if (tree->lhs) {
643 parseFree(tree->lhs);
644 free(tree->lhs);
645 }
646 if (tree->rhs) {
647 parseFree(tree->rhs);
648 free(tree->rhs);
649 }
650
651 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
652 free(tree->token.identifierValue);
653 }
654}
655
656static bool _performOperation(struct mDebugger* debugger, enum Operation operation, int32_t current, int32_t next, int32_t* value, int* segment) {
657 switch (operation) {
658 case OP_ASSIGN:
659 current = next;
660 break;
661 case OP_ADD:
662 current += next;
663 break;
664 case OP_SUBTRACT:
665 current -= next;
666 break;
667 case OP_MULTIPLY:
668 current *= next;
669 break;
670 case OP_DIVIDE:
671 if (next != 0) {
672 current /= next;
673 } else {
674 return false;
675 }
676 break;
677 case OP_MODULO:
678 if (next != 0) {
679 current %= next;
680 } else {
681 return false;
682 }
683 break;
684 case OP_AND:
685 current &= next;
686 break;
687 case OP_OR:
688 current |= next;
689 break;
690 case OP_XOR:
691 current ^= next;
692 break;
693 case OP_LESS:
694 current = current < next;
695 break;
696 case OP_GREATER:
697 current = current > next;
698 break;
699 case OP_EQUAL:
700 current = current == next;
701 break;
702 case OP_NOT_EQUAL:
703 current = current != next;
704 break;
705 case OP_LOGICAL_AND:
706 current = current && next;
707 break;
708 case OP_LOGICAL_OR:
709 current = current || next;
710 break;
711 case OP_LE:
712 current = current <= next;
713 break;
714 case OP_GE:
715 current = current >= next;
716 break;
717 case OP_NEGATE:
718 current = -next;
719 break;
720 case OP_FLIP:
721 current = ~next;
722 break;
723 case OP_NOT:
724 current = !next;
725 break;
726 case OP_SHIFT_L:
727 current <<= next;
728 break;
729 case OP_SHIFT_R:
730 current >>= next;
731 break;
732 case OP_DEREFERENCE:
733 if (*segment < 0) {
734 current = debugger->core->busRead8(debugger->core, next);
735 } else {
736 current = debugger->core->rawRead8(debugger->core, next, *segment);
737 }
738 *segment = -1;
739 break;
740 default:
741 return false;
742 }
743 *value = current;
744 return true;
745}
746
747bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
748 if (!value) {
749 return false;
750 }
751 int32_t lhs, rhs;
752 switch (tree->token.type) {
753 case TOKEN_UINT_TYPE:
754 if (segment) {
755 *segment = -1;
756 }
757 *value = tree->token.uintValue;
758 return true;
759 case TOKEN_SEGMENT_TYPE:
760 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
761 return false;
762 }
763 return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
764 case TOKEN_OPERATOR_TYPE:
765 switch (tree->token.operatorValue) {
766 case OP_ASSIGN:
767 case OP_ADD:
768 case OP_SUBTRACT:
769 case OP_MULTIPLY:
770 case OP_DIVIDE:
771 case OP_MODULO:
772 case OP_AND:
773 case OP_OR:
774 case OP_XOR:
775 case OP_LESS:
776 case OP_GREATER:
777 case OP_EQUAL:
778 case OP_NOT_EQUAL:
779 case OP_LOGICAL_AND:
780 case OP_LOGICAL_OR:
781 case OP_LE:
782 case OP_GE:
783 case OP_SHIFT_L:
784 case OP_SHIFT_R:
785 if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
786 return false;
787 }
788 // Fall through
789 default:
790 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
791 return false;
792 }
793 break;
794 }
795 return _performOperation(debugger, tree->token.operatorValue, lhs, rhs, value, segment);
796 case TOKEN_IDENTIFIER_TYPE:
797 return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
798 case TOKEN_ERROR_TYPE:
799 default:
800 break;
801 }
802 return false;
803}