src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba/internal/debugger/parser.h>
7
8#include <mgba/core/core.h>
9#include <mgba/debugger/debugger.h>
10#include <mgba-util/string.h>
11
12DEFINE_VECTOR(LexVector, struct Token);
13
14enum LexState {
15 LEX_ERROR = -1,
16 LEX_ROOT = 0,
17 LEX_EXPECT_IDENTIFIER,
18 LEX_EXPECT_BINARY_FIRST,
19 LEX_EXPECT_BINARY,
20 LEX_EXPECT_DECIMAL,
21 LEX_EXPECT_HEX_FIRST,
22 LEX_EXPECT_HEX,
23 LEX_EXPECT_PREFIX,
24 LEX_EXPECT_OPERATOR,
25 LEX_EXPECT_OPERATOR2,
26};
27
28static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
29 if (*state == LEX_EXPECT_OPERATOR2) {
30 struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
31 if (lvNext->type != TOKEN_OPERATOR_TYPE) {
32 lvNext->type = TOKEN_ERROR_TYPE;
33 *state = LEX_ERROR;
34 return;
35 }
36 switch (lvNext->operatorValue) {
37 case OP_AND:
38 if (operator == '&') {
39 lvNext->operatorValue = OP_LOGICAL_AND;
40 *state = LEX_ROOT;
41 return;
42 }
43 break;
44 case OP_OR:
45 if (operator == '|') {
46 lvNext->operatorValue = OP_LOGICAL_OR;
47 *state = LEX_ROOT;
48 return;
49 }
50 break;
51 case OP_LESS:
52 if (operator == '=') {
53 lvNext->operatorValue = OP_LE;
54 *state = LEX_ROOT;
55 return;
56 }
57 if (operator == '<') {
58 lvNext->operatorValue = OP_SHIFT_L;
59 *state = LEX_ROOT;
60 return;
61 }
62 break;
63 case OP_GREATER:
64 if (operator == '=') {
65 lvNext->operatorValue = OP_GE;
66 *state = LEX_ROOT;
67 return;
68 }
69 if (operator == '>') {
70 lvNext->operatorValue = OP_SHIFT_R;
71 *state = LEX_ROOT;
72 return;
73 }
74 break;
75 case OP_ASSIGN:
76 if (operator == '=') {
77 lvNext->operatorValue = OP_EQUAL;
78 *state = LEX_ROOT;
79 return;
80 }
81 break;
82 case OP_NOT:
83 if (operator == '=') {
84 lvNext->operatorValue = OP_NOT_EQUAL;
85 *state = LEX_ROOT;
86 return;
87 }
88 break;
89 default:
90 break;
91 }
92 *state = LEX_ERROR;
93 }
94 if (*state == LEX_ROOT || *state == LEX_ERROR) {
95 struct Token lvNext;
96 lvNext.type = TOKEN_OPERATOR_TYPE;
97 *state = LEX_ROOT;
98 switch (operator) {
99 case '-':
100 lvNext.operatorValue = OP_NEGATE;
101 break;
102 case '~':
103 lvNext.operatorValue = OP_FLIP;
104 break;
105 case '!':
106 lvNext.operatorValue = OP_NOT;
107 break;
108 case '*':
109 lvNext.operatorValue = OP_DEREFERENCE;
110 break;
111 default:
112 lvNext.type = TOKEN_ERROR_TYPE;
113 *state = LEX_ERROR;
114 return;
115 }
116 *LexVectorAppend(lv) = lvNext;
117 return;
118 }
119 struct Token lvNext;
120 lvNext.type = TOKEN_OPERATOR_TYPE;
121 switch (operator) {
122 case '=':
123 lvNext.operatorValue = OP_ASSIGN;
124 break;
125 case '+':
126 lvNext.operatorValue = OP_ADD;
127 break;
128 case '-':
129 lvNext.operatorValue = OP_SUBTRACT;
130 break;
131 case '*':
132 lvNext.operatorValue = OP_MULTIPLY;
133 break;
134 case '/':
135 lvNext.operatorValue = OP_DIVIDE;
136 break;
137 case '%':
138 lvNext.operatorValue = OP_MODULO;
139 break;
140 case '&':
141 lvNext.operatorValue = OP_AND;
142 break;
143 case '|':
144 lvNext.operatorValue = OP_OR;
145 break;
146 case '^':
147 lvNext.operatorValue = OP_XOR;
148 break;
149 case '<':
150 lvNext.operatorValue = OP_LESS;
151 break;
152 case '>':
153 lvNext.operatorValue = OP_GREATER;
154 break;
155 case '!':
156 lvNext.operatorValue = OP_NOT;
157 break;
158 default:
159 *state = LEX_ERROR;
160 return;
161 }
162 *state = LEX_EXPECT_OPERATOR2;
163 *LexVectorAppend(lv) = lvNext;
164}
165
166static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
167 struct Token* lvNext;
168
169 switch (token) {
170 case '=':
171 case '+':
172 case '-':
173 case '*':
174 case '/':
175 case '%':
176 case '&':
177 case '|':
178 case '^':
179 case '<':
180 case '>':
181 case '!':
182 lvNext = LexVectorAppend(lv);
183 lvNext->type = TOKEN_UINT_TYPE;
184 lvNext->uintValue = next;
185 _lexOperator(lv, token, state);
186 break;
187 case ')':
188 lvNext = LexVectorAppend(lv);
189 lvNext->type = TOKEN_UINT_TYPE;
190 lvNext->uintValue = next;
191 lvNext = LexVectorAppend(lv);
192 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
193 *state = LEX_EXPECT_OPERATOR;
194 break;
195 case ' ':
196 case '\t':
197 lvNext = LexVectorAppend(lv);
198 lvNext->type = TOKEN_UINT_TYPE;
199 lvNext->uintValue = next;
200 *state = LEX_EXPECT_OPERATOR;
201 break;
202 default:
203 *state = LEX_ERROR;
204 break;
205 }
206}
207
208size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
209 if (!string || length < 1) {
210 return 0;
211 }
212
213 uint32_t next = 0;
214 size_t adjusted = 0;
215
216 enum LexState state = LEX_ROOT;
217 const char* tokenStart = 0;
218 struct Token* lvNext;
219
220 if (!eol) {
221 eol = " \r\n";
222 }
223
224 while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
225 char token = string[0];
226 ++string;
227 ++adjusted;
228 --length;
229 switch (state) {
230 case LEX_EXPECT_OPERATOR2:
231 switch (token) {
232 case '&':
233 case '|':
234 case '=':
235 case '<':
236 case '>':
237 _lexOperator(lv, token, &state);
238 break;
239 }
240 if (state != LEX_EXPECT_OPERATOR2) {
241 break;
242 }
243 // Fall through
244 case LEX_ROOT:
245 tokenStart = string - 1;
246 switch (token) {
247 case '1':
248 case '2':
249 case '3':
250 case '4':
251 case '5':
252 case '6':
253 case '7':
254 case '8':
255 case '9':
256 state = LEX_EXPECT_DECIMAL;
257 next = token - '0';
258 break;
259 case '0':
260 state = LEX_EXPECT_PREFIX;
261 next = 0;
262 break;
263 case '$':
264 state = LEX_EXPECT_HEX_FIRST;
265 next = 0;
266 break;
267 case '%':
268 state = LEX_EXPECT_BINARY_FIRST;
269 next = 0;
270 break;
271 case '(':
272 state = LEX_ROOT;
273 lvNext = LexVectorAppend(lv);
274 lvNext->type = TOKEN_OPEN_PAREN_TYPE;
275 break;
276 case '!':
277 case '-':
278 case '~':
279 case '*':
280 _lexOperator(lv, token, &state);
281 break;
282 case ' ':
283 case '\t':
284 break;
285 default:
286 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
287 state = LEX_EXPECT_IDENTIFIER;
288 } else {
289 state = LEX_ERROR;
290 }
291 break;
292 };
293 break;
294 case LEX_EXPECT_IDENTIFIER:
295 switch (token) {
296 case '=':
297 case '+':
298 case '-':
299 case '*':
300 case '/':
301 case '%':
302 case '&':
303 case '|':
304 case '^':
305 case '<':
306 case '>':
307 case '!':
308 lvNext = LexVectorAppend(lv);
309 lvNext->type = TOKEN_IDENTIFIER_TYPE;
310 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
311 _lexOperator(lv, token, &state);
312 break;
313 case ')':
314 lvNext = LexVectorAppend(lv);
315 lvNext->type = TOKEN_IDENTIFIER_TYPE;
316 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
317 lvNext = LexVectorAppend(lv);
318 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
319 state = LEX_EXPECT_OPERATOR;
320 break;
321 case ' ':
322 case '\t':
323 lvNext = LexVectorAppend(lv);
324 lvNext->type = TOKEN_IDENTIFIER_TYPE;
325 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
326 state = LEX_EXPECT_OPERATOR;
327 break;
328 default:
329 break;
330 }
331 break;
332 case LEX_EXPECT_BINARY_FIRST:
333 state = LEX_EXPECT_BINARY;
334 // Fall through
335 case LEX_EXPECT_BINARY:
336 switch (token) {
337 case '0':
338 case '1':
339 // TODO: handle overflow
340 next <<= 1;
341 next += token - '0';
342 break;
343 default:
344 _lexValue(lv, token, next, &state);
345 break;
346 }
347 break;
348 case LEX_EXPECT_DECIMAL:
349 switch (token) {
350 case '0':
351 case '1':
352 case '2':
353 case '3':
354 case '4':
355 case '5':
356 case '6':
357 case '7':
358 case '8':
359 case '9':
360 // TODO: handle overflow
361 next *= 10;
362 next += token - '0';
363 break;
364 default:
365 _lexValue(lv, token, next, &state);
366 break;
367 }
368 break;
369 case LEX_EXPECT_HEX_FIRST:
370 state = LEX_EXPECT_HEX;
371 // Fall through
372 case LEX_EXPECT_HEX:
373 switch (token) {
374 case '0':
375 case '1':
376 case '2':
377 case '3':
378 case '4':
379 case '5':
380 case '6':
381 case '7':
382 case '8':
383 case '9':
384 // TODO: handle overflow
385 next *= 16;
386 next += token - '0';
387 break;
388 case 'A':
389 case 'B':
390 case 'C':
391 case 'D':
392 case 'E':
393 case 'F':
394 // TODO: handle overflow
395 next *= 16;
396 next += token - 'A' + 10;
397 break;
398 case 'a':
399 case 'b':
400 case 'c':
401 case 'd':
402 case 'e':
403 case 'f':
404 // TODO: handle overflow
405 next *= 16;
406 next += token - 'a' + 10;
407 break;
408 case ':':
409 lvNext = LexVectorAppend(lv);
410 lvNext->type = TOKEN_SEGMENT_TYPE;
411 lvNext->uintValue = next;
412 next = 0;
413 break;
414 default:
415 _lexValue(lv, token, next, &state);
416 break;
417 }
418 break;
419 case LEX_EXPECT_PREFIX:
420 switch (token) {
421 case 'X':
422 case 'x':
423 next = 0;
424 state = LEX_EXPECT_HEX_FIRST;
425 break;
426 case 'B':
427 case 'b':
428 next = 0;
429 state = LEX_EXPECT_BINARY_FIRST;
430 break;
431 case '0':
432 case '1':
433 case '2':
434 case '3':
435 case '4':
436 case '5':
437 case '6':
438 case '7':
439 case '8':
440 case '9':
441 next = token - '0';
442 state = LEX_EXPECT_DECIMAL;
443 break;
444 default:
445 _lexValue(lv, token, next, &state);
446 break;
447 }
448 break;
449 case LEX_EXPECT_OPERATOR:
450 switch (token) {
451 case '=':
452 case '+':
453 case '-':
454 case '*':
455 case '/':
456 case '%':
457 case '&':
458 case '|':
459 case '^':
460 case '<':
461 case '>':
462 case '!':
463 _lexOperator(lv, token, &state);
464 break;
465 case ')':
466 lvNext = LexVectorAppend(lv);
467 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
468 break;
469 case ' ':
470 case '\t':
471 break;
472 default:
473 state = LEX_ERROR;
474 }
475 break;
476 case LEX_ERROR:
477 // This shouldn't be reached
478 break;
479 }
480 }
481
482 switch (state) {
483 case LEX_EXPECT_BINARY:
484 case LEX_EXPECT_DECIMAL:
485 case LEX_EXPECT_HEX:
486 case LEX_EXPECT_PREFIX:
487 lvNext = LexVectorAppend(lv);
488 lvNext->type = TOKEN_UINT_TYPE;
489 lvNext->uintValue = next;
490 break;
491 case LEX_EXPECT_IDENTIFIER:
492 lvNext = LexVectorAppend(lv);
493 lvNext->type = TOKEN_IDENTIFIER_TYPE;
494 lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
495 break;
496 case LEX_ROOT:
497 case LEX_EXPECT_OPERATOR:
498 case LEX_EXPECT_OPERATOR2:
499 break;
500 case LEX_EXPECT_BINARY_FIRST:
501 case LEX_EXPECT_HEX_FIRST:
502 case LEX_ERROR:
503 default:
504 lvNext = LexVectorAppend(lv);
505 lvNext->type = TOKEN_ERROR_TYPE;
506 break;
507 }
508 return adjusted;
509}
510
511static const int _operatorPrecedence[] = {
512 [OP_ASSIGN] = 14,
513 [OP_ADD] = 4,
514 [OP_SUBTRACT] = 4,
515 [OP_MULTIPLY] = 3,
516 [OP_DIVIDE] = 3,
517 [OP_MODULO] = 3,
518 [OP_AND] = 8,
519 [OP_OR] = 10,
520 [OP_XOR] = 9,
521 [OP_LESS] = 6,
522 [OP_GREATER] = 6,
523 [OP_EQUAL] = 7,
524 [OP_NOT_EQUAL] = 7,
525 [OP_LE] = 6,
526 [OP_GE] = 6,
527 [OP_LOGICAL_AND] = 11,
528 [OP_LOGICAL_OR] = 12,
529 [OP_NEGATE] = 2,
530 [OP_FLIP] = 2,
531 [OP_NOT] = 2,
532 [OP_SHIFT_L] = 5,
533 [OP_SHIFT_R] = 5,
534 [OP_DEREFERENCE] = 2,
535};
536
537static struct ParseTree* _parseTreeCreate() {
538 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
539 tree->token.type = TOKEN_ERROR_TYPE;
540 tree->rhs = 0;
541 tree->lhs = 0;
542 return tree;
543}
544
545static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
546 struct ParseTree* newTree = 0;
547 while (i < LexVectorSize(lv)) {
548 struct Token* token = LexVectorGetPointer(lv, i);
549 int newPrecedence;
550 switch (token->type) {
551 case TOKEN_IDENTIFIER_TYPE:
552 case TOKEN_UINT_TYPE:
553 if (tree->token.type == TOKEN_ERROR_TYPE) {
554 tree->token = *token;
555 if (token->type == TOKEN_IDENTIFIER_TYPE) {
556 tree->token.identifierValue = strdup(token->identifierValue);
557 }
558 ++i;
559 } else {
560 tree->token.type = TOKEN_ERROR_TYPE;
561 return i + 1;
562 }
563 break;
564 case TOKEN_SEGMENT_TYPE:
565 tree->lhs = _parseTreeCreate();
566 tree->lhs->token.type = TOKEN_UINT_TYPE;
567 tree->lhs->token.uintValue = token->uintValue;
568 tree->rhs = _parseTreeCreate();
569 tree->token.type = TOKEN_SEGMENT_TYPE;
570 i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
571 if (tree->token.type == TOKEN_ERROR_TYPE) {
572 tree->token.type = TOKEN_ERROR_TYPE;
573 }
574 break;
575 case TOKEN_OPEN_PAREN_TYPE:
576 ++*openParens;
577 i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
578 break;
579 case TOKEN_CLOSE_PAREN_TYPE:
580 if (*openParens <= 0) {
581 tree->token.type = TOKEN_ERROR_TYPE;
582 }
583 --*openParens;
584 return i + 1;
585 case TOKEN_OPERATOR_TYPE:
586 newPrecedence = _operatorPrecedence[token->operatorValue];
587 if (newPrecedence < precedence) {
588 newTree = _parseTreeCreate();
589 *newTree = *tree;
590 tree->lhs = newTree;
591 tree->rhs = _parseTreeCreate();
592 tree->token = *token;
593 i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
594 if (tree->token.type == TOKEN_ERROR_TYPE) {
595 tree->token.type = TOKEN_ERROR_TYPE;
596 }
597 } else {
598 return i;
599 }
600 break;
601 case TOKEN_ERROR_TYPE:
602 tree->token.type = TOKEN_ERROR_TYPE;
603 return i + 1;
604 }
605 }
606
607 return i;
608}
609
610void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
611 if (!tree) {
612 return;
613 }
614
615 tree->token.type = TOKEN_ERROR_TYPE;
616 tree->lhs = 0;
617 tree->rhs = 0;
618
619 int openParens = 0;
620 _parseExpression(tree, lv, 0, INT_MAX, &openParens);
621 if (openParens) {
622 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
623 free(tree->token.identifierValue);
624 }
625 tree->token.type = TOKEN_ERROR_TYPE;
626 }
627}
628
629void lexFree(struct LexVector* lv) {
630 size_t i;
631 for (i = 0; i < LexVectorSize(lv); ++i) {
632 struct Token* token = LexVectorGetPointer(lv, i);
633 if (token->type == TOKEN_IDENTIFIER_TYPE) {
634 free(token->identifierValue);
635 }
636 }
637}
638
639void parseFree(struct ParseTree* tree) {
640 if (!tree) {
641 return;
642 }
643
644 if (tree->lhs) {
645 parseFree(tree->lhs);
646 free(tree->lhs);
647 }
648 if (tree->rhs) {
649 parseFree(tree->rhs);
650 free(tree->rhs);
651 }
652
653 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
654 free(tree->token.identifierValue);
655 }
656}
657
658static bool _performOperation(struct mDebugger* debugger, enum Operation operation, int32_t current, int32_t next, int32_t* value, int* segment) {
659 switch (operation) {
660 case OP_ASSIGN:
661 current = next;
662 break;
663 case OP_ADD:
664 current += next;
665 break;
666 case OP_SUBTRACT:
667 current -= next;
668 break;
669 case OP_MULTIPLY:
670 current *= next;
671 break;
672 case OP_DIVIDE:
673 if (next != 0) {
674 current /= next;
675 } else {
676 return false;
677 }
678 break;
679 case OP_MODULO:
680 if (next != 0) {
681 current %= next;
682 } else {
683 return false;
684 }
685 break;
686 case OP_AND:
687 current &= next;
688 break;
689 case OP_OR:
690 current |= next;
691 break;
692 case OP_XOR:
693 current ^= next;
694 break;
695 case OP_LESS:
696 current = current < next;
697 break;
698 case OP_GREATER:
699 current = current > next;
700 break;
701 case OP_EQUAL:
702 current = current == next;
703 break;
704 case OP_NOT_EQUAL:
705 current = current != next;
706 break;
707 case OP_LOGICAL_AND:
708 current = current && next;
709 break;
710 case OP_LOGICAL_OR:
711 current = current || next;
712 break;
713 case OP_LE:
714 current = current <= next;
715 break;
716 case OP_GE:
717 current = current >= next;
718 break;
719 case OP_NEGATE:
720 current = -next;
721 break;
722 case OP_FLIP:
723 current = ~next;
724 break;
725 case OP_NOT:
726 current = !next;
727 break;
728 case OP_SHIFT_L:
729 current <<= next;
730 break;
731 case OP_SHIFT_R:
732 current >>= next;
733 break;
734 case OP_DEREFERENCE:
735 if (*segment < 0) {
736 current = debugger->core->busRead8(debugger->core, next);
737 } else {
738 current = debugger->core->rawRead8(debugger->core, next, *segment);
739 }
740 *segment = -1;
741 break;
742 default:
743 return false;
744 }
745 *value = current;
746 return true;
747}
748
749bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
750 if (!value) {
751 return false;
752 }
753 int32_t lhs, rhs;
754 switch (tree->token.type) {
755 case TOKEN_UINT_TYPE:
756 if (segment) {
757 *segment = -1;
758 }
759 *value = tree->token.uintValue;
760 return true;
761 case TOKEN_SEGMENT_TYPE:
762 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
763 return false;
764 }
765 return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
766 case TOKEN_OPERATOR_TYPE:
767 switch (tree->token.operatorValue) {
768 case OP_ASSIGN:
769 case OP_ADD:
770 case OP_SUBTRACT:
771 case OP_MULTIPLY:
772 case OP_DIVIDE:
773 case OP_MODULO:
774 case OP_AND:
775 case OP_OR:
776 case OP_XOR:
777 case OP_LESS:
778 case OP_GREATER:
779 case OP_EQUAL:
780 case OP_NOT_EQUAL:
781 case OP_LOGICAL_AND:
782 case OP_LOGICAL_OR:
783 case OP_LE:
784 case OP_GE:
785 case OP_SHIFT_L:
786 case OP_SHIFT_R:
787 if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
788 return false;
789 }
790 // Fall through
791 default:
792 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
793 return false;
794 }
795 break;
796 }
797 return _performOperation(debugger, tree->token.operatorValue, lhs, rhs, value, segment);
798 case TOKEN_IDENTIFIER_TYPE:
799 return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
800 case TOKEN_ERROR_TYPE:
801 default:
802 break;
803 }
804 return false;
805}