src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba/internal/debugger/parser.h>
7
8#include <mgba/core/core.h>
9#include <mgba/debugger/debugger.h>
10#include <mgba-util/string.h>
11
12DEFINE_VECTOR(LexVector, struct Token);
13
14enum LexState {
15 LEX_ERROR = -1,
16 LEX_ROOT = 0,
17 LEX_EXPECT_IDENTIFIER,
18 LEX_EXPECT_BINARY_FIRST,
19 LEX_EXPECT_BINARY,
20 LEX_EXPECT_DECIMAL,
21 LEX_EXPECT_HEX_FIRST,
22 LEX_EXPECT_HEX,
23 LEX_EXPECT_PREFIX,
24 LEX_EXPECT_OPERATOR2,
25};
26
27static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
28 if (*state == LEX_EXPECT_OPERATOR2) {
29 struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
30 if (lvNext->type != TOKEN_OPERATOR_TYPE) {
31 lvNext->type = TOKEN_ERROR_TYPE;
32 *state = LEX_ERROR;
33 return;
34 }
35 switch (lvNext->operatorValue) {
36 case OP_AND:
37 if (operator == '&') {
38 lvNext->operatorValue = OP_LOGICAL_AND;
39 *state = LEX_ROOT;
40 return;
41 }
42 break;
43 case OP_OR:
44 if (operator == '|') {
45 lvNext->operatorValue = OP_LOGICAL_OR;
46 *state = LEX_ROOT;
47 return;
48 }
49 break;
50 case OP_LESS:
51 if (operator == '=') {
52 lvNext->operatorValue = OP_LE;
53 *state = LEX_ROOT;
54 return;
55 }
56 if (operator == '<') {
57 lvNext->operatorValue = OP_SHIFT_L;
58 *state = LEX_ROOT;
59 return;
60 }
61 break;
62 case OP_GREATER:
63 if (operator == '=') {
64 lvNext->operatorValue = OP_GE;
65 *state = LEX_ROOT;
66 return;
67 }
68 if (operator == '>') {
69 lvNext->operatorValue = OP_SHIFT_R;
70 *state = LEX_ROOT;
71 return;
72 }
73 break;
74 case OP_ASSIGN:
75 if (operator == '=') {
76 lvNext->operatorValue = OP_EQUAL;
77 *state = LEX_ROOT;
78 return;
79 }
80 break;
81 case OP_NOT:
82 if (operator == '=') {
83 lvNext->operatorValue = OP_NOT_EQUAL;
84 *state = LEX_ROOT;
85 return;
86 }
87 break;
88 default:
89 break;
90 }
91 *state = LEX_ERROR;
92 }
93 struct Token lvNext;
94 lvNext.type = TOKEN_OPERATOR_TYPE;
95 switch (operator) {
96 case '=':
97 lvNext.operatorValue = OP_ASSIGN;
98 break;
99 case '+':
100 lvNext.operatorValue = OP_ADD;
101 break;
102 case '-':
103 lvNext.operatorValue = OP_SUBTRACT;
104 break;
105 case '*':
106 lvNext.operatorValue = OP_MULTIPLY;
107 break;
108 case '/':
109 lvNext.operatorValue = OP_DIVIDE;
110 break;
111 case '%':
112 lvNext.operatorValue = OP_MODULO;
113 break;
114 case '&':
115 lvNext.operatorValue = OP_AND;
116 break;
117 case '|':
118 lvNext.operatorValue = OP_OR;
119 break;
120 case '^':
121 lvNext.operatorValue = OP_XOR;
122 break;
123 case '<':
124 lvNext.operatorValue = OP_LESS;
125 break;
126 case '>':
127 lvNext.operatorValue = OP_GREATER;
128 break;
129 case '!':
130 lvNext.operatorValue = OP_NOT;
131 break;
132 case '~':
133 lvNext.operatorValue = OP_FLIP;
134 break;
135 default:
136 *state = LEX_ERROR;
137 return;
138 }
139 *state = LEX_EXPECT_OPERATOR2;
140 *LexVectorAppend(lv) = lvNext;
141}
142
143static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
144 struct Token* lvNext;
145
146 switch (token) {
147 case '=':
148 case '+':
149 case '-':
150 case '*':
151 case '/':
152 case '%':
153 case '&':
154 case '|':
155 case '^':
156 case '<':
157 case '>':
158 case '!':
159 lvNext = LexVectorAppend(lv);
160 lvNext->type = TOKEN_UINT_TYPE;
161 lvNext->uintValue = next;
162 _lexOperator(lv, token, state);
163 break;
164 case ')':
165 lvNext = LexVectorAppend(lv);
166 lvNext->type = TOKEN_UINT_TYPE;
167 lvNext->uintValue = next;
168 lvNext = LexVectorAppend(lv);
169 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
170 *state = LEX_ROOT;
171 break;
172 case ' ':
173 case '\t':
174 lvNext = LexVectorAppend(lv);
175 lvNext->type = TOKEN_UINT_TYPE;
176 lvNext->uintValue = next;
177 *state = LEX_ROOT;
178 break;
179 default:
180 *state = LEX_ERROR;
181 break;
182 }
183}
184
185size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
186 if (!string || length < 1) {
187 return 0;
188 }
189
190 uint32_t next = 0;
191 size_t adjusted = 0;
192
193 enum LexState state = LEX_ROOT;
194 const char* tokenStart = 0;
195 struct Token* lvNext;
196
197 if (!eol) {
198 eol = " \r\n";
199 }
200
201 while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
202 char token = string[0];
203 ++string;
204 ++adjusted;
205 --length;
206 switch (state) {
207 case LEX_EXPECT_OPERATOR2:
208 switch (token) {
209 case '&':
210 case '|':
211 case '=':
212 case '<':
213 case '>':
214 _lexOperator(lv, token, &state);
215 break;
216 }
217 if (state != LEX_EXPECT_OPERATOR2) {
218 break;
219 }
220 // Fall through
221 case LEX_ROOT:
222 tokenStart = string - 1;
223 switch (token) {
224 case '1':
225 case '2':
226 case '3':
227 case '4':
228 case '5':
229 case '6':
230 case '7':
231 case '8':
232 case '9':
233 state = LEX_EXPECT_DECIMAL;
234 next = token - '0';
235 break;
236 case '0':
237 state = LEX_EXPECT_PREFIX;
238 next = 0;
239 break;
240 case '$':
241 state = LEX_EXPECT_HEX_FIRST;
242 next = 0;
243 break;
244 case '(':
245 state = LEX_ROOT;
246 lvNext = LexVectorAppend(lv);
247 lvNext->type = TOKEN_OPEN_PAREN_TYPE;
248 break;
249 case '=':
250 case '+':
251 case '-':
252 case '*':
253 case '/':
254 case '%':
255 case '&':
256 case '|':
257 case '^':
258 case '<':
259 case '>':
260 case '!':
261 case '~':
262 _lexOperator(lv, token, &state);
263 break;
264 case ')':
265 lvNext = LexVectorAppend(lv);
266 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
267 break;
268 case ' ':
269 case '\t':
270 break;
271 default:
272 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
273 state = LEX_EXPECT_IDENTIFIER;
274 } else {
275 state = LEX_ERROR;
276 }
277 break;
278 };
279 break;
280 case LEX_EXPECT_IDENTIFIER:
281 switch (token) {
282 case '=':
283 case '+':
284 case '-':
285 case '*':
286 case '/':
287 case '%':
288 case '&':
289 case '|':
290 case '^':
291 case '<':
292 case '>':
293 case '!':
294 case '~':
295 lvNext = LexVectorAppend(lv);
296 lvNext->type = TOKEN_IDENTIFIER_TYPE;
297 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
298 _lexOperator(lv, token, &state);
299 break;
300 case ')':
301 lvNext = LexVectorAppend(lv);
302 lvNext->type = TOKEN_IDENTIFIER_TYPE;
303 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
304 lvNext = LexVectorAppend(lv);
305 lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
306 state = LEX_ROOT;
307 break;
308 case ' ':
309 case '\t':
310 lvNext = LexVectorAppend(lv);
311 lvNext->type = TOKEN_IDENTIFIER_TYPE;
312 lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
313 state = LEX_ROOT;
314 break;
315 default:
316 break;
317 }
318 break;
319 case LEX_EXPECT_BINARY_FIRST:
320 state = LEX_EXPECT_BINARY;
321 // Fall through
322 case LEX_EXPECT_BINARY:
323 switch (token) {
324 case '0':
325 case '1':
326 // TODO: handle overflow
327 next <<= 1;
328 next += token - '0';
329 break;
330 default:
331 _lexValue(lv, token, next, &state);
332 break;
333 }
334 break;
335 case LEX_EXPECT_DECIMAL:
336 switch (token) {
337 case '0':
338 case '1':
339 case '2':
340 case '3':
341 case '4':
342 case '5':
343 case '6':
344 case '7':
345 case '8':
346 case '9':
347 // TODO: handle overflow
348 next *= 10;
349 next += token - '0';
350 break;
351 default:
352 _lexValue(lv, token, next, &state);
353 break;
354 }
355 break;
356 case LEX_EXPECT_HEX_FIRST:
357 state = LEX_EXPECT_HEX;
358 // Fall through
359 case LEX_EXPECT_HEX:
360 switch (token) {
361 case '0':
362 case '1':
363 case '2':
364 case '3':
365 case '4':
366 case '5':
367 case '6':
368 case '7':
369 case '8':
370 case '9':
371 // TODO: handle overflow
372 next *= 16;
373 next += token - '0';
374 break;
375 case 'A':
376 case 'B':
377 case 'C':
378 case 'D':
379 case 'E':
380 case 'F':
381 // TODO: handle overflow
382 next *= 16;
383 next += token - 'A' + 10;
384 break;
385 case 'a':
386 case 'b':
387 case 'c':
388 case 'd':
389 case 'e':
390 case 'f':
391 // TODO: handle overflow
392 next *= 16;
393 next += token - 'a' + 10;
394 break;
395 case ':':
396 lvNext = LexVectorAppend(lv);
397 lvNext->type = TOKEN_SEGMENT_TYPE;
398 lvNext->uintValue = next;
399 next = 0;
400 break;
401 default:
402 _lexValue(lv, token, next, &state);
403 break;
404 }
405 break;
406 case LEX_EXPECT_PREFIX:
407 switch (token) {
408 case 'X':
409 case 'x':
410 next = 0;
411 state = LEX_EXPECT_HEX_FIRST;
412 break;
413 case 'B':
414 case 'b':
415 next = 0;
416 state = LEX_EXPECT_BINARY_FIRST;
417 break;
418 case '0':
419 case '1':
420 case '2':
421 case '3':
422 case '4':
423 case '5':
424 case '6':
425 case '7':
426 case '8':
427 case '9':
428 next = token - '0';
429 state = LEX_EXPECT_DECIMAL;
430 break;
431 default:
432 _lexValue(lv, token, next, &state);
433 break;
434 }
435 break;
436 case LEX_ERROR:
437 // This shouldn't be reached
438 break;
439 }
440 }
441
442 switch (state) {
443 case LEX_EXPECT_BINARY:
444 case LEX_EXPECT_DECIMAL:
445 case LEX_EXPECT_HEX:
446 case LEX_EXPECT_PREFIX:
447 lvNext = LexVectorAppend(lv);
448 lvNext->type = TOKEN_UINT_TYPE;
449 lvNext->uintValue = next;
450 break;
451 case LEX_EXPECT_IDENTIFIER:
452 lvNext = LexVectorAppend(lv);
453 lvNext->type = TOKEN_IDENTIFIER_TYPE;
454 lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
455 break;
456 case LEX_ROOT:
457 case LEX_EXPECT_OPERATOR2:
458 break;
459 case LEX_EXPECT_BINARY_FIRST:
460 case LEX_EXPECT_HEX_FIRST:
461 case LEX_ERROR:
462 default:
463 lvNext = LexVectorAppend(lv);
464 lvNext->type = TOKEN_ERROR_TYPE;
465 break;
466 }
467 return adjusted;
468}
469
470static const int _operatorPrecedence[] = {
471 [OP_ASSIGN] = 14,
472 [OP_ADD] = 4,
473 [OP_SUBTRACT] = 4,
474 [OP_MULTIPLY] = 3,
475 [OP_DIVIDE] = 3,
476 [OP_MODULO] = 3,
477 [OP_AND] = 8,
478 [OP_OR] = 10,
479 [OP_XOR] = 9,
480 [OP_LESS] = 6,
481 [OP_GREATER] = 6,
482 [OP_EQUAL] = 7,
483 [OP_NOT_EQUAL] = 7,
484 [OP_LE] = 6,
485 [OP_GE] = 6,
486 [OP_LOGICAL_AND] = 11,
487 [OP_LOGICAL_OR] = 12,
488 [OP_NEGATE] = 2,
489 [OP_FLIP] = 2,
490 [OP_NOT] = 2,
491 [OP_SHIFT_L] = 5,
492 [OP_SHIFT_R] = 5,
493 [OP_DEREFERENCE] = 2,
494};
495
496static struct ParseTree* _parseTreeCreate() {
497 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
498 tree->token.type = TOKEN_ERROR_TYPE;
499 tree->rhs = 0;
500 tree->lhs = 0;
501 return tree;
502}
503
504static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
505 struct ParseTree* newTree = 0;
506 while (i < LexVectorSize(lv)) {
507 struct Token* token = LexVectorGetPointer(lv, i);
508 int newPrecedence;
509 switch (token->type) {
510 case TOKEN_IDENTIFIER_TYPE:
511 case TOKEN_UINT_TYPE:
512 if (tree->token.type == TOKEN_ERROR_TYPE) {
513 tree->token = *token;
514 if (token->type == TOKEN_IDENTIFIER_TYPE) {
515 tree->token.identifierValue = strdup(token->identifierValue);
516 }
517 ++i;
518 } else {
519 tree->token.type = TOKEN_ERROR_TYPE;
520 return i + 1;
521 }
522 break;
523 case TOKEN_SEGMENT_TYPE:
524 tree->lhs = _parseTreeCreate();
525 tree->lhs->token.type = TOKEN_UINT_TYPE;
526 tree->lhs->token.uintValue = token->uintValue;
527 tree->rhs = _parseTreeCreate();
528 tree->token.type = TOKEN_SEGMENT_TYPE;
529 i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
530 break;
531 case TOKEN_OPEN_PAREN_TYPE:
532 ++*openParens;
533 i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
534 break;
535 case TOKEN_CLOSE_PAREN_TYPE:
536 if (*openParens <= 0) {
537 tree->token.type = TOKEN_ERROR_TYPE;
538 }
539 --*openParens;
540 return i + 1;
541 case TOKEN_OPERATOR_TYPE:
542 if (tree->token.type == TOKEN_ERROR_TYPE) {
543 switch (token->operatorValue) {
544 case OP_SUBTRACT:
545 token->operatorValue = OP_NEGATE;
546 break;
547 case OP_MULTIPLY:
548 token->operatorValue = OP_DEREFERENCE;
549 break;
550 case OP_NOT:
551 case OP_FLIP:
552 break;
553 default:
554 break;
555 }
556 }
557 newPrecedence = _operatorPrecedence[token->operatorValue];
558 if (newPrecedence < precedence) {
559 newTree = _parseTreeCreate();
560 *newTree = *tree;
561 tree->lhs = newTree;
562 tree->rhs = _parseTreeCreate();
563 tree->token = *token;
564 i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
565 if (tree->token.type == TOKEN_ERROR_TYPE) {
566 tree->token.type = TOKEN_ERROR_TYPE;
567 }
568 } else {
569 return i;
570 }
571 break;
572 case TOKEN_ERROR_TYPE:
573 tree->token.type = TOKEN_ERROR_TYPE;
574 return i + 1;
575 }
576 }
577
578 return i;
579}
580
581void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
582 if (!tree) {
583 return;
584 }
585
586 tree->token.type = TOKEN_ERROR_TYPE;
587 tree->lhs = 0;
588 tree->rhs = 0;
589
590 int openParens = 0;
591 _parseExpression(tree, lv, 0, INT_MAX, &openParens);
592 if (openParens) {
593 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
594 free(tree->token.identifierValue);
595 }
596 tree->token.type = TOKEN_ERROR_TYPE;
597 }
598}
599
600void lexFree(struct LexVector* lv) {
601 size_t i;
602 for (i = 0; i < LexVectorSize(lv); ++i) {
603 struct Token* token = LexVectorGetPointer(lv, i);
604 if (token->type == TOKEN_IDENTIFIER_TYPE) {
605 free(token->identifierValue);
606 }
607 }
608}
609
610void parseFree(struct ParseTree* tree) {
611 if (!tree) {
612 return;
613 }
614
615 if (tree->lhs) {
616 parseFree(tree->lhs);
617 free(tree->lhs);
618 }
619 if (tree->rhs) {
620 parseFree(tree->rhs);
621 free(tree->rhs);
622 }
623
624 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
625 free(tree->token.identifierValue);
626 }
627}
628
629static bool _performOperation(struct mDebugger* debugger, enum Operation operation, int32_t current, int32_t next, int32_t* value, int* segment) {
630 switch (operation) {
631 case OP_ASSIGN:
632 current = next;
633 break;
634 case OP_ADD:
635 current += next;
636 break;
637 case OP_SUBTRACT:
638 current -= next;
639 break;
640 case OP_MULTIPLY:
641 current *= next;
642 break;
643 case OP_DIVIDE:
644 if (next != 0) {
645 current /= next;
646 } else {
647 return false;
648 }
649 break;
650 case OP_MODULO:
651 if (next != 0) {
652 current %= next;
653 } else {
654 return false;
655 }
656 break;
657 case OP_AND:
658 current &= next;
659 break;
660 case OP_OR:
661 current |= next;
662 break;
663 case OP_XOR:
664 current ^= next;
665 break;
666 case OP_LESS:
667 current = current < next;
668 break;
669 case OP_GREATER:
670 current = current > next;
671 break;
672 case OP_EQUAL:
673 current = current == next;
674 break;
675 case OP_NOT_EQUAL:
676 current = current != next;
677 break;
678 case OP_LOGICAL_AND:
679 current = current && next;
680 break;
681 case OP_LOGICAL_OR:
682 current = current || next;
683 break;
684 case OP_LE:
685 current = current <= next;
686 break;
687 case OP_GE:
688 current = current >= next;
689 break;
690 case OP_NEGATE:
691 current = -next;
692 break;
693 case OP_FLIP:
694 current = ~next;
695 break;
696 case OP_NOT:
697 current = !next;
698 break;
699 case OP_SHIFT_L:
700 current <<= next;
701 break;
702 case OP_SHIFT_R:
703 current >>= next;
704 break;
705 case OP_DEREFERENCE:
706 if (*segment < 0) {
707 current = debugger->core->busRead8(debugger->core, next);
708 } else {
709 current = debugger->core->rawRead8(debugger->core, next, *segment);
710 }
711 *segment = -1;
712 break;
713 default:
714 return false;
715 }
716 *value = current;
717 return true;
718}
719
720bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
721 if (!value) {
722 return false;
723 }
724 int32_t lhs, rhs;
725 switch (tree->token.type) {
726 case TOKEN_UINT_TYPE:
727 if (segment) {
728 *segment = -1;
729 }
730 *value = tree->token.uintValue;
731 return true;
732 case TOKEN_SEGMENT_TYPE:
733 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
734 return false;
735 }
736 return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
737 case TOKEN_OPERATOR_TYPE:
738 switch (tree->token.operatorValue) {
739 case OP_ASSIGN:
740 case OP_ADD:
741 case OP_SUBTRACT:
742 case OP_MULTIPLY:
743 case OP_DIVIDE:
744 case OP_MODULO:
745 case OP_AND:
746 case OP_OR:
747 case OP_XOR:
748 case OP_LESS:
749 case OP_GREATER:
750 case OP_EQUAL:
751 case OP_NOT_EQUAL:
752 case OP_LOGICAL_AND:
753 case OP_LOGICAL_OR:
754 case OP_LE:
755 case OP_GE:
756 case OP_SHIFT_L:
757 case OP_SHIFT_R:
758 if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
759 return false;
760 }
761 // Fall through
762 default:
763 if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
764 return false;
765 }
766 break;
767 }
768 return _performOperation(debugger, tree->token.operatorValue, lhs, rhs, value, segment);
769 case TOKEN_IDENTIFIER_TYPE:
770 return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
771 case TOKEN_ERROR_TYPE:
772 default:
773 break;
774 }
775 return false;
776}