all repos — mgba @ d6ac0dc6f526ac111c4e750e282050c034724b76

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba/internal/debugger/parser.h>
  7
  8#include <mgba/core/core.h>
  9#include <mgba/debugger/debugger.h>
 10#include <mgba-util/string.h>
 11
 12DEFINE_VECTOR(LexVector, struct Token);
 13
 14enum LexState {
 15	LEX_ERROR = -1,
 16	LEX_ROOT = 0,
 17	LEX_EXPECT_IDENTIFIER,
 18	LEX_EXPECT_BINARY_FIRST,
 19	LEX_EXPECT_BINARY,
 20	LEX_EXPECT_DECIMAL,
 21	LEX_EXPECT_HEX_FIRST,
 22	LEX_EXPECT_HEX,
 23	LEX_EXPECT_PREFIX,
 24	LEX_EXPECT_OPERATOR,
 25	LEX_EXPECT_OPERATOR2,
 26};
 27
 28static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
 29	if (*state == LEX_EXPECT_OPERATOR2) {
 30		struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
 31		if (lvNext->type != TOKEN_OPERATOR_TYPE) {
 32			lvNext->type = TOKEN_ERROR_TYPE;
 33			*state = LEX_ERROR;
 34			return;
 35		}
 36		switch (lvNext->operatorValue) {
 37		case OP_AND:
 38			if (operator == '&') {
 39				lvNext->operatorValue = OP_LOGICAL_AND;
 40				*state = LEX_ROOT;
 41				return;
 42			}
 43			break;
 44		case OP_OR:
 45			if (operator == '|') {
 46				lvNext->operatorValue = OP_LOGICAL_OR;
 47				*state = LEX_ROOT;
 48				return;
 49			}
 50			break;
 51		case OP_LESS:
 52			if (operator == '=') {
 53				lvNext->operatorValue = OP_LE;
 54				*state = LEX_ROOT;
 55				return;
 56			}
 57			if (operator == '<') {
 58				lvNext->operatorValue = OP_SHIFT_L;
 59				*state = LEX_ROOT;
 60				return;
 61			}
 62			break;
 63		case OP_GREATER:
 64			if (operator == '=') {
 65				lvNext->operatorValue = OP_GE;
 66				*state = LEX_ROOT;
 67				return;
 68			}
 69			if (operator == '>') {
 70				lvNext->operatorValue = OP_SHIFT_R;
 71				*state = LEX_ROOT;
 72				return;
 73			}
 74			break;
 75		case OP_ASSIGN:
 76			if (operator == '=') {
 77				lvNext->operatorValue = OP_EQUAL;
 78				*state = LEX_ROOT;
 79				return;
 80			}
 81			break;
 82		case OP_NOT:
 83			if (operator == '=') {
 84				lvNext->operatorValue = OP_NOT_EQUAL;
 85				*state = LEX_ROOT;
 86				return;
 87			}
 88			break;
 89		default:
 90			break;
 91		}
 92		*state = LEX_ERROR;
 93	}
 94	if (*state == LEX_ROOT || *state == LEX_ERROR) {
 95		struct Token* lvNext = LexVectorAppend(lv);
 96		lvNext->type = TOKEN_OPERATOR_TYPE;
 97		*state = LEX_ROOT;
 98		switch (operator) {
 99		case '-':
100			lvNext->operatorValue = OP_NEGATE;
101			break;
102		case '~':
103			lvNext->operatorValue = OP_FLIP;
104			break;
105		case '!':
106			lvNext->operatorValue = OP_NOT;
107			break;
108		case '*':
109			lvNext->operatorValue = OP_DEREFERENCE;
110			break;
111		default:
112			lvNext->type = TOKEN_ERROR_TYPE;
113			*state = LEX_ERROR;
114			break;
115		}
116		return;
117	}
118	struct Token* lvNext = LexVectorAppend(lv);
119	lvNext->type = TOKEN_OPERATOR_TYPE;
120	*state = LEX_EXPECT_OPERATOR2;
121	switch (operator) {
122	case '=':
123		lvNext->operatorValue = OP_ASSIGN;
124		break;
125	case '+':
126		lvNext->operatorValue = OP_ADD;
127		break;
128	case '-':
129		lvNext->operatorValue = OP_SUBTRACT;
130		break;
131	case '*':
132		lvNext->operatorValue = OP_MULTIPLY;
133		break;
134	case '/':
135		lvNext->operatorValue = OP_DIVIDE;
136		break;
137	case '%':
138		lvNext->operatorValue = OP_MODULO;
139		break;
140	case '&':
141		lvNext->operatorValue = OP_AND;
142		break;
143	case '|':
144		lvNext->operatorValue = OP_OR;
145		break;
146	case '^':
147		lvNext->operatorValue = OP_XOR;
148		break;
149	case '<':
150		lvNext->operatorValue = OP_LESS;
151		break;
152	case '>':
153		lvNext->operatorValue = OP_GREATER;
154		break;
155	case '!':
156		lvNext->operatorValue = OP_NOT;
157		break;
158	default:
159		lvNext->type = TOKEN_ERROR_TYPE;
160		break;
161	}
162}
163
164static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
165	struct Token* lvNext;
166
167	switch (token) {
168	case '=':
169	case '+':
170	case '-':
171	case '*':
172	case '/':
173	case '%':
174	case '&':
175	case '|':
176	case '^':
177	case '<':
178	case '>':
179	case '!':
180		lvNext = LexVectorAppend(lv);
181		lvNext->type = TOKEN_UINT_TYPE;
182		lvNext->uintValue = next;
183		_lexOperator(lv, token, state);
184		break;
185	case ')':
186		lvNext = LexVectorAppend(lv);
187		lvNext->type = TOKEN_UINT_TYPE;
188		lvNext->uintValue = next;
189		lvNext = LexVectorAppend(lv);
190		lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
191		*state = LEX_EXPECT_OPERATOR;
192		break;
193	case ' ':
194	case '\t':
195		lvNext = LexVectorAppend(lv);
196		lvNext->type = TOKEN_UINT_TYPE;
197		lvNext->uintValue = next;
198		*state = LEX_EXPECT_OPERATOR;
199		break;
200	default:
201		*state = LEX_ERROR;
202		break;
203	}
204}
205
206size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
207	if (!string || length < 1) {
208		return 0;
209	}
210
211	uint32_t next = 0;
212	size_t adjusted = 0;
213
214	enum LexState state = LEX_ROOT;
215	const char* tokenStart = 0;
216	struct Token* lvNext;
217
218	if (!eol) {
219		eol = " \r\n";
220	}
221
222	while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
223		char token = string[0];
224		++string;
225		++adjusted;
226		--length;
227		switch (state) {
228		case LEX_EXPECT_OPERATOR2:
229			switch (token) {
230			case '&':
231			case '|':
232			case '=':
233			case '<':
234			case '>':
235				_lexOperator(lv, token, &state);
236				break;
237			}
238			if (state != LEX_EXPECT_OPERATOR2) {
239				break;
240			}
241			// Fall through
242		case LEX_ROOT:
243			tokenStart = string - 1;
244			switch (token) {
245			case '1':
246			case '2':
247			case '3':
248			case '4':
249			case '5':
250			case '6':
251			case '7':
252			case '8':
253			case '9':
254				state = LEX_EXPECT_DECIMAL;
255				next = token - '0';
256				break;
257			case '0':
258				state = LEX_EXPECT_PREFIX;
259				next = 0;
260				break;
261			case '$':
262				state = LEX_EXPECT_HEX_FIRST;
263				next = 0;
264				break;
265			case '%':
266				state = LEX_EXPECT_BINARY_FIRST;
267				next = 0;
268				break;
269			case '(':
270				state = LEX_ROOT;
271				lvNext = LexVectorAppend(lv);
272				lvNext->type = TOKEN_OPEN_PAREN_TYPE;
273				break;
274			case '!':
275			case '-':
276			case '~':
277			case '*':
278				_lexOperator(lv, token, &state);
279				break;
280			case ' ':
281			case '\t':
282				break;
283			default:
284				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
285					state = LEX_EXPECT_IDENTIFIER;
286				} else {
287					state = LEX_ERROR;
288				}
289				break;
290			};
291			break;
292		case LEX_EXPECT_IDENTIFIER:
293			switch (token) {
294			case '=':
295			case '+':
296			case '-':
297			case '*':
298			case '/':
299			case '%':
300			case '&':
301			case '|':
302			case '^':
303			case '<':
304			case '>':
305			case '!':
306				lvNext = LexVectorAppend(lv);
307				lvNext->type = TOKEN_IDENTIFIER_TYPE;
308				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
309				_lexOperator(lv, token, &state);
310				break;
311			case ')':
312				lvNext = LexVectorAppend(lv);
313				lvNext->type = TOKEN_IDENTIFIER_TYPE;
314				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
315				lvNext = LexVectorAppend(lv);
316				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
317				state = LEX_EXPECT_OPERATOR;
318				break;
319			case ' ':
320			case '\t':
321				lvNext = LexVectorAppend(lv);
322				lvNext->type = TOKEN_IDENTIFIER_TYPE;
323				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
324				state = LEX_EXPECT_OPERATOR;
325				break;
326			default:
327				break;
328			}
329			break;
330		case LEX_EXPECT_BINARY_FIRST:
331			state = LEX_EXPECT_BINARY;
332			// Fall through
333		case LEX_EXPECT_BINARY:
334			switch (token) {
335			case '0':
336			case '1':
337				// TODO: handle overflow
338				next <<= 1;
339				next += token - '0';
340				break;
341			default:
342				_lexValue(lv, token, next, &state);
343				break;
344			}
345			break;
346		case LEX_EXPECT_DECIMAL:
347			switch (token) {
348			case '0':
349			case '1':
350			case '2':
351			case '3':
352			case '4':
353			case '5':
354			case '6':
355			case '7':
356			case '8':
357			case '9':
358				// TODO: handle overflow
359				next *= 10;
360				next += token - '0';
361				break;
362			default:
363				_lexValue(lv, token, next, &state);
364				break;
365			}
366			break;
367		case LEX_EXPECT_HEX_FIRST:
368			state = LEX_EXPECT_HEX;
369			// Fall through
370		case LEX_EXPECT_HEX:
371			switch (token) {
372			case '0':
373			case '1':
374			case '2':
375			case '3':
376			case '4':
377			case '5':
378			case '6':
379			case '7':
380			case '8':
381			case '9':
382				// TODO: handle overflow
383				next *= 16;
384				next += token - '0';
385				break;
386			case 'A':
387			case 'B':
388			case 'C':
389			case 'D':
390			case 'E':
391			case 'F':
392				// TODO: handle overflow
393				next *= 16;
394				next += token - 'A' + 10;
395				break;
396			case 'a':
397			case 'b':
398			case 'c':
399			case 'd':
400			case 'e':
401			case 'f':
402				// TODO: handle overflow
403				next *= 16;
404				next += token - 'a' + 10;
405				break;
406			case ':':
407				lvNext = LexVectorAppend(lv);
408				lvNext->type = TOKEN_SEGMENT_TYPE;
409				lvNext->uintValue = next;
410				next = 0;
411				break;
412			default:
413				_lexValue(lv, token, next, &state);
414				break;
415			}
416			break;
417		case LEX_EXPECT_PREFIX:
418			switch (token) {
419			case 'X':
420			case 'x':
421				next = 0;
422				state = LEX_EXPECT_HEX_FIRST;
423				break;
424			case 'B':
425			case 'b':
426				next = 0;
427				state = LEX_EXPECT_BINARY_FIRST;
428				break;
429			case '0':
430			case '1':
431			case '2':
432			case '3':
433			case '4':
434			case '5':
435			case '6':
436			case '7':
437			case '8':
438			case '9':
439				next = token - '0';
440				state = LEX_EXPECT_DECIMAL;
441				break;
442			default:
443				_lexValue(lv, token, next, &state);
444				break;
445			}
446			break;
447		case LEX_EXPECT_OPERATOR:
448			switch (token) {
449			case '=':
450			case '+':
451			case '-':
452			case '*':
453			case '/':
454			case '%':
455			case '&':
456			case '|':
457			case '^':
458			case '<':
459			case '>':
460			case '!':
461				_lexOperator(lv, token, &state);
462				break;
463			case ')':
464				lvNext = LexVectorAppend(lv);
465				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
466				break;
467			case ' ':
468			case '\t':
469				break;
470			default:
471				state = LEX_ERROR;
472			}
473			break;
474		case LEX_ERROR:
475			// This shouldn't be reached
476			break;
477		}
478	}
479
480	switch (state) {
481	case LEX_EXPECT_BINARY:
482	case LEX_EXPECT_DECIMAL:
483	case LEX_EXPECT_HEX:
484	case LEX_EXPECT_PREFIX:
485		lvNext = LexVectorAppend(lv);
486		lvNext->type = TOKEN_UINT_TYPE;
487		lvNext->uintValue = next;
488		break;
489	case LEX_EXPECT_IDENTIFIER:
490		lvNext = LexVectorAppend(lv);
491		lvNext->type = TOKEN_IDENTIFIER_TYPE;
492		lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
493		break;
494	case LEX_ROOT:
495	case LEX_EXPECT_OPERATOR:
496	case LEX_EXPECT_OPERATOR2:
497		break;
498	case LEX_EXPECT_BINARY_FIRST:
499	case LEX_EXPECT_HEX_FIRST:
500	case LEX_ERROR:
501	default:
502		lvNext = LexVectorAppend(lv);
503		lvNext->type = TOKEN_ERROR_TYPE;
504		break;
505	}
506	return adjusted;
507}
508
509static const int _operatorPrecedence[] = {
510	[OP_ASSIGN] = 14,
511	[OP_ADD] = 4,
512	[OP_SUBTRACT] = 4,
513	[OP_MULTIPLY] = 3,
514	[OP_DIVIDE] = 3,
515	[OP_MODULO] = 3,
516	[OP_AND] = 8,
517	[OP_OR] = 10,
518	[OP_XOR] = 9,
519	[OP_LESS] = 6,
520	[OP_GREATER] = 6,
521	[OP_EQUAL] = 7,
522	[OP_NOT_EQUAL] = 7,
523	[OP_LE] = 6,
524	[OP_GE] = 6,
525	[OP_LOGICAL_AND] = 11,
526	[OP_LOGICAL_OR] = 12,
527	[OP_NEGATE] = 2,
528	[OP_FLIP] = 2,
529	[OP_NOT] = 2,
530	[OP_SHIFT_L] = 5,
531	[OP_SHIFT_R] = 5,
532	[OP_DEREFERENCE] = 2,
533};
534
535static struct ParseTree* _parseTreeCreate() {
536	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
537	tree->token.type = TOKEN_ERROR_TYPE;
538	tree->rhs = 0;
539	tree->lhs = 0;
540	return tree;
541}
542
543static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
544	struct ParseTree* newTree = 0;
545	while (i < LexVectorSize(lv)) {
546		struct Token* token = LexVectorGetPointer(lv, i);
547		int newPrecedence;
548		switch (token->type) {
549		case TOKEN_IDENTIFIER_TYPE:
550		case TOKEN_UINT_TYPE:
551			if (tree->token.type == TOKEN_ERROR_TYPE) {
552				tree->token = *token;
553				if (token->type == TOKEN_IDENTIFIER_TYPE) {
554					tree->token.identifierValue = strdup(token->identifierValue);
555				}
556				++i;
557			} else {
558				tree->token.type = TOKEN_ERROR_TYPE;
559				return i + 1;
560			}
561			break;
562		case TOKEN_SEGMENT_TYPE:
563			tree->lhs = _parseTreeCreate();
564			tree->lhs->token.type = TOKEN_UINT_TYPE;
565			tree->lhs->token.uintValue = token->uintValue;
566			tree->rhs = _parseTreeCreate();
567			tree->token.type = TOKEN_SEGMENT_TYPE;
568			i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
569			if (tree->token.type == TOKEN_ERROR_TYPE) {
570				tree->token.type = TOKEN_ERROR_TYPE;
571			}
572			break;
573		case TOKEN_OPEN_PAREN_TYPE:
574			++*openParens;
575			i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
576			break;
577		case TOKEN_CLOSE_PAREN_TYPE:
578			if (*openParens <= 0) {
579				tree->token.type = TOKEN_ERROR_TYPE;
580			}
581			--*openParens;
582			return i + 1;
583		case TOKEN_OPERATOR_TYPE:
584			newPrecedence = _operatorPrecedence[token->operatorValue];
585			if (newPrecedence < precedence) {
586				newTree = _parseTreeCreate();
587				*newTree = *tree;
588				tree->lhs = newTree;
589				tree->rhs = _parseTreeCreate();
590				tree->token = *token;
591				i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
592				if (tree->token.type == TOKEN_ERROR_TYPE) {
593					tree->token.type = TOKEN_ERROR_TYPE;
594				}
595			} else {
596				return i;
597			}
598			break;
599		case TOKEN_ERROR_TYPE:
600			tree->token.type = TOKEN_ERROR_TYPE;
601			return i + 1;
602		}
603	}
604
605	return i;
606}
607
608void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
609	if (!tree) {
610		return;
611	}
612
613	tree->token.type = TOKEN_ERROR_TYPE;
614	tree->lhs = 0;
615	tree->rhs = 0;
616
617	int openParens = 0;
618	_parseExpression(tree, lv, 0, INT_MAX, &openParens);
619	if (openParens) {
620		if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
621			free(tree->token.identifierValue);
622		}
623		tree->token.type = TOKEN_ERROR_TYPE;
624	}
625}
626
627void lexFree(struct LexVector* lv) {
628	size_t i;
629	for (i = 0; i < LexVectorSize(lv); ++i) {
630		struct Token* token = LexVectorGetPointer(lv, i);
631		if (token->type == TOKEN_IDENTIFIER_TYPE) {
632			free(token->identifierValue);
633		}
634	}
635}
636
637void parseFree(struct ParseTree* tree) {
638	if (!tree) {
639		return;
640	}
641
642	if (tree->lhs) {
643		parseFree(tree->lhs);
644		free(tree->lhs);
645	}
646	if (tree->rhs) {
647		parseFree(tree->rhs);
648		free(tree->rhs);
649	}
650
651	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
652		free(tree->token.identifierValue);
653	}
654}
655
656static bool _performOperation(struct mDebugger* debugger, enum Operation operation, int32_t current, int32_t next, int32_t* value, int* segment) {
657	switch (operation) {
658	case OP_ASSIGN:
659		current = next;
660		break;
661	case OP_ADD:
662		current += next;
663		break;
664	case OP_SUBTRACT:
665		current -= next;
666		break;
667	case OP_MULTIPLY:
668		current *= next;
669		break;
670	case OP_DIVIDE:
671		if (next != 0) {
672			current /= next;
673		} else {
674			return false;
675		}
676		break;
677	case OP_MODULO:
678		if (next != 0) {
679			current %= next;
680		} else {
681			return false;
682		}
683		break;
684	case OP_AND:
685		current &= next;
686		break;
687	case OP_OR:
688		current |= next;
689		break;
690	case OP_XOR:
691		current ^= next;
692		break;
693	case OP_LESS:
694		current = current < next;
695		break;
696	case OP_GREATER:
697		current = current > next;
698		break;
699	case OP_EQUAL:
700		current = current == next;
701		break;
702	case OP_NOT_EQUAL:
703		current = current != next;
704		break;
705	case OP_LOGICAL_AND:
706		current = current && next;
707		break;
708	case OP_LOGICAL_OR:
709		current = current || next;
710		break;
711	case OP_LE:
712		current = current <= next;
713		break;
714	case OP_GE:
715		current = current >= next;
716		break;
717	case OP_NEGATE:
718		current = -next;
719		break;
720	case OP_FLIP:
721		current = ~next;
722		break;
723	case OP_NOT:
724		current = !next;
725		break;
726	case OP_SHIFT_L:
727		current <<= next;
728		break;
729	case OP_SHIFT_R:
730		current >>= next;
731		break;
732	case OP_DEREFERENCE:
733		if (*segment < 0) {
734			current = debugger->core->busRead8(debugger->core, next);
735		} else {
736			current = debugger->core->rawRead8(debugger->core, next, *segment);
737		}
738		*segment = -1;
739		break;
740	default:
741		return false;
742	}
743	*value = current;
744	return true;
745}
746
747bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
748	if (!value) {
749		return false;
750	}
751	int32_t lhs, rhs;
752	switch (tree->token.type) {
753	case TOKEN_UINT_TYPE:
754		if (segment) {
755			*segment = -1;
756		}
757		*value = tree->token.uintValue;
758		return true;
759	case TOKEN_SEGMENT_TYPE:
760		if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
761			return false;
762		}
763		return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
764	case TOKEN_OPERATOR_TYPE:
765		switch (tree->token.operatorValue) {
766		case OP_ASSIGN:
767		case OP_ADD:
768		case OP_SUBTRACT:
769		case OP_MULTIPLY:
770		case OP_DIVIDE:
771		case OP_MODULO:
772		case OP_AND:
773		case OP_OR:
774		case OP_XOR:
775		case OP_LESS:
776		case OP_GREATER:
777		case OP_EQUAL:
778		case OP_NOT_EQUAL:
779		case OP_LOGICAL_AND:
780		case OP_LOGICAL_OR:
781		case OP_LE:
782		case OP_GE:
783		case OP_SHIFT_L:
784		case OP_SHIFT_R:
785			if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
786				return false;
787			}
788			// Fall through
789		default:
790			if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
791				return false;
792			}
793			break;
794		}
795		return _performOperation(debugger, tree->token.operatorValue, lhs, rhs, value, segment);
796	case TOKEN_IDENTIFIER_TYPE:
797		return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
798	case TOKEN_ERROR_TYPE:
799	default:
800		break;
801	}
802	return false;
803}