all repos — mgba @ f6755a6e1b7b0cf2b944cd8ca842746f11d6bf82

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba/internal/debugger/parser.h>
  7
  8#include <mgba/core/core.h>
  9#include <mgba/debugger/debugger.h>
 10#include <mgba-util/string.h>
 11
 12DEFINE_VECTOR(LexVector, struct Token);
 13
 14enum LexState {
 15	LEX_ERROR = -1,
 16	LEX_ROOT = 0,
 17	LEX_EXPECT_IDENTIFIER,
 18	LEX_EXPECT_BINARY_FIRST,
 19	LEX_EXPECT_BINARY,
 20	LEX_EXPECT_DECIMAL,
 21	LEX_EXPECT_HEX_FIRST,
 22	LEX_EXPECT_HEX,
 23	LEX_EXPECT_PREFIX,
 24	LEX_EXPECT_OPERATOR2,
 25};
 26
 27static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
 28	if (*state == LEX_EXPECT_OPERATOR2) {
 29		struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
 30		if (lvNext->type != TOKEN_OPERATOR_TYPE) {
 31			lvNext->type = TOKEN_ERROR_TYPE;
 32			*state = LEX_ERROR;
 33			return;
 34		}
 35		switch (lvNext->operatorValue) {
 36		case OP_AND:
 37			if (operator == '&') {
 38				lvNext->operatorValue = OP_LOGICAL_AND;
 39				*state = LEX_ROOT;
 40				return;
 41			}
 42			break;
 43		case OP_OR:
 44			if (operator == '|') {
 45				lvNext->operatorValue = OP_LOGICAL_OR;
 46				*state = LEX_ROOT;
 47				return;
 48			}
 49			break;
 50		case OP_LESS:
 51			if (operator == '=') {
 52				lvNext->operatorValue = OP_LE;
 53				*state = LEX_ROOT;
 54				return;
 55			}
 56			if (operator == '<') {
 57				lvNext->operatorValue = OP_SHIFT_L;
 58				*state = LEX_ROOT;
 59				return;
 60			}
 61			break;
 62		case OP_GREATER:
 63			if (operator == '=') {
 64				lvNext->operatorValue = OP_GE;
 65				*state = LEX_ROOT;
 66				return;
 67			}
 68			if (operator == '>') {
 69				lvNext->operatorValue = OP_SHIFT_R;
 70				*state = LEX_ROOT;
 71				return;
 72			}
 73			break;
 74		case OP_ASSIGN:
 75			if (operator == '=') {
 76				lvNext->operatorValue = OP_EQUAL;
 77				*state = LEX_ROOT;
 78				return;
 79			}
 80			break;
 81		case OP_NOT:
 82			if (operator == '=') {
 83				lvNext->operatorValue = OP_NOT_EQUAL;
 84				*state = LEX_ROOT;
 85				return;
 86			}
 87			break;
 88		default:
 89			break;
 90		}
 91		*state = LEX_ERROR;
 92	}
 93	struct Token lvNext;
 94	lvNext.type = TOKEN_OPERATOR_TYPE;
 95	switch (operator) {
 96	case '=':
 97		lvNext.operatorValue = OP_ASSIGN;
 98		break;
 99	case '+':
100		lvNext.operatorValue = OP_ADD;
101		break;
102	case '-':
103		lvNext.operatorValue = OP_SUBTRACT;
104		break;
105	case '*':
106		lvNext.operatorValue = OP_MULTIPLY;
107		break;
108	case '/':
109		lvNext.operatorValue = OP_DIVIDE;
110		break;
111	case '%':
112		lvNext.operatorValue = OP_MODULO;
113		break;
114	case '&':
115		lvNext.operatorValue = OP_AND;
116		break;
117	case '|':
118		lvNext.operatorValue = OP_OR;
119		break;
120	case '^':
121		lvNext.operatorValue = OP_XOR;
122		break;
123	case '<':
124		lvNext.operatorValue = OP_LESS;
125		break;
126	case '>':
127		lvNext.operatorValue = OP_GREATER;
128		break;
129	case '!':
130		lvNext.operatorValue = OP_NOT;
131		break;
132	case '~':
133		lvNext.operatorValue = OP_FLIP;
134		break;
135	default:
136		*state = LEX_ERROR;
137		return;
138	}
139	*state = LEX_EXPECT_OPERATOR2;
140	*LexVectorAppend(lv) = lvNext;
141}
142
143static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
144	struct Token* lvNext;
145
146	switch (token) {
147	case '=':
148	case '+':
149	case '-':
150	case '*':
151	case '/':
152	case '%':
153	case '&':
154	case '|':
155	case '^':
156	case '<':
157	case '>':
158	case '!':
159		lvNext = LexVectorAppend(lv);
160		lvNext->type = TOKEN_UINT_TYPE;
161		lvNext->uintValue = next;
162		_lexOperator(lv, token, state);
163		break;
164	case ')':
165		lvNext = LexVectorAppend(lv);
166		lvNext->type = TOKEN_UINT_TYPE;
167		lvNext->uintValue = next;
168		lvNext = LexVectorAppend(lv);
169		lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
170		*state = LEX_ROOT;
171		break;
172	case ' ':
173	case '\t':
174		lvNext = LexVectorAppend(lv);
175		lvNext->type = TOKEN_UINT_TYPE;
176		lvNext->uintValue = next;
177		*state = LEX_ROOT;
178		break;
179	default:
180		*state = LEX_ERROR;
181		break;
182	}
183}
184
185size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
186	if (!string || length < 1) {
187		return 0;
188	}
189
190	uint32_t next = 0;
191	size_t adjusted = 0;
192
193	enum LexState state = LEX_ROOT;
194	const char* tokenStart = 0;
195	struct Token* lvNext;
196
197	if (!eol) {
198		eol = " \r\n";
199	}
200
201	while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
202		char token = string[0];
203		++string;
204		++adjusted;
205		--length;
206		switch (state) {
207		case LEX_EXPECT_OPERATOR2:
208			switch (token) {
209			case '&':
210			case '|':
211			case '=':
212			case '<':
213			case '>':
214				_lexOperator(lv, token, &state);
215				break;
216			}
217			if (state != LEX_EXPECT_OPERATOR2) {
218				break;
219			}
220			// Fall through
221		case LEX_ROOT:
222			tokenStart = string - 1;
223			switch (token) {
224			case '1':
225			case '2':
226			case '3':
227			case '4':
228			case '5':
229			case '6':
230			case '7':
231			case '8':
232			case '9':
233				state = LEX_EXPECT_DECIMAL;
234				next = token - '0';
235				break;
236			case '0':
237				state = LEX_EXPECT_PREFIX;
238				next = 0;
239				break;
240			case '$':
241				state = LEX_EXPECT_HEX_FIRST;
242				next = 0;
243				break;
244			case '(':
245				state = LEX_ROOT;
246				lvNext = LexVectorAppend(lv);
247				lvNext->type = TOKEN_OPEN_PAREN_TYPE;
248				break;
249			case '=':
250			case '+':
251			case '-':
252			case '*':
253			case '/':
254			case '%':
255			case '&':
256			case '|':
257			case '^':
258			case '<':
259			case '>':
260			case '!':
261			case '~':
262				_lexOperator(lv, token, &state);
263				break;
264			case ')':
265				lvNext = LexVectorAppend(lv);
266				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
267				break;
268			case ' ':
269			case '\t':
270				break;
271			default:
272				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
273					state = LEX_EXPECT_IDENTIFIER;
274				} else {
275					state = LEX_ERROR;
276				}
277				break;
278			};
279			break;
280		case LEX_EXPECT_IDENTIFIER:
281			switch (token) {
282			case '=':
283			case '+':
284			case '-':
285			case '*':
286			case '/':
287			case '%':
288			case '&':
289			case '|':
290			case '^':
291			case '<':
292			case '>':
293			case '!':
294			case '~':
295				lvNext = LexVectorAppend(lv);
296				lvNext->type = TOKEN_IDENTIFIER_TYPE;
297				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
298				_lexOperator(lv, token, &state);
299				break;
300			case ')':
301				lvNext = LexVectorAppend(lv);
302				lvNext->type = TOKEN_IDENTIFIER_TYPE;
303				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
304				lvNext = LexVectorAppend(lv);
305				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
306				state = LEX_ROOT;
307				break;
308			case ' ':
309			case '\t':
310				lvNext = LexVectorAppend(lv);
311				lvNext->type = TOKEN_IDENTIFIER_TYPE;
312				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
313				state = LEX_ROOT;
314				break;
315			default:
316				break;
317			}
318			break;
319		case LEX_EXPECT_BINARY_FIRST:
320			state = LEX_EXPECT_BINARY;
321			// Fall through
322		case LEX_EXPECT_BINARY:
323			switch (token) {
324			case '0':
325			case '1':
326				// TODO: handle overflow
327				next <<= 1;
328				next += token - '0';
329				break;
330			default:
331				_lexValue(lv, token, next, &state);
332				break;
333			}
334			break;
335		case LEX_EXPECT_DECIMAL:
336			switch (token) {
337			case '0':
338			case '1':
339			case '2':
340			case '3':
341			case '4':
342			case '5':
343			case '6':
344			case '7':
345			case '8':
346			case '9':
347				// TODO: handle overflow
348				next *= 10;
349				next += token - '0';
350				break;
351			default:
352				_lexValue(lv, token, next, &state);
353				break;
354			}
355			break;
356		case LEX_EXPECT_HEX_FIRST:
357			state = LEX_EXPECT_HEX;
358			// Fall through
359		case LEX_EXPECT_HEX:
360			switch (token) {
361			case '0':
362			case '1':
363			case '2':
364			case '3':
365			case '4':
366			case '5':
367			case '6':
368			case '7':
369			case '8':
370			case '9':
371				// TODO: handle overflow
372				next *= 16;
373				next += token - '0';
374				break;
375			case 'A':
376			case 'B':
377			case 'C':
378			case 'D':
379			case 'E':
380			case 'F':
381				// TODO: handle overflow
382				next *= 16;
383				next += token - 'A' + 10;
384				break;
385			case 'a':
386			case 'b':
387			case 'c':
388			case 'd':
389			case 'e':
390			case 'f':
391				// TODO: handle overflow
392				next *= 16;
393				next += token - 'a' + 10;
394				break;
395			case ':':
396				lvNext = LexVectorAppend(lv);
397				lvNext->type = TOKEN_SEGMENT_TYPE;
398				lvNext->uintValue = next;
399				next = 0;
400				break;
401			default:
402				_lexValue(lv, token, next, &state);
403				break;
404			}
405			break;
406		case LEX_EXPECT_PREFIX:
407			switch (token) {
408			case 'X':
409			case 'x':
410				next = 0;
411				state = LEX_EXPECT_HEX_FIRST;
412				break;
413			case 'B':
414			case 'b':
415				next = 0;
416				state = LEX_EXPECT_BINARY_FIRST;
417				break;
418			case '0':
419			case '1':
420			case '2':
421			case '3':
422			case '4':
423			case '5':
424			case '6':
425			case '7':
426			case '8':
427			case '9':
428				next = token - '0';
429				state = LEX_EXPECT_DECIMAL;
430				break;
431			default:
432				_lexValue(lv, token, next, &state);
433				break;
434			}
435			break;
436		case LEX_ERROR:
437			// This shouldn't be reached
438			break;
439		}
440	}
441
442	switch (state) {
443	case LEX_EXPECT_BINARY:
444	case LEX_EXPECT_DECIMAL:
445	case LEX_EXPECT_HEX:
446	case LEX_EXPECT_PREFIX:
447		lvNext = LexVectorAppend(lv);
448		lvNext->type = TOKEN_UINT_TYPE;
449		lvNext->uintValue = next;
450		break;
451	case LEX_EXPECT_IDENTIFIER:
452		lvNext = LexVectorAppend(lv);
453		lvNext->type = TOKEN_IDENTIFIER_TYPE;
454		lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
455		break;
456	case LEX_ROOT:
457	case LEX_EXPECT_OPERATOR2:
458		break;
459	case LEX_EXPECT_BINARY_FIRST:
460	case LEX_EXPECT_HEX_FIRST:
461	case LEX_ERROR:
462	default:
463		lvNext = LexVectorAppend(lv);
464		lvNext->type = TOKEN_ERROR_TYPE;
465		break;
466	}
467	return adjusted;
468}
469
470static const int _operatorPrecedence[] = {
471	[OP_ASSIGN] = 14,
472	[OP_ADD] = 4,
473	[OP_SUBTRACT] = 4,
474	[OP_MULTIPLY] = 3,
475	[OP_DIVIDE] = 3,
476	[OP_MODULO] = 3,
477	[OP_AND] = 8,
478	[OP_OR] = 10,
479	[OP_XOR] = 9,
480	[OP_LESS] = 6,
481	[OP_GREATER] = 6,
482	[OP_EQUAL] = 7,
483	[OP_NOT_EQUAL] = 7,
484	[OP_LE] = 6,
485	[OP_GE] = 6,
486	[OP_LOGICAL_AND] = 11,
487	[OP_LOGICAL_OR] = 12,
488	[OP_NEGATE] = 2,
489	[OP_FLIP] = 2,
490	[OP_NOT] = 2,
491	[OP_SHIFT_L] = 5,
492	[OP_SHIFT_R] = 5,
493	[OP_DEREFERENCE] = 2,
494};
495
496static struct ParseTree* _parseTreeCreate() {
497	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
498	tree->token.type = TOKEN_ERROR_TYPE;
499	tree->rhs = 0;
500	tree->lhs = 0;
501	return tree;
502}
503
504static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
505	struct ParseTree* newTree = 0;
506	while (i < LexVectorSize(lv)) {
507		struct Token* token = LexVectorGetPointer(lv, i);
508		int newPrecedence;
509		switch (token->type) {
510		case TOKEN_IDENTIFIER_TYPE:
511		case TOKEN_UINT_TYPE:
512			if (tree->token.type == TOKEN_ERROR_TYPE) {
513				tree->token = *token;
514				if (token->type == TOKEN_IDENTIFIER_TYPE) {
515					tree->token.identifierValue = strdup(token->identifierValue);
516				}
517				++i;
518			} else {
519				tree->token.type = TOKEN_ERROR_TYPE;
520				return i + 1;
521			}
522			break;
523		case TOKEN_SEGMENT_TYPE:
524			tree->lhs = _parseTreeCreate();
525			tree->lhs->token.type = TOKEN_UINT_TYPE;
526			tree->lhs->token.uintValue = token->uintValue;
527			tree->rhs = _parseTreeCreate();
528			tree->token.type = TOKEN_SEGMENT_TYPE;
529			i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
530			break;
531		case TOKEN_OPEN_PAREN_TYPE:
532			++*openParens;
533			i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
534			break;
535		case TOKEN_CLOSE_PAREN_TYPE:
536			if (*openParens <= 0) {
537				tree->token.type = TOKEN_ERROR_TYPE;
538			}
539			--*openParens;
540			return i + 1;
541		case TOKEN_OPERATOR_TYPE:
542			if (tree->token.type == TOKEN_ERROR_TYPE) {
543				switch (token->operatorValue) {
544				case OP_SUBTRACT:
545					token->operatorValue = OP_NEGATE;
546					break;
547				case OP_MULTIPLY:
548					token->operatorValue = OP_DEREFERENCE;
549					break;
550				case OP_NOT:
551				case OP_FLIP:
552					break;
553				default:
554					break;
555				}
556			}
557			newPrecedence = _operatorPrecedence[token->operatorValue];
558			if (newPrecedence < precedence) {
559				newTree = _parseTreeCreate();
560				*newTree = *tree;
561				tree->lhs = newTree;
562				tree->rhs = _parseTreeCreate();
563				tree->token = *token;
564				i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
565				if (tree->token.type == TOKEN_ERROR_TYPE) {
566					tree->token.type = TOKEN_ERROR_TYPE;
567				}
568			} else {
569				return i;
570			}
571			break;
572		case TOKEN_ERROR_TYPE:
573			tree->token.type = TOKEN_ERROR_TYPE;
574			return i + 1;
575		}
576	}
577
578	return i;
579}
580
581void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
582	if (!tree) {
583		return;
584	}
585
586	tree->token.type = TOKEN_ERROR_TYPE;
587	tree->lhs = 0;
588	tree->rhs = 0;
589
590	int openParens = 0;
591	_parseExpression(tree, lv, 0, INT_MAX, &openParens);
592	if (openParens) {
593		if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
594			free(tree->token.identifierValue);
595		}
596		tree->token.type = TOKEN_ERROR_TYPE;
597	}
598}
599
600void lexFree(struct LexVector* lv) {
601	size_t i;
602	for (i = 0; i < LexVectorSize(lv); ++i) {
603		struct Token* token = LexVectorGetPointer(lv, i);
604		if (token->type == TOKEN_IDENTIFIER_TYPE) {
605			free(token->identifierValue);
606		}
607	}
608}
609
610void parseFree(struct ParseTree* tree) {
611	if (!tree) {
612		return;
613	}
614
615	if (tree->lhs) {
616		parseFree(tree->lhs);
617		free(tree->lhs);
618	}
619	if (tree->rhs) {
620		parseFree(tree->rhs);
621		free(tree->rhs);
622	}
623
624	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
625		free(tree->token.identifierValue);
626	}
627}
628
629static bool _performOperation(struct mDebugger* debugger, enum Operation operation, int32_t current, int32_t next, int32_t* value, int* segment) {
630	switch (operation) {
631	case OP_ASSIGN:
632		current = next;
633		break;
634	case OP_ADD:
635		current += next;
636		break;
637	case OP_SUBTRACT:
638		current -= next;
639		break;
640	case OP_MULTIPLY:
641		current *= next;
642		break;
643	case OP_DIVIDE:
644		if (next != 0) {
645			current /= next;
646		} else {
647			return false;
648		}
649		break;
650	case OP_MODULO:
651		if (next != 0) {
652			current %= next;
653		} else {
654			return false;
655		}
656		break;
657	case OP_AND:
658		current &= next;
659		break;
660	case OP_OR:
661		current |= next;
662		break;
663	case OP_XOR:
664		current ^= next;
665		break;
666	case OP_LESS:
667		current = current < next;
668		break;
669	case OP_GREATER:
670		current = current > next;
671		break;
672	case OP_EQUAL:
673		current = current == next;
674		break;
675	case OP_NOT_EQUAL:
676		current = current != next;
677		break;
678	case OP_LOGICAL_AND:
679		current = current && next;
680		break;
681	case OP_LOGICAL_OR:
682		current = current || next;
683		break;
684	case OP_LE:
685		current = current <= next;
686		break;
687	case OP_GE:
688		current = current >= next;
689		break;
690	case OP_NEGATE:
691		current = -next;
692		break;
693	case OP_FLIP:
694		current = ~next;
695		break;
696	case OP_NOT:
697		current = !next;
698		break;
699	case OP_SHIFT_L:
700		current <<= next;
701		break;
702	case OP_SHIFT_R:
703		current >>= next;
704		break;
705	case OP_DEREFERENCE:
706		if (*segment < 0) {
707			current = debugger->core->busRead8(debugger->core, next);
708		} else {
709			current = debugger->core->rawRead8(debugger->core, next, *segment);
710		}
711		*segment = -1;
712		break;
713	default:
714		return false;
715	}
716	*value = current;
717	return true;
718}
719
720bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
721	if (!value) {
722		return false;
723	}
724	int32_t lhs, rhs;
725	switch (tree->token.type) {
726	case TOKEN_UINT_TYPE:
727		if (segment) {
728			*segment = -1;
729		}
730		*value = tree->token.uintValue;
731		return true;
732	case TOKEN_SEGMENT_TYPE:
733		if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
734			return false;
735		}
736		return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
737	case TOKEN_OPERATOR_TYPE:
738		switch (tree->token.operatorValue) {
739		case OP_ASSIGN:
740		case OP_ADD:
741		case OP_SUBTRACT:
742		case OP_MULTIPLY:
743		case OP_DIVIDE:
744		case OP_MODULO:
745		case OP_AND:
746		case OP_OR:
747		case OP_XOR:
748		case OP_LESS:
749		case OP_GREATER:
750		case OP_EQUAL:
751		case OP_NOT_EQUAL:
752		case OP_LOGICAL_AND:
753		case OP_LOGICAL_OR:
754		case OP_LE:
755		case OP_GE:
756		case OP_SHIFT_L:
757		case OP_SHIFT_R:
758			if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
759				return false;
760			}
761			// Fall through
762		default:
763			if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
764				return false;
765			}
766			break;
767		}
768		return _performOperation(debugger, tree->token.operatorValue, lhs, rhs, value, segment);
769	case TOKEN_IDENTIFIER_TYPE:
770		return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
771	case TOKEN_ERROR_TYPE:
772	default:
773		break;
774	}
775	return false;
776}