all repos — mgba @ 5d129e26bf53c68261b217ac9b28950e148915c7

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba/internal/debugger/parser.h>
  7
  8#include <mgba/core/core.h>
  9#include <mgba/debugger/debugger.h>
 10#include <mgba-util/string.h>
 11
 12DEFINE_VECTOR(LexVector, struct Token);
 13
 14enum LexState {
 15	LEX_ERROR = -1,
 16	LEX_ROOT = 0,
 17	LEX_EXPECT_IDENTIFIER,
 18	LEX_EXPECT_BINARY_FIRST,
 19	LEX_EXPECT_BINARY,
 20	LEX_EXPECT_DECIMAL,
 21	LEX_EXPECT_HEX_FIRST,
 22	LEX_EXPECT_HEX,
 23	LEX_EXPECT_PREFIX,
 24	LEX_EXPECT_OPERATOR,
 25	LEX_EXPECT_OPERATOR2,
 26};
 27
 28static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
 29	if (*state == LEX_EXPECT_OPERATOR2) {
 30		struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
 31		if (lvNext->type != TOKEN_OPERATOR_TYPE) {
 32			lvNext->type = TOKEN_ERROR_TYPE;
 33			*state = LEX_ERROR;
 34			return;
 35		}
 36		switch (lvNext->operatorValue) {
 37		case OP_AND:
 38			if (operator == '&') {
 39				lvNext->operatorValue = OP_LOGICAL_AND;
 40				*state = LEX_ROOT;
 41				return;
 42			}
 43			break;
 44		case OP_OR:
 45			if (operator == '|') {
 46				lvNext->operatorValue = OP_LOGICAL_OR;
 47				*state = LEX_ROOT;
 48				return;
 49			}
 50			break;
 51		case OP_LESS:
 52			if (operator == '=') {
 53				lvNext->operatorValue = OP_LE;
 54				*state = LEX_ROOT;
 55				return;
 56			}
 57			if (operator == '<') {
 58				lvNext->operatorValue = OP_SHIFT_L;
 59				*state = LEX_ROOT;
 60				return;
 61			}
 62			break;
 63		case OP_GREATER:
 64			if (operator == '=') {
 65				lvNext->operatorValue = OP_GE;
 66				*state = LEX_ROOT;
 67				return;
 68			}
 69			if (operator == '>') {
 70				lvNext->operatorValue = OP_SHIFT_R;
 71				*state = LEX_ROOT;
 72				return;
 73			}
 74			break;
 75		case OP_ASSIGN:
 76			if (operator == '=') {
 77				lvNext->operatorValue = OP_EQUAL;
 78				*state = LEX_ROOT;
 79				return;
 80			}
 81			break;
 82		case OP_NOT:
 83			if (operator == '=') {
 84				lvNext->operatorValue = OP_NOT_EQUAL;
 85				*state = LEX_ROOT;
 86				return;
 87			}
 88			break;
 89		default:
 90			break;
 91		}
 92		*state = LEX_ERROR;
 93	}
 94	if (*state == LEX_ROOT || *state == LEX_ERROR) {
 95		struct Token lvNext;
 96		lvNext.type = TOKEN_OPERATOR_TYPE;
 97		*state = LEX_ROOT;
 98		switch (operator) {
 99		case '-':
100			lvNext.operatorValue = OP_NEGATE;
101			break;
102		case '~':
103			lvNext.operatorValue = OP_FLIP;
104			break;
105		case '!':
106			lvNext.operatorValue = OP_NOT;
107			break;
108		case '*':
109			lvNext.operatorValue = OP_DEREFERENCE;
110			break;
111		default:
112			lvNext.type = TOKEN_ERROR_TYPE;
113			*state = LEX_ERROR;
114			return;
115		}
116		*LexVectorAppend(lv) = lvNext;
117		return;
118	}
119	struct Token lvNext;
120	lvNext.type = TOKEN_OPERATOR_TYPE;
121	switch (operator) {
122	case '=':
123		lvNext.operatorValue = OP_ASSIGN;
124		break;
125	case '+':
126		lvNext.operatorValue = OP_ADD;
127		break;
128	case '-':
129		lvNext.operatorValue = OP_SUBTRACT;
130		break;
131	case '*':
132		lvNext.operatorValue = OP_MULTIPLY;
133		break;
134	case '/':
135		lvNext.operatorValue = OP_DIVIDE;
136		break;
137	case '%':
138		lvNext.operatorValue = OP_MODULO;
139		break;
140	case '&':
141		lvNext.operatorValue = OP_AND;
142		break;
143	case '|':
144		lvNext.operatorValue = OP_OR;
145		break;
146	case '^':
147		lvNext.operatorValue = OP_XOR;
148		break;
149	case '<':
150		lvNext.operatorValue = OP_LESS;
151		break;
152	case '>':
153		lvNext.operatorValue = OP_GREATER;
154		break;
155	case '!':
156		lvNext.operatorValue = OP_NOT;
157		break;
158	default:
159		*state = LEX_ERROR;
160		return;
161	}
162	*state = LEX_EXPECT_OPERATOR2;
163	*LexVectorAppend(lv) = lvNext;
164}
165
166static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
167	struct Token* lvNext;
168
169	switch (token) {
170	case '=':
171	case '+':
172	case '-':
173	case '*':
174	case '/':
175	case '%':
176	case '&':
177	case '|':
178	case '^':
179	case '<':
180	case '>':
181	case '!':
182		lvNext = LexVectorAppend(lv);
183		lvNext->type = TOKEN_UINT_TYPE;
184		lvNext->uintValue = next;
185		_lexOperator(lv, token, state);
186		break;
187	case ')':
188		lvNext = LexVectorAppend(lv);
189		lvNext->type = TOKEN_UINT_TYPE;
190		lvNext->uintValue = next;
191		lvNext = LexVectorAppend(lv);
192		lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
193		*state = LEX_EXPECT_OPERATOR;
194		break;
195	case ' ':
196	case '\t':
197		lvNext = LexVectorAppend(lv);
198		lvNext->type = TOKEN_UINT_TYPE;
199		lvNext->uintValue = next;
200		*state = LEX_EXPECT_OPERATOR;
201		break;
202	default:
203		*state = LEX_ERROR;
204		break;
205	}
206}
207
208size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
209	if (!string || length < 1) {
210		return 0;
211	}
212
213	uint32_t next = 0;
214	size_t adjusted = 0;
215
216	enum LexState state = LEX_ROOT;
217	const char* tokenStart = 0;
218	struct Token* lvNext;
219
220	if (!eol) {
221		eol = " \r\n";
222	}
223
224	while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
225		char token = string[0];
226		++string;
227		++adjusted;
228		--length;
229		switch (state) {
230		case LEX_EXPECT_OPERATOR2:
231			switch (token) {
232			case '&':
233			case '|':
234			case '=':
235			case '<':
236			case '>':
237				_lexOperator(lv, token, &state);
238				break;
239			}
240			if (state != LEX_EXPECT_OPERATOR2) {
241				break;
242			}
243			// Fall through
244		case LEX_ROOT:
245			tokenStart = string - 1;
246			switch (token) {
247			case '1':
248			case '2':
249			case '3':
250			case '4':
251			case '5':
252			case '6':
253			case '7':
254			case '8':
255			case '9':
256				state = LEX_EXPECT_DECIMAL;
257				next = token - '0';
258				break;
259			case '0':
260				state = LEX_EXPECT_PREFIX;
261				next = 0;
262				break;
263			case '$':
264				state = LEX_EXPECT_HEX_FIRST;
265				next = 0;
266				break;
267			case '%':
268				state = LEX_EXPECT_BINARY_FIRST;
269				next = 0;
270				break;
271			case '(':
272				state = LEX_ROOT;
273				lvNext = LexVectorAppend(lv);
274				lvNext->type = TOKEN_OPEN_PAREN_TYPE;
275				break;
276			case '!':
277			case '-':
278			case '~':
279			case '*':
280				_lexOperator(lv, token, &state);
281				break;
282			case ' ':
283			case '\t':
284				break;
285			default:
286				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
287					state = LEX_EXPECT_IDENTIFIER;
288				} else {
289					state = LEX_ERROR;
290				}
291				break;
292			};
293			break;
294		case LEX_EXPECT_IDENTIFIER:
295			switch (token) {
296			case '=':
297			case '+':
298			case '-':
299			case '*':
300			case '/':
301			case '%':
302			case '&':
303			case '|':
304			case '^':
305			case '<':
306			case '>':
307			case '!':
308				lvNext = LexVectorAppend(lv);
309				lvNext->type = TOKEN_IDENTIFIER_TYPE;
310				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
311				_lexOperator(lv, token, &state);
312				break;
313			case ')':
314				lvNext = LexVectorAppend(lv);
315				lvNext->type = TOKEN_IDENTIFIER_TYPE;
316				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
317				lvNext = LexVectorAppend(lv);
318				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
319				state = LEX_EXPECT_OPERATOR;
320				break;
321			case ' ':
322			case '\t':
323				lvNext = LexVectorAppend(lv);
324				lvNext->type = TOKEN_IDENTIFIER_TYPE;
325				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
326				state = LEX_EXPECT_OPERATOR;
327				break;
328			default:
329				break;
330			}
331			break;
332		case LEX_EXPECT_BINARY_FIRST:
333			state = LEX_EXPECT_BINARY;
334			// Fall through
335		case LEX_EXPECT_BINARY:
336			switch (token) {
337			case '0':
338			case '1':
339				// TODO: handle overflow
340				next <<= 1;
341				next += token - '0';
342				break;
343			default:
344				_lexValue(lv, token, next, &state);
345				break;
346			}
347			break;
348		case LEX_EXPECT_DECIMAL:
349			switch (token) {
350			case '0':
351			case '1':
352			case '2':
353			case '3':
354			case '4':
355			case '5':
356			case '6':
357			case '7':
358			case '8':
359			case '9':
360				// TODO: handle overflow
361				next *= 10;
362				next += token - '0';
363				break;
364			default:
365				_lexValue(lv, token, next, &state);
366				break;
367			}
368			break;
369		case LEX_EXPECT_HEX_FIRST:
370			state = LEX_EXPECT_HEX;
371			// Fall through
372		case LEX_EXPECT_HEX:
373			switch (token) {
374			case '0':
375			case '1':
376			case '2':
377			case '3':
378			case '4':
379			case '5':
380			case '6':
381			case '7':
382			case '8':
383			case '9':
384				// TODO: handle overflow
385				next *= 16;
386				next += token - '0';
387				break;
388			case 'A':
389			case 'B':
390			case 'C':
391			case 'D':
392			case 'E':
393			case 'F':
394				// TODO: handle overflow
395				next *= 16;
396				next += token - 'A' + 10;
397				break;
398			case 'a':
399			case 'b':
400			case 'c':
401			case 'd':
402			case 'e':
403			case 'f':
404				// TODO: handle overflow
405				next *= 16;
406				next += token - 'a' + 10;
407				break;
408			case ':':
409				lvNext = LexVectorAppend(lv);
410				lvNext->type = TOKEN_SEGMENT_TYPE;
411				lvNext->uintValue = next;
412				next = 0;
413				break;
414			default:
415				_lexValue(lv, token, next, &state);
416				break;
417			}
418			break;
419		case LEX_EXPECT_PREFIX:
420			switch (token) {
421			case 'X':
422			case 'x':
423				next = 0;
424				state = LEX_EXPECT_HEX_FIRST;
425				break;
426			case 'B':
427			case 'b':
428				next = 0;
429				state = LEX_EXPECT_BINARY_FIRST;
430				break;
431			case '0':
432			case '1':
433			case '2':
434			case '3':
435			case '4':
436			case '5':
437			case '6':
438			case '7':
439			case '8':
440			case '9':
441				next = token - '0';
442				state = LEX_EXPECT_DECIMAL;
443				break;
444			default:
445				_lexValue(lv, token, next, &state);
446				break;
447			}
448			break;
449		case LEX_EXPECT_OPERATOR:
450			switch (token) {
451			case '=':
452			case '+':
453			case '-':
454			case '*':
455			case '/':
456			case '%':
457			case '&':
458			case '|':
459			case '^':
460			case '<':
461			case '>':
462			case '!':
463				_lexOperator(lv, token, &state);
464				break;
465			case ')':
466				lvNext = LexVectorAppend(lv);
467				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
468				break;
469			case ' ':
470			case '\t':
471				break;
472			default:
473				state = LEX_ERROR;
474			}
475			break;
476		case LEX_ERROR:
477			// This shouldn't be reached
478			break;
479		}
480	}
481
482	switch (state) {
483	case LEX_EXPECT_BINARY:
484	case LEX_EXPECT_DECIMAL:
485	case LEX_EXPECT_HEX:
486	case LEX_EXPECT_PREFIX:
487		lvNext = LexVectorAppend(lv);
488		lvNext->type = TOKEN_UINT_TYPE;
489		lvNext->uintValue = next;
490		break;
491	case LEX_EXPECT_IDENTIFIER:
492		lvNext = LexVectorAppend(lv);
493		lvNext->type = TOKEN_IDENTIFIER_TYPE;
494		lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
495		break;
496	case LEX_ROOT:
497	case LEX_EXPECT_OPERATOR:
498	case LEX_EXPECT_OPERATOR2:
499		break;
500	case LEX_EXPECT_BINARY_FIRST:
501	case LEX_EXPECT_HEX_FIRST:
502	case LEX_ERROR:
503	default:
504		lvNext = LexVectorAppend(lv);
505		lvNext->type = TOKEN_ERROR_TYPE;
506		break;
507	}
508	return adjusted;
509}
510
511static const int _operatorPrecedence[] = {
512	[OP_ASSIGN] = 14,
513	[OP_ADD] = 4,
514	[OP_SUBTRACT] = 4,
515	[OP_MULTIPLY] = 3,
516	[OP_DIVIDE] = 3,
517	[OP_MODULO] = 3,
518	[OP_AND] = 8,
519	[OP_OR] = 10,
520	[OP_XOR] = 9,
521	[OP_LESS] = 6,
522	[OP_GREATER] = 6,
523	[OP_EQUAL] = 7,
524	[OP_NOT_EQUAL] = 7,
525	[OP_LE] = 6,
526	[OP_GE] = 6,
527	[OP_LOGICAL_AND] = 11,
528	[OP_LOGICAL_OR] = 12,
529	[OP_NEGATE] = 2,
530	[OP_FLIP] = 2,
531	[OP_NOT] = 2,
532	[OP_SHIFT_L] = 5,
533	[OP_SHIFT_R] = 5,
534	[OP_DEREFERENCE] = 2,
535};
536
537static struct ParseTree* _parseTreeCreate() {
538	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
539	tree->token.type = TOKEN_ERROR_TYPE;
540	tree->rhs = 0;
541	tree->lhs = 0;
542	return tree;
543}
544
545static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
546	struct ParseTree* newTree = 0;
547	while (i < LexVectorSize(lv)) {
548		struct Token* token = LexVectorGetPointer(lv, i);
549		int newPrecedence;
550		switch (token->type) {
551		case TOKEN_IDENTIFIER_TYPE:
552		case TOKEN_UINT_TYPE:
553			if (tree->token.type == TOKEN_ERROR_TYPE) {
554				tree->token = *token;
555				if (token->type == TOKEN_IDENTIFIER_TYPE) {
556					tree->token.identifierValue = strdup(token->identifierValue);
557				}
558				++i;
559			} else {
560				tree->token.type = TOKEN_ERROR_TYPE;
561				return i + 1;
562			}
563			break;
564		case TOKEN_SEGMENT_TYPE:
565			tree->lhs = _parseTreeCreate();
566			tree->lhs->token.type = TOKEN_UINT_TYPE;
567			tree->lhs->token.uintValue = token->uintValue;
568			tree->rhs = _parseTreeCreate();
569			tree->token.type = TOKEN_SEGMENT_TYPE;
570			i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
571			if (tree->token.type == TOKEN_ERROR_TYPE) {
572				tree->token.type = TOKEN_ERROR_TYPE;
573			}
574			break;
575		case TOKEN_OPEN_PAREN_TYPE:
576			++*openParens;
577			i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
578			break;
579		case TOKEN_CLOSE_PAREN_TYPE:
580			if (*openParens <= 0) {
581				tree->token.type = TOKEN_ERROR_TYPE;
582			}
583			--*openParens;
584			return i + 1;
585		case TOKEN_OPERATOR_TYPE:
586			newPrecedence = _operatorPrecedence[token->operatorValue];
587			if (newPrecedence < precedence) {
588				newTree = _parseTreeCreate();
589				*newTree = *tree;
590				tree->lhs = newTree;
591				tree->rhs = _parseTreeCreate();
592				tree->token = *token;
593				i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
594				if (tree->token.type == TOKEN_ERROR_TYPE) {
595					tree->token.type = TOKEN_ERROR_TYPE;
596				}
597			} else {
598				return i;
599			}
600			break;
601		case TOKEN_ERROR_TYPE:
602			tree->token.type = TOKEN_ERROR_TYPE;
603			return i + 1;
604		}
605	}
606
607	return i;
608}
609
610void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
611	if (!tree) {
612		return;
613	}
614
615	tree->token.type = TOKEN_ERROR_TYPE;
616	tree->lhs = 0;
617	tree->rhs = 0;
618
619	int openParens = 0;
620	_parseExpression(tree, lv, 0, INT_MAX, &openParens);
621	if (openParens) {
622		if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
623			free(tree->token.identifierValue);
624		}
625		tree->token.type = TOKEN_ERROR_TYPE;
626	}
627}
628
629void lexFree(struct LexVector* lv) {
630	size_t i;
631	for (i = 0; i < LexVectorSize(lv); ++i) {
632		struct Token* token = LexVectorGetPointer(lv, i);
633		if (token->type == TOKEN_IDENTIFIER_TYPE) {
634			free(token->identifierValue);
635		}
636	}
637}
638
639void parseFree(struct ParseTree* tree) {
640	if (!tree) {
641		return;
642	}
643
644	if (tree->lhs) {
645		parseFree(tree->lhs);
646		free(tree->lhs);
647	}
648	if (tree->rhs) {
649		parseFree(tree->rhs);
650		free(tree->rhs);
651	}
652
653	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
654		free(tree->token.identifierValue);
655	}
656}
657
658static bool _performOperation(struct mDebugger* debugger, enum Operation operation, int32_t current, int32_t next, int32_t* value, int* segment) {
659	switch (operation) {
660	case OP_ASSIGN:
661		current = next;
662		break;
663	case OP_ADD:
664		current += next;
665		break;
666	case OP_SUBTRACT:
667		current -= next;
668		break;
669	case OP_MULTIPLY:
670		current *= next;
671		break;
672	case OP_DIVIDE:
673		if (next != 0) {
674			current /= next;
675		} else {
676			return false;
677		}
678		break;
679	case OP_MODULO:
680		if (next != 0) {
681			current %= next;
682		} else {
683			return false;
684		}
685		break;
686	case OP_AND:
687		current &= next;
688		break;
689	case OP_OR:
690		current |= next;
691		break;
692	case OP_XOR:
693		current ^= next;
694		break;
695	case OP_LESS:
696		current = current < next;
697		break;
698	case OP_GREATER:
699		current = current > next;
700		break;
701	case OP_EQUAL:
702		current = current == next;
703		break;
704	case OP_NOT_EQUAL:
705		current = current != next;
706		break;
707	case OP_LOGICAL_AND:
708		current = current && next;
709		break;
710	case OP_LOGICAL_OR:
711		current = current || next;
712		break;
713	case OP_LE:
714		current = current <= next;
715		break;
716	case OP_GE:
717		current = current >= next;
718		break;
719	case OP_NEGATE:
720		current = -next;
721		break;
722	case OP_FLIP:
723		current = ~next;
724		break;
725	case OP_NOT:
726		current = !next;
727		break;
728	case OP_SHIFT_L:
729		current <<= next;
730		break;
731	case OP_SHIFT_R:
732		current >>= next;
733		break;
734	case OP_DEREFERENCE:
735		if (*segment < 0) {
736			current = debugger->core->busRead8(debugger->core, next);
737		} else {
738			current = debugger->core->rawRead8(debugger->core, next, *segment);
739		}
740		*segment = -1;
741		break;
742	default:
743		return false;
744	}
745	*value = current;
746	return true;
747}
748
749bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
750	if (!value) {
751		return false;
752	}
753	int32_t lhs, rhs;
754	switch (tree->token.type) {
755	case TOKEN_UINT_TYPE:
756		if (segment) {
757			*segment = -1;
758		}
759		*value = tree->token.uintValue;
760		return true;
761	case TOKEN_SEGMENT_TYPE:
762		if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
763			return false;
764		}
765		return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
766	case TOKEN_OPERATOR_TYPE:
767		switch (tree->token.operatorValue) {
768		case OP_ASSIGN:
769		case OP_ADD:
770		case OP_SUBTRACT:
771		case OP_MULTIPLY:
772		case OP_DIVIDE:
773		case OP_MODULO:
774		case OP_AND:
775		case OP_OR:
776		case OP_XOR:
777		case OP_LESS:
778		case OP_GREATER:
779		case OP_EQUAL:
780		case OP_NOT_EQUAL:
781		case OP_LOGICAL_AND:
782		case OP_LOGICAL_OR:
783		case OP_LE:
784		case OP_GE:
785		case OP_SHIFT_L:
786		case OP_SHIFT_R:
787			if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
788				return false;
789			}
790			// Fall through
791		default:
792			if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
793				return false;
794			}
795			break;
796		}
797		return _performOperation(debugger, tree->token.operatorValue, lhs, rhs, value, segment);
798	case TOKEN_IDENTIFIER_TYPE:
799		return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
800	case TOKEN_ERROR_TYPE:
801	default:
802		break;
803	}
804	return false;
805}