all repos — mgba @ 0383c82b469e7ca38832f66d07418cd890ea68a5

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba/internal/debugger/parser.h>
  7
  8#include <mgba/debugger/debugger.h>
  9#include <mgba-util/string.h>
 10
 11DEFINE_VECTOR(LexVector, struct Token);
 12
 13enum LexState {
 14	LEX_ERROR = -1,
 15	LEX_ROOT = 0,
 16	LEX_EXPECT_IDENTIFIER,
 17	LEX_EXPECT_BINARY_FIRST,
 18	LEX_EXPECT_BINARY,
 19	LEX_EXPECT_DECIMAL,
 20	LEX_EXPECT_HEX_FIRST,
 21	LEX_EXPECT_HEX,
 22	LEX_EXPECT_PREFIX,
 23	LEX_EXPECT_OPERATOR,
 24	LEX_EXPECT_OPERATOR2,
 25};
 26
 27static void _lexOperator(struct LexVector* lv, char operator, enum LexState* state) {
 28	if (*state == LEX_EXPECT_OPERATOR2) {
 29		struct Token* lvNext = LexVectorGetPointer(lv, LexVectorSize(lv) - 1);
 30		if (lvNext->type != TOKEN_OPERATOR_TYPE) {
 31			lvNext->type = TOKEN_ERROR_TYPE;
 32			*state = LEX_ERROR;
 33			return;
 34		}
 35		switch (lvNext->operatorValue) {
 36		case OP_AND:
 37			if (operator == '&') {
 38				lvNext->operatorValue = OP_LOGICAL_AND;
 39				*state = LEX_ROOT;
 40				return;
 41			}
 42			break;
 43		case OP_OR:
 44			if (operator == '|') {
 45				lvNext->operatorValue = OP_LOGICAL_OR;
 46				*state = LEX_ROOT;
 47				return;
 48			}
 49			break;
 50		case OP_LESS:
 51			if (operator == '=') {
 52				lvNext->operatorValue = OP_LE;
 53				*state = LEX_ROOT;
 54				return;
 55			}
 56			if (operator == '<') {
 57				lvNext->operatorValue = OP_SHIFT_L;
 58				*state = LEX_ROOT;
 59				return;
 60			}
 61			break;
 62		case OP_GREATER:
 63			if (operator == '=') {
 64				lvNext->operatorValue = OP_GE;
 65				*state = LEX_ROOT;
 66				return;
 67			}
 68			if (operator == '>') {
 69				lvNext->operatorValue = OP_SHIFT_R;
 70				*state = LEX_ROOT;
 71				return;
 72			}
 73			break;
 74		case OP_ASSIGN:
 75			if (operator == '=') {
 76				lvNext->operatorValue = OP_EQUAL;
 77				*state = LEX_ROOT;
 78				return;
 79			}
 80			break;
 81		case OP_NOT:
 82			if (operator == '=') {
 83				lvNext->operatorValue = OP_NOT_EQUAL;
 84				*state = LEX_ROOT;
 85				return;
 86			}
 87			break;
 88		default:
 89			break;
 90		}
 91		*state = LEX_ERROR;
 92		return;
 93	}
 94	struct Token* lvNext = LexVectorAppend(lv);
 95	lvNext->type = TOKEN_OPERATOR_TYPE;
 96	*state = LEX_EXPECT_OPERATOR2;
 97	switch (operator) {
 98	case '=':
 99		lvNext->operatorValue = OP_ASSIGN;
100		break;
101	case '+':
102		lvNext->operatorValue = OP_ADD;
103		break;
104	case '-':
105		lvNext->operatorValue = OP_SUBTRACT;
106		break;
107	case '*':
108		lvNext->operatorValue = OP_MULTIPLY;
109		break;
110	case '/':
111		lvNext->operatorValue = OP_DIVIDE;
112		break;
113	case '%':
114		lvNext->operatorValue = OP_MODULO;
115		break;
116	case '&':
117		lvNext->operatorValue = OP_AND;
118		break;
119	case '|':
120		lvNext->operatorValue = OP_OR;
121		break;
122	case '^':
123		lvNext->operatorValue = OP_XOR;
124		break;
125	case '<':
126		lvNext->operatorValue = OP_LESS;
127		break;
128	case '>':
129		lvNext->operatorValue = OP_GREATER;
130		break;
131	case '!':
132		lvNext->operatorValue = OP_NOT;
133		break;
134	default:
135		lvNext->type = TOKEN_ERROR_TYPE;
136		break;
137	}
138}
139
140static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
141	struct Token* lvNext;
142
143	switch (token) {
144	case '=':
145	case '+':
146	case '-':
147	case '*':
148	case '/':
149	case '%':
150	case '&':
151	case '|':
152	case '^':
153	case '<':
154	case '>':
155	case '!':
156		lvNext = LexVectorAppend(lv);
157		lvNext->type = TOKEN_UINT_TYPE;
158		lvNext->uintValue = next;
159		_lexOperator(lv, token, state);
160		break;
161	case ')':
162		lvNext = LexVectorAppend(lv);
163		lvNext->type = TOKEN_UINT_TYPE;
164		lvNext->uintValue = next;
165		lvNext = LexVectorAppend(lv);
166		lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
167		*state = LEX_EXPECT_OPERATOR;
168		break;
169	case ' ':
170	case '\t':
171		lvNext = LexVectorAppend(lv);
172		lvNext->type = TOKEN_UINT_TYPE;
173		lvNext->uintValue = next;
174		*state = LEX_EXPECT_OPERATOR;
175		break;
176	default:
177		*state = LEX_ERROR;
178		break;
179	}
180}
181
182size_t lexExpression(struct LexVector* lv, const char* string, size_t length, const char* eol) {
183	if (!string || length < 1) {
184		return 0;
185	}
186
187	uint32_t next = 0;
188	size_t adjusted = 0;
189
190	enum LexState state = LEX_ROOT;
191	const char* tokenStart = 0;
192	struct Token* lvNext;
193
194	if (!eol) {
195		eol = " \r\n";
196	}
197
198	while (length > 0 && string[0] && !strchr(eol, string[0]) && state != LEX_ERROR) {
199		char token = string[0];
200		++string;
201		++adjusted;
202		--length;
203		switch (state) {
204		case LEX_EXPECT_OPERATOR2:
205			switch (token) {
206			case '&':
207			case '|':
208			case '=':
209			case '<':
210			case '>':
211				_lexOperator(lv, token, &state);
212				break;
213			}
214			if (state != LEX_EXPECT_OPERATOR2) {
215				break;
216			}
217			// Fall through
218		case LEX_ROOT:
219			tokenStart = string - 1;
220			switch (token) {
221			case '1':
222			case '2':
223			case '3':
224			case '4':
225			case '5':
226			case '6':
227			case '7':
228			case '8':
229			case '9':
230				state = LEX_EXPECT_DECIMAL;
231				next = token - '0';
232				break;
233			case '0':
234				state = LEX_EXPECT_PREFIX;
235				next = 0;
236				break;
237			case '$':
238				state = LEX_EXPECT_HEX_FIRST;
239				next = 0;
240				break;
241			case '%':
242				state = LEX_EXPECT_BINARY_FIRST;
243				next = 0;
244				break;
245			case '(':
246				state = LEX_ROOT;
247				lvNext = LexVectorAppend(lv);
248				lvNext->type = TOKEN_OPEN_PAREN_TYPE;
249				break;
250			case ' ':
251			case '\t':
252				break;
253			default:
254				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
255					state = LEX_EXPECT_IDENTIFIER;
256				} else {
257					state = LEX_ERROR;
258				}
259				break;
260			};
261			break;
262		case LEX_EXPECT_IDENTIFIER:
263			switch (token) {
264			case '=':
265			case '+':
266			case '-':
267			case '*':
268			case '/':
269			case '%':
270			case '&':
271			case '|':
272			case '^':
273			case '<':
274			case '>':
275			case '!':
276				lvNext = LexVectorAppend(lv);
277				lvNext->type = TOKEN_IDENTIFIER_TYPE;
278				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
279				_lexOperator(lv, token, &state);
280				break;
281			case ')':
282				lvNext = LexVectorAppend(lv);
283				lvNext->type = TOKEN_IDENTIFIER_TYPE;
284				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
285				lvNext = LexVectorAppend(lv);
286				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
287				state = LEX_EXPECT_OPERATOR;
288				break;
289			case ' ':
290			case '\t':
291				lvNext = LexVectorAppend(lv);
292				lvNext->type = TOKEN_IDENTIFIER_TYPE;
293				lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
294				state = LEX_EXPECT_OPERATOR;
295				break;
296			default:
297				break;
298			}
299			break;
300		case LEX_EXPECT_BINARY_FIRST:
301			state = LEX_EXPECT_BINARY;
302			// Fall through
303		case LEX_EXPECT_BINARY:
304			switch (token) {
305			case '0':
306			case '1':
307				// TODO: handle overflow
308				next <<= 1;
309				next += token - '0';
310				break;
311			default:
312				_lexValue(lv, token, next, &state);
313				break;
314			}
315			break;
316		case LEX_EXPECT_DECIMAL:
317			switch (token) {
318			case '0':
319			case '1':
320			case '2':
321			case '3':
322			case '4':
323			case '5':
324			case '6':
325			case '7':
326			case '8':
327			case '9':
328				// TODO: handle overflow
329				next *= 10;
330				next += token - '0';
331				break;
332			default:
333				_lexValue(lv, token, next, &state);
334				break;
335			}
336			break;
337		case LEX_EXPECT_HEX_FIRST:
338			state = LEX_EXPECT_HEX;
339			// Fall through
340		case LEX_EXPECT_HEX:
341			switch (token) {
342			case '0':
343			case '1':
344			case '2':
345			case '3':
346			case '4':
347			case '5':
348			case '6':
349			case '7':
350			case '8':
351			case '9':
352				// TODO: handle overflow
353				next *= 16;
354				next += token - '0';
355				break;
356			case 'A':
357			case 'B':
358			case 'C':
359			case 'D':
360			case 'E':
361			case 'F':
362				// TODO: handle overflow
363				next *= 16;
364				next += token - 'A' + 10;
365				break;
366			case 'a':
367			case 'b':
368			case 'c':
369			case 'd':
370			case 'e':
371			case 'f':
372				// TODO: handle overflow
373				next *= 16;
374				next += token - 'a' + 10;
375				break;
376			case ':':
377				lvNext = LexVectorAppend(lv);
378				lvNext->type = TOKEN_SEGMENT_TYPE;
379				lvNext->uintValue = next;
380				next = 0;
381				break;
382			default:
383				_lexValue(lv, token, next, &state);
384				break;
385			}
386			break;
387		case LEX_EXPECT_PREFIX:
388			switch (token) {
389			case 'X':
390			case 'x':
391				next = 0;
392				state = LEX_EXPECT_HEX_FIRST;
393				break;
394			case 'B':
395			case 'b':
396				next = 0;
397				state = LEX_EXPECT_BINARY_FIRST;
398				break;
399			case '0':
400			case '1':
401			case '2':
402			case '3':
403			case '4':
404			case '5':
405			case '6':
406			case '7':
407			case '8':
408			case '9':
409				next = token - '0';
410				state = LEX_EXPECT_DECIMAL;
411				break;
412			default:
413				_lexValue(lv, token, next, &state);
414				break;
415			}
416			break;
417		case LEX_EXPECT_OPERATOR:
418			switch (token) {
419			case '=':
420			case '+':
421			case '-':
422			case '*':
423			case '/':
424			case '%':
425			case '&':
426			case '|':
427			case '^':
428			case '<':
429			case '>':
430			case '!':
431				_lexOperator(lv, token, &state);
432				break;
433			case ')':
434				lvNext = LexVectorAppend(lv);
435				lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
436				break;
437			case ' ':
438			case '\t':
439				break;
440			default:
441				state = LEX_ERROR;
442			}
443			break;
444		case LEX_ERROR:
445			// This shouldn't be reached
446			break;
447		}
448	}
449
450	switch (state) {
451	case LEX_EXPECT_BINARY:
452	case LEX_EXPECT_DECIMAL:
453	case LEX_EXPECT_HEX:
454	case LEX_EXPECT_PREFIX:
455		lvNext = LexVectorAppend(lv);
456		lvNext->type = TOKEN_UINT_TYPE;
457		lvNext->uintValue = next;
458		break;
459	case LEX_EXPECT_IDENTIFIER:
460		lvNext = LexVectorAppend(lv);
461		lvNext->type = TOKEN_IDENTIFIER_TYPE;
462		lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
463		break;
464	case LEX_ROOT:
465	case LEX_EXPECT_OPERATOR:
466	case LEX_EXPECT_OPERATOR2:
467		break;
468	case LEX_EXPECT_BINARY_FIRST:
469	case LEX_EXPECT_HEX_FIRST:
470	case LEX_ERROR:
471	default:
472		lvNext = LexVectorAppend(lv);
473		lvNext->type = TOKEN_ERROR_TYPE;
474		break;
475	}
476	return adjusted;
477}
478
479static const int _operatorPrecedence[] = {
480	[OP_ASSIGN] = 14,
481	[OP_ADD] = 4,
482	[OP_SUBTRACT] = 4,
483	[OP_MULTIPLY] = 3,
484	[OP_DIVIDE] = 3,
485	[OP_MODULO] = 3,
486	[OP_AND] = 8,
487	[OP_OR] = 10,
488	[OP_XOR] = 9,
489	[OP_LESS] = 6,
490	[OP_GREATER] = 6,
491	[OP_EQUAL] = 7,
492	[OP_NOT_EQUAL] = 7,
493	[OP_LE] = 6,
494	[OP_GE] = 6,
495	[OP_LOGICAL_AND] = 11,
496	[OP_LOGICAL_OR] = 12,
497	[OP_NEGATE] = 2,
498	[OP_FLIP] = 2,
499	[OP_NOT] = 2,
500	[OP_SHIFT_L] = 5,
501	[OP_SHIFT_R] = 5,
502};
503
504static struct ParseTree* _parseTreeCreate() {
505	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
506	tree->token.type = TOKEN_ERROR_TYPE;
507	tree->rhs = 0;
508	tree->lhs = 0;
509	return tree;
510}
511
512static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int* openParens) {
513	struct ParseTree* newTree = 0;
514	while (i < LexVectorSize(lv)) {
515		struct Token* token = LexVectorGetPointer(lv, i);
516		int newPrecedence;
517		switch (token->type) {
518		case TOKEN_IDENTIFIER_TYPE:
519		case TOKEN_UINT_TYPE:
520			if (tree->token.type == TOKEN_ERROR_TYPE) {
521				tree->token = *token;
522				if (token->type == TOKEN_IDENTIFIER_TYPE) {
523					tree->token.identifierValue = strdup(token->identifierValue);
524				}
525				++i;
526			} else {
527				tree->token.type = TOKEN_ERROR_TYPE;
528				return i + 1;
529			}
530			break;
531		case TOKEN_SEGMENT_TYPE:
532			tree->lhs = _parseTreeCreate();
533			tree->lhs->token.type = TOKEN_UINT_TYPE;
534			tree->lhs->token.uintValue = token->uintValue;
535			tree->rhs = _parseTreeCreate();
536			tree->token.type = TOKEN_SEGMENT_TYPE;
537			i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
538			if (tree->token.type == TOKEN_ERROR_TYPE) {
539				tree->token.type = TOKEN_ERROR_TYPE;
540			}
541			break;
542		case TOKEN_OPEN_PAREN_TYPE:
543			++*openParens;
544			i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens);
545			break;
546		case TOKEN_CLOSE_PAREN_TYPE:
547			if (*openParens <= 0) {
548				tree->token.type = TOKEN_ERROR_TYPE;
549			}
550			--*openParens;
551			return i + 1;
552		case TOKEN_OPERATOR_TYPE:
553			newPrecedence = _operatorPrecedence[token->operatorValue];
554			if (newPrecedence < precedence) {
555				newTree = _parseTreeCreate();
556				*newTree = *tree;
557				tree->lhs = newTree;
558				tree->rhs = _parseTreeCreate();
559				tree->token = *token;
560				i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
561				if (tree->token.type == TOKEN_ERROR_TYPE) {
562					tree->token.type = TOKEN_ERROR_TYPE;
563				}
564			} else {
565				return i;
566			}
567			break;
568		case TOKEN_ERROR_TYPE:
569			tree->token.type = TOKEN_ERROR_TYPE;
570			return i + 1;
571		}
572	}
573
574	return i;
575}
576
577void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
578	if (!tree) {
579		return;
580	}
581
582	tree->token.type = TOKEN_ERROR_TYPE;
583	tree->lhs = 0;
584	tree->rhs = 0;
585
586	int openParens = 0;
587	_parseExpression(tree, lv, 0, INT_MAX, &openParens);
588	if (openParens) {
589		if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
590			free(tree->token.identifierValue);
591		}
592		tree->token.type = TOKEN_ERROR_TYPE;
593	}
594}
595
596void lexFree(struct LexVector* lv) {
597	size_t i;
598	for (i = 0; i < LexVectorSize(lv); ++i) {
599		struct Token* token = LexVectorGetPointer(lv, i);
600		if (token->type == TOKEN_IDENTIFIER_TYPE) {
601			free(token->identifierValue);
602		}
603	}
604}
605
606void parseFree(struct ParseTree* tree) {
607	if (!tree) {
608		return;
609	}
610
611	if (tree->lhs) {
612		parseFree(tree->lhs);
613		free(tree->lhs);
614	}
615	if (tree->rhs) {
616		parseFree(tree->rhs);
617		free(tree->rhs);
618	}
619
620	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
621		free(tree->token.identifierValue);
622	}
623}
624
625static bool _performOperation(enum Operation operation, int32_t current, int32_t next, int32_t* value) {
626	switch (operation) {
627	case OP_ASSIGN:
628		current = next;
629		break;
630	case OP_ADD:
631		current += next;
632		break;
633	case OP_SUBTRACT:
634		current -= next;
635		break;
636	case OP_MULTIPLY:
637		current *= next;
638		break;
639	case OP_DIVIDE:
640		if (next != 0) {
641			current /= next;
642		} else {
643			return false;
644		}
645		break;
646	case OP_MODULO:
647		if (next != 0) {
648			current %= next;
649		} else {
650			return false;
651		}
652		break;
653	case OP_AND:
654		current &= next;
655		break;
656	case OP_OR:
657		current |= next;
658		break;
659	case OP_XOR:
660		current ^= next;
661		break;
662	case OP_LESS:
663		current = current < next;
664		break;
665	case OP_GREATER:
666		current = current > next;
667		break;
668	case OP_EQUAL:
669		current = current == next;
670		break;
671	case OP_NOT_EQUAL:
672		current = current != next;
673		break;
674	case OP_LOGICAL_AND:
675		current = current && next;
676		break;
677	case OP_LOGICAL_OR:
678		current = current || next;
679		break;
680	case OP_LE:
681		current = current <= next;
682		break;
683	case OP_GE:
684		current = current >= next;
685		break;
686	case OP_SHIFT_L:
687		current <<= next;
688		break;
689	case OP_SHIFT_R:
690		current >>= next;
691		break;
692	default:
693		return false;
694	}
695	*value = current;
696	return true;
697}
698
699bool mDebuggerEvaluateParseTree(struct mDebugger* debugger, struct ParseTree* tree, int32_t* value, int* segment) {
700	if (!value) {
701		return false;
702	}
703	int32_t lhs, rhs;
704	switch (tree->token.type) {
705	case TOKEN_UINT_TYPE:
706		if (segment) {
707			*segment = -1;
708		}
709		*value = tree->token.uintValue;
710		return true;
711	case TOKEN_SEGMENT_TYPE:
712		if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, value, segment)) {
713			return false;
714		}
715		return mDebuggerEvaluateParseTree(debugger, tree->lhs, segment, NULL);
716	case TOKEN_OPERATOR_TYPE:
717		if (!mDebuggerEvaluateParseTree(debugger, tree->lhs, &lhs, segment)) {
718			return false;
719		}
720		if (!mDebuggerEvaluateParseTree(debugger, tree->rhs, &rhs, segment)) {
721			return false;
722		}
723		return _performOperation(tree->token.operatorValue, lhs, rhs, value);
724	case TOKEN_IDENTIFIER_TYPE:
725		return mDebuggerLookupIdentifier(debugger, tree->token.identifierValue, value, segment);
726	case TOKEN_ERROR_TYPE:
727	default:
728		break;
729	}
730	return false;
731}