all repos — mgba @ e2f4fdbdac57da87cbf500a9c32109e7162d5230

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba/internal/debugger/parser.h>
  7
  8#include <mgba-util/string.h>
  9
 10enum LexState {
 11	LEX_ERROR = -1,
 12	LEX_ROOT = 0,
 13	LEX_EXPECT_IDENTIFIER,
 14	LEX_EXPECT_BINARY,
 15	LEX_EXPECT_DECIMAL,
 16	LEX_EXPECT_HEX,
 17	LEX_EXPECT_PREFIX,
 18	LEX_EXPECT_OPERATOR
 19};
 20
 21static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
 22	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
 23	lvNext->token.type = TOKEN_OPERATOR_TYPE;
 24	switch (operator) {
 25	case '+':
 26		lvNext->token.operatorValue = OP_ADD;
 27		break;
 28	case '-':
 29		lvNext->token.operatorValue = OP_SUBTRACT;
 30		break;
 31	case '*':
 32		lvNext->token.operatorValue = OP_MULTIPLY;
 33		break;
 34	case '/':
 35		lvNext->token.operatorValue = OP_DIVIDE;
 36		break;
 37	case '&':
 38		lvNext->token.operatorValue = OP_AND;
 39		break;
 40	case '|':
 41		lvNext->token.operatorValue = OP_OR;
 42		break;
 43	case '^':
 44		lvNext->token.operatorValue = OP_XOR;
 45		break;
 46	default:
 47		lvNext->token.type = TOKEN_ERROR_TYPE;
 48		break;
 49	}
 50	lvNext->next = lv->next;
 51	lv->next = lvNext;
 52	lv = lvNext;
 53	lvNext = malloc(sizeof(struct LexVector));
 54	lvNext->next = lv->next;
 55	lvNext->token.type = TOKEN_ERROR_TYPE;
 56	lv->next = lvNext;
 57	return lvNext;
 58}
 59
 60size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 61	if (!string || length < 1) {
 62		return 0;
 63	}
 64
 65	uint32_t next = 0;
 66	size_t adjusted = 0;
 67
 68	enum LexState state = LEX_ROOT;
 69	const char* tokenStart = 0;
 70	struct LexVector* lvNext;
 71
 72	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 73		char token = string[0];
 74		++string;
 75		++adjusted;
 76		--length;
 77		switch (state) {
 78		case LEX_ROOT:
 79			tokenStart = string - 1;
 80			switch (token) {
 81			case '1':
 82			case '2':
 83			case '3':
 84			case '4':
 85			case '5':
 86			case '6':
 87			case '7':
 88			case '8':
 89			case '9':
 90				state = LEX_EXPECT_DECIMAL;
 91				next = token - '0';
 92				break;
 93			case '0':
 94				state = LEX_EXPECT_PREFIX;
 95				next = 0;
 96				break;
 97			case '$':
 98				state = LEX_EXPECT_HEX;
 99				next = 0;
100				break;
101			case '(':
102				state = LEX_ROOT;
103				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
104				lvNext = malloc(sizeof(struct LexVector));
105				lvNext->next = lv->next;
106				lvNext->token.type = TOKEN_ERROR_TYPE;
107				lv->next = lvNext;
108				lv = lvNext;
109				break;
110			default:
111				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
112					state = LEX_EXPECT_IDENTIFIER;
113				} else {
114					state = LEX_ERROR;
115				}
116				break;
117			};
118			break;
119		case LEX_EXPECT_IDENTIFIER:
120			switch (token) {
121			case '+':
122			case '-':
123			case '*':
124			case '/':
125			case '&':
126			case '|':
127			case '^':
128				lv->token.type = TOKEN_IDENTIFIER_TYPE;
129				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
130				lv = _lexOperator(lv, token);
131				state = LEX_ROOT;
132				break;
133			case ')':
134				lv->token.type = TOKEN_IDENTIFIER_TYPE;
135				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
136				state = LEX_EXPECT_OPERATOR;
137				break;
138			default:
139				break;
140			}
141			break;
142		case LEX_EXPECT_BINARY:
143			switch (token) {
144			case '0':
145			case '1':
146				// TODO: handle overflow
147				next <<= 1;
148				next += token - '0';
149				break;
150			case '+':
151			case '-':
152			case '*':
153			case '/':
154				lv->token.type = TOKEN_UINT_TYPE;
155				lv->token.uintValue = next;
156				lv = _lexOperator(lv, token);
157				state = LEX_ROOT;
158				break;
159			case ')':
160				lv->token.type = TOKEN_UINT_TYPE;
161				lv->token.uintValue = next;
162				state = LEX_EXPECT_OPERATOR;
163				break;
164			default:
165				state = LEX_ERROR;
166				break;
167			}
168			break;
169		case LEX_EXPECT_DECIMAL:
170			switch (token) {
171			case '0':
172			case '1':
173			case '2':
174			case '3':
175			case '4':
176			case '5':
177			case '6':
178			case '7':
179			case '8':
180			case '9':
181				// TODO: handle overflow
182				next *= 10;
183				next += token - '0';
184				break;
185			case '+':
186			case '-':
187			case '*':
188			case '/':
189			case '&':
190			case '|':
191			case '^':
192				lv->token.type = TOKEN_UINT_TYPE;
193				lv->token.uintValue = next;
194				lv = _lexOperator(lv, token);
195				state = LEX_ROOT;
196				break;
197			case ')':
198				lv->token.type = TOKEN_UINT_TYPE;
199				lv->token.uintValue = next;
200				state = LEX_EXPECT_OPERATOR;
201				break;
202			default:
203				state = LEX_ERROR;
204			}
205			break;
206		case LEX_EXPECT_HEX:
207			switch (token) {
208			case '0':
209			case '1':
210			case '2':
211			case '3':
212			case '4':
213			case '5':
214			case '6':
215			case '7':
216			case '8':
217			case '9':
218				// TODO: handle overflow
219				next *= 16;
220				next += token - '0';
221				break;
222			case 'A':
223			case 'B':
224			case 'C':
225			case 'D':
226			case 'E':
227			case 'F':
228				// TODO: handle overflow
229				next *= 16;
230				next += token - 'A' + 10;
231				break;
232			case 'a':
233			case 'b':
234			case 'c':
235			case 'd':
236			case 'e':
237			case 'f':
238				// TODO: handle overflow
239				next *= 16;
240				next += token - 'a' + 10;
241				break;
242			case '+':
243			case '-':
244			case '*':
245			case '/':
246			case '&':
247			case '|':
248			case '^':
249				lv->token.type = TOKEN_UINT_TYPE;
250				lv->token.uintValue = next;
251				lv = _lexOperator(lv, token);
252				state = LEX_ROOT;
253				break;
254			case ':':
255				lv->token.type = TOKEN_SEGMENT_TYPE;
256				lv->token.uintValue = next;
257				lvNext = malloc(sizeof(struct LexVector));
258				lvNext->next = lv->next;
259				lvNext->token.type = TOKEN_UINT_TYPE;
260				lv->next = lvNext;
261				lv = lvNext;
262				next = 0;
263				state = LEX_EXPECT_HEX;
264				break;
265			case ')':
266				lv->token.type = TOKEN_UINT_TYPE;
267				lv->token.uintValue = next;
268				state = LEX_EXPECT_OPERATOR;
269				break;
270			default:
271				state = LEX_ERROR;
272				break;
273			}
274			break;
275		case LEX_EXPECT_PREFIX:
276			switch (token) {
277			case 'X':
278			case 'x':
279				next = 0;
280				state = LEX_EXPECT_HEX;
281				break;
282			case 'B':
283			case 'b':
284				next = 0;
285				state = LEX_EXPECT_BINARY;
286				break;
287			case '+':
288			case '-':
289			case '*':
290			case '/':
291			case '&':
292			case '|':
293			case '^':
294				lv->token.type = TOKEN_UINT_TYPE;
295				lv->token.uintValue = next;
296				lv = _lexOperator(lv, token);
297				state = LEX_ROOT;
298				break;
299			case ')':
300				lv->token.type = TOKEN_UINT_TYPE;
301				lv->token.uintValue = next;
302				state = LEX_EXPECT_OPERATOR;
303				break;
304			case '0':
305			case '1':
306			case '2':
307			case '3':
308			case '4':
309			case '5':
310			case '6':
311			case '7':
312			case '8':
313			case '9':
314				next = token - '0';
315				state = LEX_EXPECT_DECIMAL;
316				break;
317			default:
318				state = LEX_ERROR;
319			}
320			break;
321		case LEX_EXPECT_OPERATOR:
322			switch (token) {
323			case '+':
324			case '-':
325			case '*':
326			case '/':
327			case '&':
328			case '|':
329			case '^':
330				lvNext = malloc(sizeof(struct LexVector));
331				lvNext->next = lv->next;
332				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
333				lv->next = lvNext;
334				lv = _lexOperator(lv->next, token);
335				state = LEX_ROOT;
336				break;
337			default:
338				state = LEX_ERROR;
339			}
340			break;
341		case LEX_ERROR:
342			// This shouldn't be reached
343			break;
344		}
345	}
346
347	switch (state) {
348	case LEX_EXPECT_BINARY:
349	case LEX_EXPECT_DECIMAL:
350	case LEX_EXPECT_HEX:
351	case LEX_EXPECT_PREFIX:
352		lv->token.type = TOKEN_UINT_TYPE;
353		lv->token.uintValue = next;
354		break;
355	case LEX_EXPECT_IDENTIFIER:
356		lv->token.type = TOKEN_IDENTIFIER_TYPE;
357		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
358		break;
359	case LEX_EXPECT_OPERATOR:
360		lvNext = malloc(sizeof(struct LexVector));
361		lvNext->next = lv->next;
362		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
363		lv->next = lvNext;
364		break;
365	case LEX_ERROR:
366	default:
367		lv->token.type = TOKEN_ERROR_TYPE;
368		break;
369	}
370	return adjusted;
371}
372
373static const int _operatorPrecedence[] = {
374	14,
375	4,
376	4,
377	3,
378	3,
379	8,
380	10,
381	9,
382	6,
383	6,
384	7,
385	6,
386	6
387};
388
389static struct ParseTree* _parseTreeCreate() {
390	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
391	tree->token.type = TOKEN_ERROR_TYPE;
392	tree->rhs = 0;
393	tree->lhs = 0;
394	return tree;
395}
396
397static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
398	struct ParseTree* newTree = 0;
399	while (lv) {
400		int newPrecedence;
401		switch (lv->token.type) {
402		case TOKEN_IDENTIFIER_TYPE:
403		case TOKEN_UINT_TYPE:
404			if (tree->token.type == TOKEN_ERROR_TYPE) {
405				tree->token = lv->token;
406				lv = lv->next;
407			} else {
408				tree->token.type = TOKEN_ERROR_TYPE;
409				return 0;
410			}
411			break;
412		case TOKEN_SEGMENT_TYPE:
413			tree->lhs = _parseTreeCreate();
414			tree->lhs->token.type = TOKEN_UINT_TYPE;
415			tree->lhs->token.uintValue = lv->token.uintValue;
416			tree->rhs = _parseTreeCreate();
417			tree->token.type = TOKEN_SEGMENT_TYPE;
418			lv = _parseExpression(tree->rhs, lv->next, precedence, openParens);
419			if (tree->token.type == TOKEN_ERROR_TYPE) {
420				tree->token.type = TOKEN_ERROR_TYPE;
421			}
422			break;
423		case TOKEN_OPEN_PAREN_TYPE:
424			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
425			break;
426		case TOKEN_CLOSE_PAREN_TYPE:
427			if (openParens <= 0) {
428				tree->token.type = TOKEN_ERROR_TYPE;
429				return 0;
430			}
431			return lv->next;
432			break;
433		case TOKEN_OPERATOR_TYPE:
434			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
435			if (newPrecedence < precedence) {
436				newTree = _parseTreeCreate();
437				*newTree = *tree;
438				tree->lhs = newTree;
439				tree->rhs = _parseTreeCreate();
440				tree->token = lv->token;
441				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
442				if (tree->token.type == TOKEN_ERROR_TYPE) {
443					tree->token.type = TOKEN_ERROR_TYPE;
444				}
445			} else {
446				return lv;
447			}
448			break;
449		case TOKEN_ERROR_TYPE:
450			tree->token.type = TOKEN_ERROR_TYPE;
451			return 0;
452		}
453	}
454
455	return 0;
456}
457
458void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
459	if (!tree) {
460		return;
461	}
462
463	tree->token.type = TOKEN_ERROR_TYPE;
464	tree->lhs = 0;
465	tree->rhs = 0;
466
467	_parseExpression(tree, lv, INT_MAX, 0);
468}
469
470void lexFree(struct LexVector* lv) {
471	while (lv) {
472		struct LexVector* lvNext = lv->next;
473		free(lv);
474		lv = lvNext;
475	}
476}
477
478void parseFree(struct ParseTree* tree) {
479	if (!tree) {
480		return;
481	}
482
483	parseFree(tree->lhs);
484	parseFree(tree->rhs);
485
486	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
487		free(tree->token.identifierValue);
488	}
489	free(tree);
490}