all repos — mgba @ fa884d071ecaa3e05ff20b45a67bf9500dd3d6b6

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba/internal/debugger/parser.h>
  7
  8#include <mgba-util/string.h>
  9
 10static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
 11	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
 12	lvNext->token.type = TOKEN_OPERATOR_TYPE;
 13	switch (operator) {
 14	case '+':
 15		lvNext->token.operatorValue = OP_ADD;
 16		break;
 17	case '-':
 18		lvNext->token.operatorValue = OP_SUBTRACT;
 19		break;
 20	case '*':
 21		lvNext->token.operatorValue = OP_MULTIPLY;
 22		break;
 23	case '/':
 24		lvNext->token.operatorValue = OP_DIVIDE;
 25		break;
 26	default:
 27		lvNext->token.type = TOKEN_ERROR_TYPE;
 28		break;
 29	}
 30	lvNext->next = lv->next;
 31	lv->next = lvNext;
 32	lv = lvNext;
 33	lvNext = malloc(sizeof(struct LexVector));
 34	lvNext->next = lv->next;
 35	lvNext->token.type = TOKEN_ERROR_TYPE;
 36	lv->next = lvNext;
 37	return lvNext;
 38}
 39
 40size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 41	if (!string || length < 1) {
 42		return 0;
 43	}
 44
 45	uint32_t next = 0;
 46	size_t adjusted = 0;
 47
 48	enum LexState state = LEX_ROOT;
 49	const char* tokenStart = 0;
 50	struct LexVector* lvNext;
 51
 52	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 53		char token = string[0];
 54		++string;
 55		++adjusted;
 56		--length;
 57		switch (state) {
 58		case LEX_ROOT:
 59			tokenStart = string - 1;
 60			switch (token) {
 61			case '1':
 62			case '2':
 63			case '3':
 64			case '4':
 65			case '5':
 66			case '6':
 67			case '7':
 68			case '8':
 69			case '9':
 70				state = LEX_EXPECT_DECIMAL;
 71				next = token - '0';
 72				break;
 73			case '0':
 74				state = LEX_EXPECT_PREFIX;
 75				next = 0;
 76				break;
 77			case '$':
 78				state = LEX_EXPECT_HEX;
 79				next = 0;
 80				break;
 81			case '(':
 82				state = LEX_ROOT;
 83				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 84				lvNext = malloc(sizeof(struct LexVector));
 85				lvNext->next = lv->next;
 86				lvNext->token.type = TOKEN_ERROR_TYPE;
 87				lv->next = lvNext;
 88				lv = lvNext;
 89				break;
 90			default:
 91				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 92					state = LEX_EXPECT_IDENTIFIER;
 93				} else {
 94					state = LEX_ERROR;
 95				}
 96				break;
 97			};
 98			break;
 99		case LEX_EXPECT_IDENTIFIER:
100			switch (token) {
101			case '+':
102			case '-':
103			case '*':
104			case '/':
105				lv->token.type = TOKEN_IDENTIFIER_TYPE;
106				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
107				lv = _lexOperator(lv, token);
108				state = LEX_ROOT;
109				break;
110			case ')':
111				lv->token.type = TOKEN_IDENTIFIER_TYPE;
112				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
113				state = LEX_EXPECT_OPERATOR;
114				break;
115			default:
116				break;
117			}
118			break;
119		case LEX_EXPECT_BINARY:
120			switch (token) {
121			case '0':
122			case '1':
123				// TODO: handle overflow
124				next <<= 1;
125				next += token - '0';
126				break;
127			case '+':
128			case '-':
129			case '*':
130			case '/':
131				lv->token.type = TOKEN_UINT_TYPE;
132				lv->token.uintValue = next;
133				lv = _lexOperator(lv, token);
134				state = LEX_ROOT;
135				break;
136			case ')':
137				lv->token.type = TOKEN_UINT_TYPE;
138				lv->token.uintValue = next;
139				state = LEX_EXPECT_OPERATOR;
140				break;
141			default:
142				state = LEX_ERROR;
143				break;
144			}
145			break;
146		case LEX_EXPECT_DECIMAL:
147			switch (token) {
148			case '0':
149			case '1':
150			case '2':
151			case '3':
152			case '4':
153			case '5':
154			case '6':
155			case '7':
156			case '8':
157			case '9':
158				// TODO: handle overflow
159				next *= 10;
160				next += token - '0';
161				break;
162			case '+':
163			case '-':
164			case '*':
165			case '/':
166				lv->token.type = TOKEN_UINT_TYPE;
167				lv->token.uintValue = next;
168				lv = _lexOperator(lv, token);
169				state = LEX_ROOT;
170				break;
171			case ')':
172				lv->token.type = TOKEN_UINT_TYPE;
173				lv->token.uintValue = next;
174				state = LEX_EXPECT_OPERATOR;
175				break;
176			default:
177				state = LEX_ERROR;
178			}
179			break;
180		case LEX_EXPECT_HEX:
181			switch (token) {
182			case '0':
183			case '1':
184			case '2':
185			case '3':
186			case '4':
187			case '5':
188			case '6':
189			case '7':
190			case '8':
191			case '9':
192				// TODO: handle overflow
193				next *= 16;
194				next += token - '0';
195				break;
196			case 'A':
197			case 'B':
198			case 'C':
199			case 'D':
200			case 'E':
201			case 'F':
202				// TODO: handle overflow
203				next *= 16;
204				next += token - 'A' + 10;
205				break;
206			case 'a':
207			case 'b':
208			case 'c':
209			case 'd':
210			case 'e':
211			case 'f':
212				// TODO: handle overflow
213				next *= 16;
214				next += token - 'a' + 10;
215				break;
216			case '+':
217			case '-':
218			case '*':
219			case '/':
220				lv->token.type = TOKEN_UINT_TYPE;
221				lv->token.uintValue = next;
222				lv = _lexOperator(lv, token);
223				state = LEX_ROOT;
224				break;
225			case ':':
226				lv->token.type = TOKEN_SEGMENT_TYPE;
227				lv->token.uintValue = next;
228				lvNext = malloc(sizeof(struct LexVector));
229				lvNext->next = lv->next;
230				lvNext->token.type = TOKEN_UINT_TYPE;
231				lv->next = lvNext;
232				lv = lvNext;
233				next = 0;
234				state = LEX_EXPECT_HEX;
235				break;
236			case ')':
237				lv->token.type = TOKEN_UINT_TYPE;
238				lv->token.uintValue = next;
239				state = LEX_EXPECT_OPERATOR;
240				break;
241			default:
242				state = LEX_ERROR;
243				break;
244			}
245			break;
246		case LEX_EXPECT_PREFIX:
247			switch (token) {
248			case 'X':
249			case 'x':
250				next = 0;
251				state = LEX_EXPECT_HEX;
252				break;
253			case 'B':
254			case 'b':
255				next = 0;
256				state = LEX_EXPECT_BINARY;
257				break;
258			case '+':
259			case '-':
260			case '*':
261			case '/':
262				lv->token.type = TOKEN_UINT_TYPE;
263				lv->token.uintValue = next;
264				lv = _lexOperator(lv, token);
265				state = LEX_ROOT;
266				break;
267			case ')':
268				lv->token.type = TOKEN_UINT_TYPE;
269				lv->token.uintValue = next;
270				state = LEX_EXPECT_OPERATOR;
271				break;
272			case '0':
273			case '1':
274			case '2':
275			case '3':
276			case '4':
277			case '5':
278			case '6':
279			case '7':
280			case '8':
281			case '9':
282				next = token - '0';
283				state = LEX_EXPECT_DECIMAL;
284				break;
285			default:
286				state = LEX_ERROR;
287			}
288			break;
289		case LEX_EXPECT_OPERATOR:
290			switch (token) {
291			case '+':
292			case '-':
293			case '*':
294			case '/':
295				lvNext = malloc(sizeof(struct LexVector));
296				lvNext->next = lv->next;
297				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
298				lv->next = lvNext;
299				lv = _lexOperator(lv->next, token);
300				state = LEX_ROOT;
301				break;
302			default:
303				state = LEX_ERROR;
304			}
305			break;
306		case LEX_ERROR:
307			// This shouldn't be reached
308			break;
309		}
310	}
311
312	switch (state) {
313	case LEX_EXPECT_BINARY:
314	case LEX_EXPECT_DECIMAL:
315	case LEX_EXPECT_HEX:
316	case LEX_EXPECT_PREFIX:
317		lv->token.type = TOKEN_UINT_TYPE;
318		lv->token.uintValue = next;
319		break;
320	case LEX_EXPECT_IDENTIFIER:
321		lv->token.type = TOKEN_IDENTIFIER_TYPE;
322		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
323		break;
324	case LEX_EXPECT_OPERATOR:
325		lvNext = malloc(sizeof(struct LexVector));
326		lvNext->next = lv->next;
327		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
328		lv->next = lvNext;
329		break;
330	case LEX_ERROR:
331	default:
332		lv->token.type = TOKEN_ERROR_TYPE;
333		break;
334	}
335	return adjusted;
336}
337
338static const int _operatorPrecedence[] = {
339	2,
340	1,
341	1,
342	0,
343	0
344};
345
346static struct ParseTree* _parseTreeCreate() {
347	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
348	tree->token.type = TOKEN_ERROR_TYPE;
349	tree->rhs = 0;
350	tree->lhs = 0;
351	return tree;
352}
353
354static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
355	struct ParseTree* newTree = 0;
356	while (lv) {
357		int newPrecedence;
358		switch (lv->token.type) {
359		case TOKEN_IDENTIFIER_TYPE:
360		case TOKEN_UINT_TYPE:
361			if (tree->token.type == TOKEN_ERROR_TYPE) {
362				tree->token = lv->token;
363				lv = lv->next;
364			} else {
365				tree->token.type = TOKEN_ERROR_TYPE;
366				return 0;
367			}
368			break;
369		case TOKEN_SEGMENT_TYPE:
370			tree->lhs = _parseTreeCreate();
371			tree->lhs->token.type = TOKEN_UINT_TYPE;
372			tree->lhs->token.uintValue = lv->token.uintValue;
373			tree->rhs = _parseTreeCreate();
374			tree->token.type = TOKEN_SEGMENT_TYPE;
375			lv = _parseExpression(tree->rhs, lv->next, precedence, openParens);
376			if (tree->token.type == TOKEN_ERROR_TYPE) {
377				tree->token.type = TOKEN_ERROR_TYPE;
378			}
379			break;
380		case TOKEN_OPEN_PAREN_TYPE:
381			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
382			break;
383		case TOKEN_CLOSE_PAREN_TYPE:
384			if (openParens <= 0) {
385				tree->token.type = TOKEN_ERROR_TYPE;
386				return 0;
387			}
388			return lv->next;
389			break;
390		case TOKEN_OPERATOR_TYPE:
391			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
392			if (newPrecedence < precedence) {
393				newTree = _parseTreeCreate();
394				*newTree = *tree;
395				tree->lhs = newTree;
396				tree->rhs = _parseTreeCreate();
397				tree->token = lv->token;
398				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
399				if (tree->token.type == TOKEN_ERROR_TYPE) {
400					tree->token.type = TOKEN_ERROR_TYPE;
401				}
402			} else {
403				return lv;
404			}
405			break;
406		case TOKEN_ERROR_TYPE:
407			tree->token.type = TOKEN_ERROR_TYPE;
408			return 0;
409		}
410	}
411
412	return 0;
413}
414
415void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
416	if (!tree) {
417		return;
418	}
419
420	tree->token.type = TOKEN_ERROR_TYPE;
421	tree->lhs = 0;
422	tree->rhs = 0;
423
424	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
425}
426
427void lexFree(struct LexVector* lv) {
428	while (lv) {
429		struct LexVector* lvNext = lv->next;
430		free(lv);
431		lv = lvNext;
432	}
433}
434
435void parseFree(struct ParseTree* tree) {
436	if (!tree) {
437		return;
438	}
439
440	parseFree(tree->lhs);
441	parseFree(tree->rhs);
442
443	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
444		free(tree->token.identifierValue);
445	}
446	free(tree);
447}