all repos — mgba @ 2f2e5398719f9d6a78ae84fc241945b46839b01c

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1/* Copyright (c) 2013-2014 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include "parser.h"
  7
  8#include "util/string.h"
  9
 10static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
 11	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
 12	lvNext->token.type = TOKEN_OPERATOR_TYPE;
 13	switch (operator) {
 14	case '+':
 15		lvNext->token.operatorValue = OP_ADD;
 16		break;
 17	case '-':
 18		lvNext->token.operatorValue = OP_SUBTRACT;
 19		break;
 20	case '*':
 21		lvNext->token.operatorValue = OP_MULTIPLY;
 22		break;
 23	case '/':
 24		lvNext->token.operatorValue = OP_DIVIDE;
 25		break;
 26	default:
 27		lvNext->token.type = TOKEN_ERROR_TYPE;
 28		break;
 29	}
 30	lvNext->next = lv->next;
 31	lv->next = lvNext;
 32	lv = lvNext;
 33	lvNext = malloc(sizeof(struct LexVector));
 34	lvNext->next = lv->next;
 35	lvNext->token.type = TOKEN_ERROR_TYPE;
 36	lv->next = lvNext;
 37	return lvNext;
 38}
 39
 40size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 41	if (!string || length < 1) {
 42		return 0;
 43	}
 44
 45	uint32_t next = 0;
 46	size_t adjusted = 0;
 47
 48	enum LexState state = LEX_ROOT;
 49	const char* tokenStart = 0;
 50	struct LexVector* lvNext;
 51
 52	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 53		char token = string[0];
 54		++string;
 55		++adjusted;
 56		--length;
 57		switch (state) {
 58		case LEX_ROOT:
 59			tokenStart = string - 1;
 60			switch (token) {
 61			case '1':
 62			case '2':
 63			case '3':
 64			case '4':
 65			case '5':
 66			case '6':
 67			case '7':
 68			case '8':
 69			case '9':
 70				state = LEX_EXPECT_DECIMAL;
 71				next = token - '0';
 72				break;
 73			case '0':
 74				state = LEX_EXPECT_PREFIX;
 75				next = 0;
 76				break;
 77			case '$':
 78				state = LEX_EXPECT_HEX;
 79				next = 0;
 80				break;
 81			case '(':
 82				state = LEX_ROOT;
 83				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 84				lvNext = malloc(sizeof(struct LexVector));
 85				lvNext->next = lv->next;
 86				lvNext->token.type = TOKEN_ERROR_TYPE;
 87				lv->next = lvNext;
 88				lv = lvNext;
 89				break;
 90			default:
 91				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 92					state = LEX_EXPECT_IDENTIFIER;
 93				} else {
 94					state = LEX_ERROR;
 95				}
 96				break;
 97			};
 98			break;
 99		case LEX_EXPECT_IDENTIFIER:
100			switch (token) {
101			case '+':
102			case '-':
103			case '*':
104			case '/':
105				lv->token.type = TOKEN_IDENTIFIER_TYPE;
106				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
107				lv = _lexOperator(lv, token);
108				state = LEX_ROOT;
109				break;
110			case ')':
111				lv->token.type = TOKEN_IDENTIFIER_TYPE;
112				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
113				state = LEX_EXPECT_OPERATOR;
114				break;
115			default:
116				break;
117			}
118			break;
119		case LEX_EXPECT_BINARY:
120			switch (token) {
121				case '0':
122				case '1':
123				// TODO: handle overflow
124				next <<= 1;
125				next += token - '0';
126				break;
127			case '+':
128			case '-':
129			case '*':
130			case '/':
131				lv->token.type = TOKEN_UINT_TYPE;
132				lv->token.uintValue = next;
133				lv = _lexOperator(lv, token);
134				state = LEX_ROOT;
135				break;
136			case ')':
137				lv->token.type = TOKEN_UINT_TYPE;
138				lv->token.uintValue = next;
139				state = LEX_EXPECT_OPERATOR;
140				break;
141			default:
142				state = LEX_ERROR;
143				break;
144			}
145			break;
146		case LEX_EXPECT_DECIMAL:
147			switch (token) {
148			case '0':
149			case '1':
150			case '2':
151			case '3':
152			case '4':
153			case '5':
154			case '6':
155			case '7':
156			case '8':
157			case '9':
158				// TODO: handle overflow
159				next *= 10;
160				next += token - '0';
161				break;
162			case '+':
163			case '-':
164			case '*':
165			case '/':
166				lv->token.type = TOKEN_UINT_TYPE;
167				lv->token.uintValue = next;
168				lv = _lexOperator(lv, token);
169				state = LEX_ROOT;
170				break;
171			case ')':
172				lv->token.type = TOKEN_UINT_TYPE;
173				lv->token.uintValue = next;
174				state = LEX_EXPECT_OPERATOR;
175				break;
176			default:
177				state = LEX_ERROR;
178			}
179			break;
180		case LEX_EXPECT_HEX:
181			switch (token) {
182			case '0':
183			case '1':
184			case '2':
185			case '3':
186			case '4':
187			case '5':
188			case '6':
189			case '7':
190			case '8':
191			case '9':
192				// TODO: handle overflow
193				next *= 16;
194				next += token - '0';
195				break;
196			case 'A':
197			case 'B':
198			case 'C':
199			case 'D':
200			case 'E':
201			case 'F':
202				// TODO: handle overflow
203				next *= 16;
204				next += token - 'A' + 10;
205				break;
206			case 'a':
207			case 'b':
208			case 'c':
209			case 'd':
210			case 'e':
211			case 'f':
212				// TODO: handle overflow
213				next *= 16;
214				next += token - 'a' + 10;
215				break;
216			case '+':
217			case '-':
218			case '*':
219			case '/':
220				lv->token.type = TOKEN_UINT_TYPE;
221				lv->token.uintValue = next;
222				lv = _lexOperator(lv, token);
223				state = LEX_ROOT;
224				break;
225			case ')':
226				lv->token.type = TOKEN_UINT_TYPE;
227				lv->token.uintValue = next;
228				state = LEX_EXPECT_OPERATOR;
229				break;
230			default:
231				state = LEX_ERROR;
232				break;
233			}
234			break;
235		case LEX_EXPECT_PREFIX:
236			switch (token) {
237			case 'X':
238			case 'x':
239				next = 0;
240				state = LEX_EXPECT_HEX;
241				break;
242			case 'B':
243			case 'b':
244				next = 0;
245				state = LEX_EXPECT_BINARY;
246				break;
247			case '+':
248			case '-':
249			case '*':
250			case '/':
251				lv->token.type = TOKEN_UINT_TYPE;
252				lv->token.uintValue = next;
253				lv = _lexOperator(lv, token);
254				state = LEX_ROOT;
255				break;
256			case ')':
257				lv->token.type = TOKEN_UINT_TYPE;
258				lv->token.uintValue = next;
259				state = LEX_EXPECT_OPERATOR;
260				break;
261			case '0':
262			case '1':
263			case '2':
264			case '3':
265			case '4':
266			case '5':
267			case '6':
268			case '7':
269			case '8':
270			case '9':
271				next = token - '0';
272				state = LEX_EXPECT_DECIMAL;
273				break;
274			default:
275				state = LEX_ERROR;
276			}
277			break;
278		case LEX_EXPECT_OPERATOR:
279			switch (token) {
280			case '+':
281			case '-':
282			case '*':
283			case '/':
284				lvNext = malloc(sizeof(struct LexVector));
285				lvNext->next = lv->next;
286				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
287				lv->next = lvNext;
288				lv = _lexOperator(lv->next, token);
289				state = LEX_ROOT;
290				break;
291			default:
292				state = LEX_ERROR;
293			}
294			break;
295		case LEX_ERROR:
296			// This shouldn't be reached
297			break;
298		}
299	}
300
301	switch (state) {
302	case LEX_EXPECT_BINARY:
303	case LEX_EXPECT_DECIMAL:
304	case LEX_EXPECT_HEX:
305	case LEX_EXPECT_PREFIX:
306		lv->token.type = TOKEN_UINT_TYPE;
307		lv->token.uintValue = next;
308		break;
309	case LEX_EXPECT_IDENTIFIER:
310		lv->token.type = TOKEN_IDENTIFIER_TYPE;
311		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
312		break;
313	case LEX_EXPECT_OPERATOR:
314		lvNext = malloc(sizeof(struct LexVector));
315		lvNext->next = lv->next;
316		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
317		lv->next = lvNext;
318		break;
319	case LEX_ERROR:
320	default:
321		lv->token.type = TOKEN_ERROR_TYPE;
322		break;
323	}
324	return adjusted;
325}
326
327static const int _operatorPrecedence[] = {
328	2,
329	1,
330	1,
331	0,
332	0
333};
334
335static struct ParseTree* _parseTreeCreate() {
336	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
337	tree->token.type = TOKEN_ERROR_TYPE;
338	tree->rhs = 0;
339	tree->lhs = 0;
340	return tree;
341}
342
343static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
344	struct ParseTree* newTree = 0;
345	while (lv) {
346		int newPrecedence;
347		switch (lv->token.type) {
348		case TOKEN_IDENTIFIER_TYPE:
349		case TOKEN_UINT_TYPE:
350			if (tree->token.type == TOKEN_ERROR_TYPE) {
351				tree->token = lv->token;
352				lv = lv->next;
353			} else {
354				tree->token.type = TOKEN_ERROR_TYPE;
355				return 0;
356			}
357			break;
358		case TOKEN_OPEN_PAREN_TYPE:
359			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
360			break;
361		case TOKEN_CLOSE_PAREN_TYPE:
362			if (openParens <= 0) {
363				tree->token.type = TOKEN_ERROR_TYPE;
364				return 0;
365			}
366			return lv->next;
367			break;
368		case TOKEN_OPERATOR_TYPE:
369			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
370			if (newPrecedence < precedence) {
371				newTree = _parseTreeCreate();
372				*newTree = *tree;
373				tree->lhs = newTree;
374				tree->rhs = _parseTreeCreate();
375				tree->token = lv->token;
376				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
377				if (tree->token.type == TOKEN_ERROR_TYPE) {
378					tree->token.type = TOKEN_ERROR_TYPE;
379				}
380			} else {
381				return lv;
382			}
383			break;
384		case TOKEN_ERROR_TYPE:
385			tree->token.type = TOKEN_ERROR_TYPE;
386			return 0;
387		}
388	}
389
390	return 0;
391}
392
393void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
394	if (!tree) {
395		return;
396	}
397
398	tree->token.type = TOKEN_ERROR_TYPE;
399	tree->lhs = 0;
400	tree->rhs = 0;
401
402	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
403}
404
405void lexFree(struct LexVector* lv) {
406	while (lv) {
407		struct LexVector* lvNext = lv->next;
408		free(lv);
409		lv = lvNext;
410	}
411}
412
413void parseFree(struct ParseTree* tree) {
414	if (!tree) {
415		return;
416	}
417
418	parseFree(tree->lhs);
419	parseFree(tree->rhs);
420
421	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
422		free(tree->token.identifierValue);
423	}
424	free(tree);
425}