all repos — mgba @ ca5f7a45ee5ba2d50dedda2692e0319985785572

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3#include "util/string.h"
  4
  5static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
  6	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
  7	lvNext->token.type = TOKEN_OPERATOR_TYPE;
  8	switch (operator) {
  9	case '+':
 10		lvNext->token.operatorValue = OP_ADD;
 11		break;
 12	case '-':
 13		lvNext->token.operatorValue = OP_SUBTRACT;
 14		break;
 15	case '*':
 16		lvNext->token.operatorValue = OP_MULTIPLY;
 17		break;
 18	case '/':
 19		lvNext->token.operatorValue = OP_DIVIDE;
 20		break;
 21	default:
 22		lvNext->token.type = TOKEN_ERROR_TYPE;
 23		break;
 24	}
 25	lvNext->next = lv->next;
 26	lv->next = lvNext;
 27	lv = lvNext;
 28	lvNext = malloc(sizeof(struct LexVector));
 29	lvNext->next = lv->next;
 30	lvNext->token.type = TOKEN_ERROR_TYPE;
 31	lv->next = lvNext;
 32	return lvNext;
 33}
 34
 35size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 36	if (!string || length < 1) {
 37		return 0;
 38	}
 39
 40	uint32_t next = 0;
 41	size_t adjusted = 0;
 42
 43	enum LexState state = LEX_ROOT;
 44	const char* tokenStart = 0;
 45	struct LexVector* lvNext;
 46
 47	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 48		char token = string[0];
 49		++string;
 50		++adjusted;
 51		--length;
 52		switch (state) {
 53		case LEX_ROOT:
 54			tokenStart = string - 1;
 55			switch (token) {
 56			case '1':
 57			case '2':
 58			case '3':
 59			case '4':
 60			case '5':
 61			case '6':
 62			case '7':
 63			case '8':
 64			case '9':
 65				state = LEX_EXPECT_DECIMAL;
 66				next = token - '0';
 67				break;
 68			case '0':
 69				state = LEX_EXPECT_PREFIX;
 70				next = 0;
 71				break;
 72			case '$':
 73				state = LEX_EXPECT_HEX;
 74				next = 0;
 75				break;
 76			case '(':
 77				state = LEX_ROOT;
 78				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 79				lvNext = malloc(sizeof(struct LexVector));
 80				lvNext->next = lv->next;
 81				lvNext->token.type = TOKEN_ERROR_TYPE;
 82				lv->next = lvNext;
 83				lv = lvNext;
 84				break;
 85			default:
 86				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 87					state = LEX_EXPECT_IDENTIFIER;
 88				} else {
 89					state = LEX_ERROR;
 90				}
 91				break;
 92			};
 93			break;
 94		case LEX_EXPECT_IDENTIFIER:
 95			switch (token) {
 96			case '+':
 97			case '-':
 98			case '*':
 99			case '/':
100				lv->token.type = TOKEN_IDENTIFIER_TYPE;
101				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
102				lv = _lexOperator(lv, token);
103				state = LEX_ROOT;
104				break;
105			case ')':
106				lv->token.type = TOKEN_IDENTIFIER_TYPE;
107				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
108				state = LEX_EXPECT_OPERATOR;
109				break;
110			default:
111				break;
112			}
113			break;
114		case LEX_EXPECT_BINARY:
115			switch (token) {
116				case '0':
117				case '1':
118				// TODO: handle overflow
119				next <<= 1;
120				next += token - '0';
121				break;
122			case '+':
123			case '-':
124			case '*':
125			case '/':
126				lv->token.type = TOKEN_UINT_TYPE;
127				lv->token.uintValue = next;
128				lv = _lexOperator(lv, token);
129				state = LEX_ROOT;
130				break;
131			case ')':
132				lv->token.type = TOKEN_UINT_TYPE;
133				lv->token.uintValue = next;
134				state = LEX_EXPECT_OPERATOR;
135				break;
136			default:
137				state = LEX_ERROR;
138				break;
139			}
140			break;
141		case LEX_EXPECT_DECIMAL:
142			switch (token) {
143			case '0':
144			case '1':
145			case '2':
146			case '3':
147			case '4':
148			case '5':
149			case '6':
150			case '7':
151			case '8':
152			case '9':
153				// TODO: handle overflow
154				next *= 10;
155				next += token - '0';
156				break;
157			case '+':
158			case '-':
159			case '*':
160			case '/':
161				lv->token.type = TOKEN_UINT_TYPE;
162				lv->token.uintValue = next;
163				lv = _lexOperator(lv, token);
164				state = LEX_ROOT;
165				break;
166			case ')':
167				lv->token.type = TOKEN_UINT_TYPE;
168				lv->token.uintValue = next;
169				state = LEX_EXPECT_OPERATOR;
170				break;
171			default:
172				state = LEX_ERROR;
173			}
174			break;
175		case LEX_EXPECT_HEX:
176			switch (token) {
177			case '0':
178			case '1':
179			case '2':
180			case '3':
181			case '4':
182			case '5':
183			case '6':
184			case '7':
185			case '8':
186			case '9':
187				// TODO: handle overflow
188				next *= 16;
189				next += token - '0';
190				break;
191			case 'A':
192			case 'B':
193			case 'C':
194			case 'D':
195			case 'E':
196			case 'F':
197				// TODO: handle overflow
198				next *= 16;
199				next += token - 'A' + 10;
200				break;
201			case 'a':
202			case 'b':
203			case 'c':
204			case 'd':
205			case 'e':
206			case 'f':
207				// TODO: handle overflow
208				next *= 16;
209				next += token - 'a' + 10;
210				break;
211			case '+':
212			case '-':
213			case '*':
214			case '/':
215				lv->token.type = TOKEN_UINT_TYPE;
216				lv->token.uintValue = next;
217				lv = _lexOperator(lv, token);
218				state = LEX_ROOT;
219				break;
220			case ')':
221				lv->token.type = TOKEN_UINT_TYPE;
222				lv->token.uintValue = next;
223				state = LEX_EXPECT_OPERATOR;
224				break;
225			default:
226				state = LEX_ERROR;
227				break;
228			}
229			break;
230		case LEX_EXPECT_PREFIX:
231			switch (token) {
232			case 'X':
233			case 'x':
234				next = 0;
235				state = LEX_EXPECT_HEX;
236				break;
237			case 'B':
238			case 'b':
239				next = 0;
240				state = LEX_EXPECT_BINARY;
241				break;
242			case '+':
243			case '-':
244			case '*':
245			case '/':
246				lv->token.type = TOKEN_UINT_TYPE;
247				lv->token.uintValue = next;
248				lv = _lexOperator(lv, token);
249				state = LEX_ROOT;
250				break;
251			case ')':
252				lv->token.type = TOKEN_UINT_TYPE;
253				lv->token.uintValue = next;
254				state = LEX_EXPECT_OPERATOR;
255				break;
256			}
257			break;
258		case LEX_EXPECT_OPERATOR:
259			switch (token) {
260			case '+':
261			case '-':
262			case '*':
263			case '/':
264				lvNext = malloc(sizeof(struct LexVector));
265				lvNext->next = lv->next;
266				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
267				lv->next = lvNext;
268				lv = _lexOperator(lv->next, token);
269				state = LEX_ROOT;
270				break;
271			default:
272				state = LEX_ERROR;
273			}
274			break;
275		case LEX_ERROR:
276			// This shouldn't be reached
277			break;
278		}
279	}
280
281	switch (state) {
282	case LEX_EXPECT_BINARY:
283	case LEX_EXPECT_DECIMAL:
284	case LEX_EXPECT_HEX:
285	case LEX_EXPECT_PREFIX:
286		lv->token.type = TOKEN_UINT_TYPE;
287		lv->token.uintValue = next;
288		break;
289	case LEX_EXPECT_IDENTIFIER:
290		lv->token.type = TOKEN_IDENTIFIER_TYPE;
291		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
292		break;
293	case LEX_EXPECT_OPERATOR:
294		lvNext = malloc(sizeof(struct LexVector));
295		lvNext->next = lv->next;
296		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
297		lv->next = lvNext;
298		break;
299	case LEX_ERROR:
300	default:
301		lv->token.type = TOKEN_ERROR_TYPE;
302		break;
303	}
304	return adjusted;
305}
306
307static const int _operatorPrecedence[] = {
308	2,
309	1,
310	1,
311	0,
312	0
313};
314
315static struct ParseTree* _parseTreeCreate() {
316	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
317	tree->token.type = TOKEN_ERROR_TYPE;
318	tree->rhs = 0;
319	tree->lhs = 0;
320	return tree;
321}
322
323static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
324	struct ParseTree* newTree = 0;
325	while (lv) {
326		int newPrecedence;
327		switch (lv->token.type) {
328		case TOKEN_IDENTIFIER_TYPE:
329		case TOKEN_UINT_TYPE:
330			if (tree->token.type == TOKEN_ERROR_TYPE) {
331				tree->token = lv->token;
332				lv = lv->next;
333			} else {
334				tree->token.type = TOKEN_ERROR_TYPE;
335				return 0;
336			}
337			break;
338		case TOKEN_OPEN_PAREN_TYPE:
339			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
340			break;
341		case TOKEN_CLOSE_PAREN_TYPE:
342			if (openParens <= 0) {
343				tree->token.type = TOKEN_ERROR_TYPE;
344				return 0;
345			}
346			return lv->next;
347			break;
348		case TOKEN_OPERATOR_TYPE:
349			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
350			if (newPrecedence < precedence) {
351				newTree = _parseTreeCreate();
352				*newTree = *tree;
353				tree->lhs = newTree;
354				tree->rhs = _parseTreeCreate();
355				tree->token = lv->token;
356				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
357				if (tree->token.type == TOKEN_ERROR_TYPE) {
358					tree->token.type = TOKEN_ERROR_TYPE;
359				}
360			} else {
361				return lv;
362			}
363			break;
364		case TOKEN_ERROR_TYPE:
365			tree->token.type = TOKEN_ERROR_TYPE;
366			return 0;
367		}
368	}
369
370	return 0;
371}
372
373void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
374	if (!tree) {
375		return;
376	}
377
378	tree->token.type = TOKEN_ERROR_TYPE;
379	tree->lhs = 0;
380	tree->rhs = 0;
381
382	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
383}
384
385void lexFree(struct LexVector* lv) {
386	while (lv) {
387		struct LexVector* lvNext = lv->next;
388		free(lv);
389		lv = lvNext;
390	}
391}
392
393void parseFree(struct ParseTree* tree) {
394	if (!tree) {
395		return;
396	}
397
398	parseFree(tree->lhs);
399	parseFree(tree->rhs);
400
401	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
402		free(tree->token.identifierValue);
403	}
404	free(tree);
405}