all repos — mgba @ 344364695e4900668c085de57c02f2306f52d85c

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3#include "util/string.h"
  4
  5static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
  6	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
  7	lvNext->token.type = TOKEN_OPERATOR_TYPE;
  8	switch (operator) {
  9	case '+':
 10		lvNext->token.operatorValue = OP_ADD;
 11		break;
 12	case '-':
 13		lvNext->token.operatorValue = OP_SUBTRACT;
 14		break;
 15	case '*':
 16		lvNext->token.operatorValue = OP_MULTIPLY;
 17		break;
 18	case '/':
 19		lvNext->token.operatorValue = OP_DIVIDE;
 20		break;
 21	default:
 22		lvNext->token.type = TOKEN_ERROR_TYPE;
 23		break;
 24	}
 25	lvNext->next = lv->next;
 26	lv->next = lvNext;
 27	lv = lvNext;
 28	lvNext = malloc(sizeof(struct LexVector));
 29	lvNext->next = lv->next;
 30	lvNext->token.type = TOKEN_ERROR_TYPE;
 31	lv->next = lvNext;
 32	return lvNext;
 33}
 34
 35size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 36	if (!string || length < 1) {
 37		return 0;
 38	}
 39
 40	uint32_t next = 0;
 41	size_t adjusted = 0;
 42
 43	enum LexState state = LEX_ROOT;
 44	const char* tokenStart = 0;
 45	struct LexVector* lvNext;
 46
 47	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 48		char token = string[0];
 49		++string;
 50		++adjusted;
 51		--length;
 52		switch (state) {
 53		case LEX_ROOT:
 54			tokenStart = string - 1;
 55			switch (token) {
 56			case '1':
 57			case '2':
 58			case '3':
 59			case '4':
 60			case '5':
 61			case '6':
 62			case '7':
 63			case '8':
 64			case '9':
 65				state = LEX_EXPECT_DECIMAL;
 66				next = token - '0';
 67				break;
 68			case '0':
 69				state = LEX_EXPECT_PREFIX;
 70				next = 0;
 71				break;
 72			case '$':
 73				state = LEX_EXPECT_HEX;
 74				next = 0;
 75				break;
 76			case '(':
 77				state = LEX_ROOT;
 78				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 79				lvNext = malloc(sizeof(struct LexVector));
 80				lvNext->next = lv->next;
 81				lvNext->token.type = TOKEN_ERROR_TYPE;
 82				lv->next = lvNext;
 83				lv = lvNext;
 84				break;
 85			default:
 86				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 87					state = LEX_EXPECT_IDENTIFIER;
 88				} else {
 89					state = LEX_ERROR;
 90				}
 91				break;
 92			};
 93			break;
 94		case LEX_EXPECT_IDENTIFIER:
 95			switch (token) {
 96			case '+':
 97			case '-':
 98			case '*':
 99			case '/':
100				lv->token.type = TOKEN_IDENTIFIER_TYPE;
101				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
102				lv = _lexOperator(lv, token);
103				state = LEX_ROOT;
104				break;
105			case ')':
106				lv->token.type = TOKEN_IDENTIFIER_TYPE;
107				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
108				state = LEX_EXPECT_OPERATOR;
109				break;
110			default:
111				break;
112			}
113			break;
114		case LEX_EXPECT_BINARY:
115			switch (token) {
116				case '0':
117				case '1':
118				// TODO: handle overflow
119				next <<= 1;
120				next += token - '0';
121				break;
122			case '+':
123			case '-':
124			case '*':
125			case '/':
126				lv->token.type = TOKEN_UINT_TYPE;
127				lv->token.uintValue = next;
128				lv = _lexOperator(lv, token);
129				state = LEX_ROOT;
130				break;
131			case ')':
132				lv->token.type = TOKEN_UINT_TYPE;
133				lv->token.uintValue = next;
134				state = LEX_EXPECT_OPERATOR;
135				break;
136			default:
137				state = LEX_ERROR;
138				break;
139			}
140			break;
141		case LEX_EXPECT_DECIMAL:
142			switch (token) {
143			case '0':
144			case '1':
145			case '2':
146			case '3':
147			case '4':
148			case '5':
149			case '6':
150			case '7':
151			case '8':
152			case '9':
153				// TODO: handle overflow
154				next *= 10;
155				next += token - '0';
156				break;
157			case '+':
158			case '-':
159			case '*':
160			case '/':
161				lv->token.type = TOKEN_UINT_TYPE;
162				lv->token.uintValue = next;
163				lv = _lexOperator(lv, token);
164				state = LEX_ROOT;
165				break;
166			case ')':
167				lv->token.type = TOKEN_UINT_TYPE;
168				lv->token.uintValue = next;
169				state = LEX_EXPECT_OPERATOR;
170				break;
171			default:
172				state = LEX_ERROR;
173			}
174			break;
175		case LEX_EXPECT_HEX:
176			switch (token) {
177			case '0':
178			case '1':
179			case '2':
180			case '3':
181			case '4':
182			case '5':
183			case '6':
184			case '7':
185			case '8':
186			case '9':
187				// TODO: handle overflow
188				next *= 16;
189				next += token - '0';
190				break;
191			case 'A':
192			case 'B':
193			case 'C':
194			case 'D':
195			case 'E':
196			case 'F':
197				// TODO: handle overflow
198				next *= 16;
199				next += token - 'A' + 10;
200				break;
201			case 'a':
202			case 'b':
203			case 'c':
204			case 'd':
205			case 'e':
206			case 'f':
207				// TODO: handle overflow
208				next *= 16;
209				next += token - 'a' + 10;
210				break;
211			case '+':
212			case '-':
213			case '*':
214			case '/':
215				lv->token.type = TOKEN_UINT_TYPE;
216				lv->token.uintValue = next;
217				lv = _lexOperator(lv, token);
218				state = LEX_ROOT;
219				break;
220			case ')':
221				lv->token.type = TOKEN_UINT_TYPE;
222				lv->token.uintValue = next;
223				state = LEX_EXPECT_OPERATOR;
224				break;
225			default:
226				state = LEX_ERROR;
227				break;
228			}
229			break;
230		case LEX_EXPECT_PREFIX:
231			switch (token) {
232			case 'X':
233			case 'x':
234				next = 0;
235				state = LEX_EXPECT_HEX;
236				break;
237			case 'B':
238			case 'b':
239				next = 0;
240				state = LEX_EXPECT_BINARY;
241				break;
242			case '+':
243			case '-':
244			case '*':
245			case '/':
246				lv->token.type = TOKEN_UINT_TYPE;
247				lv->token.uintValue = next;
248				lv = _lexOperator(lv, token);
249				state = LEX_ROOT;
250				break;
251			case ')':
252				lv->token.type = TOKEN_UINT_TYPE;
253				lv->token.uintValue = next;
254				state = LEX_EXPECT_OPERATOR;
255				break;
256			case '0':
257			case '1':
258			case '2':
259			case '3':
260			case '4':
261			case '5':
262			case '6':
263			case '7':
264			case '8':
265			case '9':
266				next = token - '0';
267				state = LEX_EXPECT_DECIMAL;
268				break;
269			default:
270				state = LEX_ERROR;
271			}
272			break;
273		case LEX_EXPECT_OPERATOR:
274			switch (token) {
275			case '+':
276			case '-':
277			case '*':
278			case '/':
279				lvNext = malloc(sizeof(struct LexVector));
280				lvNext->next = lv->next;
281				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
282				lv->next = lvNext;
283				lv = _lexOperator(lv->next, token);
284				state = LEX_ROOT;
285				break;
286			default:
287				state = LEX_ERROR;
288			}
289			break;
290		case LEX_ERROR:
291			// This shouldn't be reached
292			break;
293		}
294	}
295
296	switch (state) {
297	case LEX_EXPECT_BINARY:
298	case LEX_EXPECT_DECIMAL:
299	case LEX_EXPECT_HEX:
300	case LEX_EXPECT_PREFIX:
301		lv->token.type = TOKEN_UINT_TYPE;
302		lv->token.uintValue = next;
303		break;
304	case LEX_EXPECT_IDENTIFIER:
305		lv->token.type = TOKEN_IDENTIFIER_TYPE;
306		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
307		break;
308	case LEX_EXPECT_OPERATOR:
309		lvNext = malloc(sizeof(struct LexVector));
310		lvNext->next = lv->next;
311		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
312		lv->next = lvNext;
313		break;
314	case LEX_ERROR:
315	default:
316		lv->token.type = TOKEN_ERROR_TYPE;
317		break;
318	}
319	return adjusted;
320}
321
322static const int _operatorPrecedence[] = {
323	2,
324	1,
325	1,
326	0,
327	0
328};
329
330static struct ParseTree* _parseTreeCreate() {
331	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
332	tree->token.type = TOKEN_ERROR_TYPE;
333	tree->rhs = 0;
334	tree->lhs = 0;
335	return tree;
336}
337
338static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
339	struct ParseTree* newTree = 0;
340	while (lv) {
341		int newPrecedence;
342		switch (lv->token.type) {
343		case TOKEN_IDENTIFIER_TYPE:
344		case TOKEN_UINT_TYPE:
345			if (tree->token.type == TOKEN_ERROR_TYPE) {
346				tree->token = lv->token;
347				lv = lv->next;
348			} else {
349				tree->token.type = TOKEN_ERROR_TYPE;
350				return 0;
351			}
352			break;
353		case TOKEN_OPEN_PAREN_TYPE:
354			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
355			break;
356		case TOKEN_CLOSE_PAREN_TYPE:
357			if (openParens <= 0) {
358				tree->token.type = TOKEN_ERROR_TYPE;
359				return 0;
360			}
361			return lv->next;
362			break;
363		case TOKEN_OPERATOR_TYPE:
364			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
365			if (newPrecedence < precedence) {
366				newTree = _parseTreeCreate();
367				*newTree = *tree;
368				tree->lhs = newTree;
369				tree->rhs = _parseTreeCreate();
370				tree->token = lv->token;
371				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
372				if (tree->token.type == TOKEN_ERROR_TYPE) {
373					tree->token.type = TOKEN_ERROR_TYPE;
374				}
375			} else {
376				return lv;
377			}
378			break;
379		case TOKEN_ERROR_TYPE:
380			tree->token.type = TOKEN_ERROR_TYPE;
381			return 0;
382		}
383	}
384
385	return 0;
386}
387
388void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
389	if (!tree) {
390		return;
391	}
392
393	tree->token.type = TOKEN_ERROR_TYPE;
394	tree->lhs = 0;
395	tree->rhs = 0;
396
397	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
398}
399
400void lexFree(struct LexVector* lv) {
401	while (lv) {
402		struct LexVector* lvNext = lv->next;
403		free(lv);
404		lv = lvNext;
405	}
406}
407
408void parseFree(struct ParseTree* tree) {
409	if (!tree) {
410		return;
411	}
412
413	parseFree(tree->lhs);
414	parseFree(tree->rhs);
415
416	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
417		free(tree->token.identifierValue);
418	}
419	free(tree);
420}