all repos — mgba @ 51ad9d37e1cbba22d81d9bcddfe9ad8d7c6de1c2

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3static inline char* _strndup(const char* start, size_t len) {
  4#ifdef HAVE_STRNDUP
  5	return strndup(start, len);
  6#else
  7	// This is suboptimal, but anything recent should have strndup
  8	char* out = malloc((len + 1) * sizeof(char));
  9	strncpy(out, start, len);
 10	out[len] = '\0';
 11	return out;
 12#endif
 13}
 14
 15static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
 16	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
 17	lvNext->token.type = TOKEN_OPERATOR_TYPE;
 18	switch (operator) {
 19	case '+':
 20		lvNext->token.operatorValue = OP_ADD;
 21		break;
 22	case '-':
 23		lvNext->token.operatorValue = OP_SUBTRACT;
 24		break;
 25	case '*':
 26		lvNext->token.operatorValue = OP_MULTIPLY;
 27		break;
 28	case '/':
 29		lvNext->token.operatorValue = OP_DIVIDE;
 30		break;
 31	default:
 32		lvNext->token.type = TOKEN_ERROR_TYPE;
 33		break;
 34	}
 35	lvNext->next = lv->next;
 36	lv->next = lvNext;
 37	lv = lvNext;
 38	lvNext = malloc(sizeof(struct LexVector));
 39	lvNext->next = lv->next;
 40	lvNext->token.type = TOKEN_ERROR_TYPE;
 41	lv->next = lvNext;
 42	return lvNext;
 43}
 44
 45size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 46	if (!string || length < 1) {
 47		return 0;
 48	}
 49
 50	uint32_t next = 0;
 51	size_t adjusted = 0;
 52
 53	enum LexState state = LEX_ROOT;
 54	const char* tokenStart = 0;
 55	struct LexVector* lvNext;
 56
 57	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 58		char token = string[0];
 59		++string;
 60		++adjusted;
 61		--length;
 62		switch (state) {
 63		case LEX_ROOT:
 64			tokenStart = string - 1;
 65			switch (token) {
 66			case '1':
 67			case '2':
 68			case '3':
 69			case '4':
 70			case '5':
 71			case '6':
 72			case '7':
 73			case '8':
 74			case '9':
 75				state = LEX_EXPECT_DECIMAL;
 76				next = token - '0';
 77				break;
 78			case '0':
 79				state = LEX_EXPECT_PREFIX;
 80				next = 0;
 81				break;
 82			case '$':
 83				state = LEX_EXPECT_HEX;
 84				next = 0;
 85				break;
 86			case '(':
 87				state = LEX_ROOT;
 88				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 89				lvNext = malloc(sizeof(struct LexVector));
 90				lvNext->next = lv->next;
 91				lvNext->token.type = TOKEN_ERROR_TYPE;
 92				lv->next = lvNext;
 93				lv = lvNext;
 94				break;
 95			default:
 96				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 97					state = LEX_EXPECT_IDENTIFIER;
 98				} else {
 99					state = LEX_ERROR;
100				}
101				break;
102			};
103			break;
104		case LEX_EXPECT_IDENTIFIER:
105			switch (token) {
106			case '+':
107			case '-':
108			case '*':
109			case '/':
110				lv->token.type = TOKEN_IDENTIFIER_TYPE;
111				lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
112				lv = _lexOperator(lv, token);
113				state = LEX_ROOT;
114				break;
115			case ')':
116				lv->token.type = TOKEN_IDENTIFIER_TYPE;
117				lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
118				state = LEX_EXPECT_OPERATOR;
119				break;
120			default:
121				break;
122			}
123			break;
124		case LEX_EXPECT_BINARY:
125			switch (token) {
126				case '0':
127				case '1':
128				// TODO: handle overflow
129				next <<= 1;
130				next += token - '0';
131				break;
132			case '+':
133			case '-':
134			case '*':
135			case '/':
136				lv->token.type = TOKEN_UINT_TYPE;
137				lv->token.uintValue = next;
138				lv = _lexOperator(lv, token);
139				state = LEX_ROOT;
140				break;
141			case ')':
142				lv->token.type = TOKEN_UINT_TYPE;
143				lv->token.uintValue = next;
144				state = LEX_EXPECT_OPERATOR;
145				break;
146			default:
147				state = LEX_ERROR;
148				break;
149			}
150			break;
151		case LEX_EXPECT_DECIMAL:
152			switch (token) {
153			case '0':
154			case '1':
155			case '2':
156			case '3':
157			case '4':
158			case '5':
159			case '6':
160			case '7':
161			case '8':
162			case '9':
163				// TODO: handle overflow
164				next *= 10;
165				next += token - '0';
166				break;
167			case '+':
168			case '-':
169			case '*':
170			case '/':
171				lv->token.type = TOKEN_UINT_TYPE;
172				lv->token.uintValue = next;
173				lv = _lexOperator(lv, token);
174				state = LEX_ROOT;
175				break;
176			case ')':
177				lv->token.type = TOKEN_UINT_TYPE;
178				lv->token.uintValue = next;
179				state = LEX_EXPECT_OPERATOR;
180				break;
181			default:
182				state = LEX_ERROR;
183			}
184			break;
185		case LEX_EXPECT_HEX:
186			switch (token) {
187			case '0':
188			case '1':
189			case '2':
190			case '3':
191			case '4':
192			case '5':
193			case '6':
194			case '7':
195			case '8':
196			case '9':
197				// TODO: handle overflow
198				next *= 16;
199				next += token - '0';
200				break;
201			case 'A':
202			case 'B':
203			case 'C':
204			case 'D':
205			case 'E':
206			case 'F':
207				// TODO: handle overflow
208				next *= 16;
209				next += token - 'A' + 10;
210				break;
211			case 'a':
212			case 'b':
213			case 'c':
214			case 'd':
215			case 'e':
216			case 'f':
217				// TODO: handle overflow
218				next *= 16;
219				next += token - 'a' + 10;
220				break;
221			case '+':
222			case '-':
223			case '*':
224			case '/':
225				lv->token.type = TOKEN_UINT_TYPE;
226				lv->token.uintValue = next;
227				lv = _lexOperator(lv, token);
228				state = LEX_ROOT;
229				break;
230			case ')':
231				lv->token.type = TOKEN_UINT_TYPE;
232				lv->token.uintValue = next;
233				state = LEX_EXPECT_OPERATOR;
234				break;
235			default:
236				state = LEX_ERROR;
237				break;
238			}
239			break;
240		case LEX_EXPECT_PREFIX:
241			switch (token) {
242			case 'X':
243			case 'x':
244				next = 0;
245				state = LEX_EXPECT_HEX;
246				break;
247			case 'B':
248			case 'b':
249				next = 0;
250				state = LEX_EXPECT_BINARY;
251				break;
252			case '+':
253			case '-':
254			case '*':
255			case '/':
256				lv->token.type = TOKEN_UINT_TYPE;
257				lv->token.uintValue = next;
258				lv = _lexOperator(lv, token);
259				state = LEX_ROOT;
260				break;
261			case ')':
262				lv->token.type = TOKEN_UINT_TYPE;
263				lv->token.uintValue = next;
264				state = LEX_EXPECT_OPERATOR;
265				break;
266			}
267			break;
268		case LEX_EXPECT_OPERATOR:
269			switch (token) {
270			case '+':
271			case '-':
272			case '*':
273			case '/':
274				lvNext = malloc(sizeof(struct LexVector));
275				lvNext->next = lv->next;
276				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
277				lv->next = lvNext;
278				lv = _lexOperator(lv->next, token);
279				state = LEX_ROOT;
280				break;
281			default:
282				state = LEX_ERROR;
283			}
284			break;
285		case LEX_ERROR:
286			// This shouldn't be reached
287			break;
288		}
289	}
290
291	switch (state) {
292	case LEX_EXPECT_BINARY:
293	case LEX_EXPECT_DECIMAL:
294	case LEX_EXPECT_HEX:
295	case LEX_EXPECT_PREFIX:
296		lv->token.type = TOKEN_UINT_TYPE;
297		lv->token.uintValue = next;
298		break;
299	case LEX_EXPECT_IDENTIFIER:
300		lv->token.type = TOKEN_IDENTIFIER_TYPE;
301		lv->token.identifierValue = _strndup(tokenStart, string - tokenStart);
302		break;
303	case LEX_EXPECT_OPERATOR:
304		lvNext = malloc(sizeof(struct LexVector));
305		lvNext->next = lv->next;
306		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
307		lv->next = lvNext;
308		break;
309	case LEX_ERROR:
310	default:
311		lv->token.type = TOKEN_ERROR_TYPE;
312		break;
313	}
314	return adjusted;
315}
316
317static const int _operatorPrecedence[] = {
318	2,
319	1,
320	1,
321	0,
322	0
323};
324
325static struct ParseTree* _parseTreeCreate() {
326	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
327	tree->token.type = TOKEN_ERROR_TYPE;
328	tree->rhs = 0;
329	tree->lhs = 0;
330	return tree;
331}
332
333static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
334	struct ParseTree* newTree = 0;
335	while (lv) {
336		int newPrecedence;
337		switch (lv->token.type) {
338		case TOKEN_IDENTIFIER_TYPE:
339		case TOKEN_UINT_TYPE:
340			if (tree->token.type == TOKEN_ERROR_TYPE) {
341				tree->token = lv->token;
342				lv = lv->next;
343			} else {
344				tree->token.type = TOKEN_ERROR_TYPE;
345				return 0;
346			}
347			break;
348		case TOKEN_OPEN_PAREN_TYPE:
349			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
350			break;
351		case TOKEN_CLOSE_PAREN_TYPE:
352			if (openParens <= 0) {
353				tree->token.type = TOKEN_ERROR_TYPE;
354				return 0;
355			}
356			return lv->next;
357			break;
358		case TOKEN_OPERATOR_TYPE:
359			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
360			if (newPrecedence < precedence) {
361				newTree = _parseTreeCreate();
362				*newTree = *tree;
363				tree->lhs = newTree;
364				tree->rhs = _parseTreeCreate();
365				tree->token = lv->token;
366				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
367				if (tree->token.type == TOKEN_ERROR_TYPE) {
368					tree->token.type = TOKEN_ERROR_TYPE;
369				}
370			} else {
371				return lv;
372			}
373			break;
374		case TOKEN_ERROR_TYPE:
375			tree->token.type = TOKEN_ERROR_TYPE;
376			return 0;
377		}
378	}
379
380	return 0;
381}
382
383void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
384	if (!tree) {
385		return;
386	}
387
388	tree->token.type = TOKEN_ERROR_TYPE;
389	tree->lhs = 0;
390	tree->rhs = 0;
391
392	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
393}
394
395void lexFree(struct LexVector* lv) {
396	while (lv) {
397		struct LexVector* lvNext = lv->next;
398		free(lv);
399		lv = lvNext;
400	}
401}
402
403void parseFree(struct ParseTree* tree) {
404	if (!tree) {
405		return;
406	}
407
408	parseFree(tree->lhs);
409	parseFree(tree->rhs);
410
411	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
412		free(tree->token.identifierValue);
413	}
414	free(tree);
415}