all repos — mgba @ fd1128f90a17151fd6e3a673332045cf534a3d21

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3static inline char* _strndup(const char* start, size_t len) {
  4#ifdef HAVE_STRNDUP
  5	return strndup(start, len);
  6#else
  7	// This is suboptimal, but anything recent should have strndup
  8	char* out = malloc((len + 1) * sizeof(char));
  9	strncpy(out, start, len);
 10	out[len] = '\0';
 11	return out;
 12#endif
 13}
 14
 15static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
 16	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
 17	lvNext->token.type = TOKEN_OPERATOR_TYPE;
 18	switch (operator) {
 19	case '+':
 20		lvNext->token.operatorValue = OP_ADD;
 21		break;
 22	case '-':
 23		lvNext->token.operatorValue = OP_SUBTRACT;
 24		break;
 25	case '*':
 26		lvNext->token.operatorValue = OP_MULTIPLY;
 27		break;
 28	case '/':
 29		lvNext->token.operatorValue = OP_DIVIDE;
 30		break;
 31	default:
 32		lvNext->token.type = TOKEN_ERROR_TYPE;
 33		break;
 34	}
 35	lvNext->next = lv->next;
 36	lv->next = lvNext;
 37	lv = lvNext;
 38	lvNext = malloc(sizeof(struct LexVector));
 39	lvNext->next = lv->next;
 40	lvNext->token.type = TOKEN_ERROR_TYPE;
 41	lv->next = lvNext;
 42	return lvNext;
 43}
 44
 45size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 46	if (!string || length < 1) {
 47		return 0;
 48	}
 49
 50	uint32_t next = 0;
 51	size_t adjusted = 0;
 52
 53	enum LexState state = LEX_ROOT;
 54	const char* tokenStart = 0;
 55	struct LexVector* lvNext;
 56
 57	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 58		char token = string[0];
 59		++string;
 60		++adjusted;
 61		--length;
 62		switch (state) {
 63		case LEX_ROOT:
 64			tokenStart = string - 1;
 65			switch (token) {
 66			case '1':
 67			case '2':
 68			case '3':
 69			case '4':
 70			case '5':
 71			case '6':
 72			case '7':
 73			case '8':
 74			case '9':
 75				state = LEX_EXPECT_DECIMAL;
 76				next = token - '0';
 77				break;
 78			case '0':
 79				state = LEX_EXPECT_PREFIX;
 80				next = 0;
 81				break;
 82			case '$':
 83				state = LEX_EXPECT_HEX;
 84				next = 0;
 85				break;
 86			case '(':
 87				state = LEX_ROOT;
 88				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 89				lvNext = malloc(sizeof(struct LexVector));
 90				lvNext->next = lv->next;
 91				lvNext->token.type = TOKEN_ERROR_TYPE;
 92				lv->next = lvNext;
 93				lv = lvNext;
 94				break;
 95			default:
 96				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 97					state = LEX_EXPECT_IDENTIFIER;
 98				} else {
 99					state = LEX_ERROR;
100				}
101				break;
102			};
103			break;
104		case LEX_EXPECT_IDENTIFIER:
105			switch (token) {
106			case '+':
107			case '-':
108			case '*':
109			case '/':
110				lv->token.type = TOKEN_IDENTIFIER_TYPE;
111				lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
112				lv = _lexOperator(lv, token);
113				state = LEX_ROOT;
114				break;
115			case ')':
116				lv->token.type = TOKEN_IDENTIFIER_TYPE;
117				lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
118				state = LEX_EXPECT_OPERATOR;
119				break;
120			default:
121				break;
122			}
123			break;
124		case LEX_EXPECT_DECIMAL:
125			switch (token) {
126			case '0':
127			case '1':
128			case '2':
129			case '3':
130			case '4':
131			case '5':
132			case '6':
133			case '7':
134			case '8':
135			case '9':
136				// TODO: handle overflow
137				next *= 10;
138				next += token - '0';
139				break;
140			case '+':
141			case '-':
142			case '*':
143			case '/':
144				lv->token.type = TOKEN_UINT_TYPE;
145				lv->token.uintValue = next;
146				lv = _lexOperator(lv, token);
147				state = LEX_ROOT;
148				break;
149			case ')':
150				lv->token.type = TOKEN_UINT_TYPE;
151				lv->token.uintValue = next;
152				state = LEX_EXPECT_OPERATOR;
153				break;
154			default:
155				state = LEX_ERROR;
156			}
157			break;
158		case LEX_EXPECT_HEX:
159			switch (token) {
160			case '0':
161			case '1':
162			case '2':
163			case '3':
164			case '4':
165			case '5':
166			case '6':
167			case '7':
168			case '8':
169			case '9':
170				// TODO: handle overflow
171				next *= 16;
172				next += token - '0';
173				break;
174			case 'A':
175			case 'B':
176			case 'C':
177			case 'D':
178			case 'E':
179			case 'F':
180				// TODO: handle overflow
181				next *= 16;
182				next += token - 'A' + 10;
183				break;
184			case 'a':
185			case 'b':
186			case 'c':
187			case 'd':
188			case 'e':
189			case 'f':
190				// TODO: handle overflow
191				next *= 16;
192				next += token - 'a' + 10;
193				break;
194			case '+':
195			case '-':
196			case '*':
197			case '/':
198				lv->token.type = TOKEN_UINT_TYPE;
199				lv->token.uintValue = next;
200				lv = _lexOperator(lv, token);
201				state = LEX_ROOT;
202				break;
203			case ')':
204				lv->token.type = TOKEN_UINT_TYPE;
205				lv->token.uintValue = next;
206				state = LEX_EXPECT_OPERATOR;
207				break;
208			default:
209				state = LEX_ERROR;
210				break;
211			}
212			break;
213		case LEX_EXPECT_PREFIX:
214			switch (token) {
215			case 'X':
216			case 'x':
217				next = 0;
218				state = LEX_EXPECT_HEX;
219				break;
220			case '+':
221			case '-':
222			case '*':
223			case '/':
224				lv->token.type = TOKEN_UINT_TYPE;
225				lv->token.uintValue = next;
226				lv = _lexOperator(lv, token);
227				state = LEX_ROOT;
228				break;
229			case ')':
230				lv->token.type = TOKEN_UINT_TYPE;
231				lv->token.uintValue = next;
232				state = LEX_EXPECT_OPERATOR;
233				break;
234			}
235			break;
236		case LEX_EXPECT_OPERATOR:
237			switch (token) {
238			case '+':
239			case '-':
240			case '*':
241			case '/':
242				lvNext = malloc(sizeof(struct LexVector));
243				lvNext->next = lv->next;
244				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
245				lv->next = lvNext;
246				lv = _lexOperator(lv->next, token);
247				state = LEX_ROOT;
248				break;
249			default:
250				state = LEX_ERROR;
251			}
252			break;
253		case LEX_ERROR:
254			// This shouldn't be reached
255			break;
256		}
257	}
258
259	switch (state) {
260	case LEX_EXPECT_DECIMAL:
261	case LEX_EXPECT_HEX:
262	case LEX_EXPECT_PREFIX:
263		lv->token.type = TOKEN_UINT_TYPE;
264		lv->token.uintValue = next;
265		break;
266	case LEX_EXPECT_IDENTIFIER:
267		lv->token.type = TOKEN_IDENTIFIER_TYPE;
268		lv->token.identifierValue = _strndup(tokenStart, string - tokenStart);
269		break;
270	case LEX_EXPECT_OPERATOR:
271		lvNext = malloc(sizeof(struct LexVector));
272		lvNext->next = lv->next;
273		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
274		lv->next = lvNext;
275		break;
276	case LEX_ERROR:
277	default:
278		lv->token.type = TOKEN_ERROR_TYPE;
279		break;
280	}
281	return adjusted;
282}
283
284static const int _operatorPrecedence[] = {
285	2,
286	1,
287	1,
288	0,
289	0
290};
291
292static struct ParseTree* _parseTreeCreate() {
293	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
294	tree->token.type = TOKEN_ERROR_TYPE;
295	tree->rhs = 0;
296	tree->lhs = 0;
297	return tree;
298}
299
300static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
301	struct ParseTree* newTree = 0;
302	while (lv) {
303		int newPrecedence;
304		switch (lv->token.type) {
305		case TOKEN_IDENTIFIER_TYPE:
306		case TOKEN_UINT_TYPE:
307			if (tree->token.type == TOKEN_ERROR_TYPE) {
308				tree->token = lv->token;
309				lv = lv->next;
310			} else {
311				tree->token.type = TOKEN_ERROR_TYPE;
312				return 0;
313			}
314			break;
315		case TOKEN_OPEN_PAREN_TYPE:
316			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
317			break;
318		case TOKEN_CLOSE_PAREN_TYPE:
319			if (openParens <= 0) {
320				tree->token.type = TOKEN_ERROR_TYPE;
321				return 0;
322			}
323			return lv->next;
324			break;
325		case TOKEN_OPERATOR_TYPE:
326			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
327			if (newPrecedence < precedence) {
328				newTree = _parseTreeCreate();
329				*newTree = *tree;
330				tree->lhs = newTree;
331				tree->rhs = _parseTreeCreate();
332				tree->token = lv->token;
333				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
334				if (tree->token.type == TOKEN_ERROR_TYPE) {
335					tree->token.type = TOKEN_ERROR_TYPE;
336				}
337			} else {
338				return lv;
339			}
340			break;
341		case TOKEN_ERROR_TYPE:
342			tree->token.type = TOKEN_ERROR_TYPE;
343			return 0;
344		}
345	}
346
347	return 0;
348}
349
350void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
351	if (!tree) {
352		return;
353	}
354
355	tree->token.type = TOKEN_ERROR_TYPE;
356	tree->lhs = 0;
357	tree->rhs = 0;
358
359	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
360}
361
362void lexFree(struct LexVector* lv) {
363	while (lv) {
364		struct LexVector* lvNext = lv->next;
365		free(lv);
366		lv = lvNext;
367	}
368}
369
370void parseFree(struct ParseTree* tree) {
371	if (!tree) {
372		return;
373	}
374
375	parseFree(tree->lhs);
376	parseFree(tree->rhs);
377
378	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
379		free(tree->token.identifierValue);
380	}
381	free(tree);
382}