all repos — mgba @ fd5b95024eaf0564ac8fb550763af6bb76c5dfa6

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3static inline char* _strndup(const char* start, size_t len) {
  4#ifdef HAVE_STRNDUP
  5	return strndup(start, len);
  6#else
  7	// This is suboptimal, but anything recent should have strndup
  8	char* out = malloc((len + 1) * sizeof(char));
  9	strncpy(out, start, len);
 10	out[len] = '\0';
 11	return out;
 12#endif
 13}
 14
 15static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
 16	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
 17	lvNext->token.type = TOKEN_OPERATOR_TYPE;
 18	switch (operator) {
 19	case '+':
 20		lvNext->token.operatorValue = OP_ADD;
 21		break;
 22	case '-':
 23		lvNext->token.operatorValue = OP_SUBTRACT;
 24		break;
 25	case '*':
 26		lvNext->token.operatorValue = OP_MULTIPLY;
 27		break;
 28	case '/':
 29		lvNext->token.operatorValue = OP_DIVIDE;
 30		break;
 31	default:
 32		lvNext->token.type = TOKEN_ERROR_TYPE;
 33		break;
 34	}
 35	lvNext->next = lv->next;
 36	lv->next = lvNext;
 37	lv = lvNext;
 38	lvNext = malloc(sizeof(struct LexVector));
 39	lvNext->next = lv->next;
 40	lvNext->token.type = TOKEN_ERROR_TYPE;
 41	lv->next = lvNext;
 42	return lvNext;
 43}
 44
 45size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 46	if (!string || length < 1) {
 47		return 0;
 48	}
 49
 50	uint32_t next = 0;
 51	size_t adjusted = 0;
 52
 53	enum LexState state = LEX_ROOT;
 54	const char* tokenStart = 0;
 55	struct LexVector* lvNext;
 56
 57	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 58		char token = string[0];
 59		++string;
 60		++adjusted;
 61		--length;
 62		switch (state) {
 63		case LEX_ROOT:
 64			tokenStart = string - 1;
 65			switch (token) {
 66			case '1':
 67			case '2':
 68			case '3':
 69			case '4':
 70			case '5':
 71			case '6':
 72			case '7':
 73			case '8':
 74			case '9':
 75				state = LEX_EXPECT_DECIMAL;
 76				next = token - '0';
 77				break;
 78			case '0':
 79				state = LEX_EXPECT_PREFIX;
 80				break;
 81			case '$':
 82				state = LEX_EXPECT_HEX;
 83				next = 0;
 84				break;
 85			case '(':
 86				state = LEX_ROOT;
 87				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 88				lvNext = malloc(sizeof(struct LexVector));
 89				lvNext->next = lv->next;
 90				lvNext->token.type = TOKEN_ERROR_TYPE;
 91				lv->next = lvNext;
 92				lv = lvNext;
 93				break;
 94			default:
 95				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 96					state = LEX_EXPECT_IDENTIFIER;
 97				} else {
 98					state = LEX_ERROR;
 99				}
100				break;
101			};
102			break;
103		case LEX_EXPECT_IDENTIFIER:
104			switch (token) {
105			case '+':
106			case '-':
107			case '*':
108			case '/':
109				lv->token.type = TOKEN_IDENTIFIER_TYPE;
110				lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
111				lv = _lexOperator(lv, token);
112				state = LEX_ROOT;
113				break;
114			case ')':
115				lv->token.type = TOKEN_IDENTIFIER_TYPE;
116				lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
117				state = LEX_EXPECT_OPERATOR;
118				break;
119			default:
120				break;
121			}
122			break;
123		case LEX_EXPECT_DECIMAL:
124			switch (token) {
125			case '0':
126			case '1':
127			case '2':
128			case '3':
129			case '4':
130			case '5':
131			case '6':
132			case '7':
133			case '8':
134			case '9':
135				// TODO: handle overflow
136				next *= 10;
137				next += token - '0';
138				break;
139			case '+':
140			case '-':
141			case '*':
142			case '/':
143				lv->token.type = TOKEN_UINT_TYPE;
144				lv->token.uintValue = next;
145				lv = _lexOperator(lv, token);
146				state = LEX_ROOT;
147				break;
148			case ')':
149				lv->token.type = TOKEN_UINT_TYPE;
150				lv->token.uintValue = next;
151				state = LEX_EXPECT_OPERATOR;
152				break;
153			default:
154				state = LEX_ERROR;
155			}
156			break;
157		case LEX_EXPECT_HEX:
158			switch (token) {
159			case '0':
160			case '1':
161			case '2':
162			case '3':
163			case '4':
164			case '5':
165			case '6':
166			case '7':
167			case '8':
168			case '9':
169				// TODO: handle overflow
170				next *= 16;
171				next += token - '0';
172				break;
173			case 'A':
174			case 'B':
175			case 'C':
176			case 'D':
177			case 'E':
178			case 'F':
179				// TODO: handle overflow
180				next *= 16;
181				next += token - 'A' + 10;
182				break;
183			case 'a':
184			case 'b':
185			case 'c':
186			case 'd':
187			case 'e':
188			case 'f':
189				// TODO: handle overflow
190				next *= 16;
191				next += token - 'a' + 10;
192				break;
193			case '+':
194			case '-':
195			case '*':
196			case '/':
197				lv->token.type = TOKEN_UINT_TYPE;
198				lv->token.uintValue = next;
199				lv = _lexOperator(lv, token);
200				state = LEX_ROOT;
201				break;
202			case ')':
203				lv->token.type = TOKEN_UINT_TYPE;
204				lv->token.uintValue = next;
205				state = LEX_EXPECT_OPERATOR;
206				break;
207			default:
208				state = LEX_ERROR;
209				break;
210			}
211			break;
212		case LEX_EXPECT_PREFIX:
213			switch (token) {
214			case 'X':
215			case 'x':
216				next = 0;
217				state = LEX_EXPECT_HEX;
218				break;
219			default:
220				state = LEX_ERROR;
221				break;
222			}
223			break;
224		case LEX_EXPECT_OPERATOR:
225			switch (token) {
226			case '+':
227			case '-':
228			case '*':
229			case '/':
230				lvNext = malloc(sizeof(struct LexVector));
231				lvNext->next = lv->next;
232				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
233				lv->next = lvNext;
234				lv = _lexOperator(lv->next, token);
235				state = LEX_ROOT;
236				break;
237			default:
238				state = LEX_ERROR;
239			}
240			break;
241		case LEX_ERROR:
242			// This shouldn't be reached
243			break;
244		}
245	}
246
247	switch (state) {
248	case LEX_EXPECT_DECIMAL:
249	case LEX_EXPECT_HEX:
250		lv->token.type = TOKEN_UINT_TYPE;
251		lv->token.uintValue = next;
252		break;
253	case LEX_EXPECT_IDENTIFIER:
254		lv->token.type = TOKEN_IDENTIFIER_TYPE;
255		lv->token.identifierValue = _strndup(tokenStart, string - tokenStart);
256		break;
257	case LEX_EXPECT_OPERATOR:
258		lvNext = malloc(sizeof(struct LexVector));
259		lvNext->next = lv->next;
260		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
261		lv->next = lvNext;
262		break;
263	case LEX_ERROR:
264	default:
265		lv->token.type = TOKEN_ERROR_TYPE;
266		break;
267	}
268	return adjusted;
269}
270
271static const int _operatorPrecedence[] = {
272	2,
273	1,
274	1,
275	0,
276	0
277};
278
279static struct ParseTree* _parseTreeCreate() {
280	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
281	tree->token.type = TOKEN_ERROR_TYPE;
282	tree->rhs = 0;
283	tree->lhs = 0;
284	return tree;
285}
286
287static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
288	struct ParseTree* newTree = 0;
289	while (lv) {
290		int newPrecedence;
291		switch (lv->token.type) {
292		case TOKEN_IDENTIFIER_TYPE:
293		case TOKEN_UINT_TYPE:
294			if (tree->token.type == TOKEN_ERROR_TYPE) {
295				tree->token = lv->token;
296				lv = lv->next;
297			} else {
298				tree->token.type = TOKEN_ERROR_TYPE;
299				return 0;
300			}
301			break;
302		case TOKEN_OPEN_PAREN_TYPE:
303			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
304			break;
305		case TOKEN_CLOSE_PAREN_TYPE:
306			if (openParens <= 0) {
307				tree->token.type = TOKEN_ERROR_TYPE;
308				return 0;
309			}
310			return lv->next;
311			break;
312		case TOKEN_OPERATOR_TYPE:
313			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
314			if (newPrecedence < precedence) {
315				newTree = _parseTreeCreate();
316				*newTree = *tree;
317				tree->lhs = newTree;
318				tree->rhs = _parseTreeCreate();
319				tree->token = lv->token;
320				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
321				if (tree->token.type == TOKEN_ERROR_TYPE) {
322					tree->token.type = TOKEN_ERROR_TYPE;
323				}
324			} else {
325				return lv;
326			}
327			break;
328		case TOKEN_ERROR_TYPE:
329			tree->token.type = TOKEN_ERROR_TYPE;
330			return 0;
331		}
332	}
333
334	return 0;
335}
336
337void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
338	if (!tree) {
339		return;
340	}
341
342	tree->token.type = TOKEN_ERROR_TYPE;
343	tree->lhs = 0;
344	tree->rhs = 0;
345
346	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
347}
348
349void lexFree(struct LexVector* lv) {
350	while (lv) {
351		struct LexVector* lvNext = lv->next;
352		free(lv);
353		lv = lvNext;
354	}
355}
356
357void parseFree(struct ParseTree* tree) {
358	if (!tree) {
359		return;
360	}
361
362	parseFree(tree->lhs);
363	parseFree(tree->rhs);
364
365	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
366		free(tree->token.identifierValue);
367	}
368	free(tree);
369}