all repos — mgba @ 94c077703d9b56680f6bcaede01734b6a0ed1760

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
  4	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
  5	lvNext->token.type = TOKEN_OPERATOR_TYPE;
  6	switch (operator) {
  7	case '+':
  8		lvNext->token.operatorValue = OP_ADD;
  9		break;
 10	case '-':
 11		lvNext->token.operatorValue = OP_SUBTRACT;
 12		break;
 13	case '*':
 14		lvNext->token.operatorValue = OP_MULTIPLY;
 15		break;
 16	case '/':
 17		lvNext->token.operatorValue = OP_DIVIDE;
 18		break;
 19	default:
 20		lvNext->token.type = TOKEN_ERROR_TYPE;
 21		break;
 22	}
 23	lvNext->next = lv->next;
 24	lv->next = lvNext;
 25	lv = lvNext;
 26	lvNext = malloc(sizeof(struct LexVector));
 27	lvNext->next = lv->next;
 28	lvNext->token.type = TOKEN_ERROR_TYPE;
 29	lv->next = lvNext;
 30	return lvNext;
 31}
 32
 33size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 34	if (!string || length < 1) {
 35		return 0;
 36	}
 37
 38	uint32_t next = 0;
 39	size_t adjusted = 0;
 40
 41	enum LexState state = LEX_ROOT;
 42	const char* tokenStart = 0;
 43	struct LexVector* lvNext;
 44
 45	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 46		char token = string[0];
 47		++string;
 48		++adjusted;
 49		--length;
 50		switch (state) {
 51		case LEX_ROOT:
 52			tokenStart = string - 1;
 53			switch (token) {
 54			case '1':
 55			case '2':
 56			case '3':
 57			case '4':
 58			case '5':
 59			case '6':
 60			case '7':
 61			case '8':
 62			case '9':
 63				state = LEX_EXPECT_DECIMAL;
 64				next = token - '0';
 65				break;
 66			case '0':
 67				state = LEX_EXPECT_PREFIX;
 68				break;
 69			case '$':
 70				state = LEX_EXPECT_HEX;
 71				next = 0;
 72				break;
 73			case '(':
 74				state = LEX_ROOT;
 75				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 76				lvNext = malloc(sizeof(struct LexVector));
 77				lvNext->next = lv->next;
 78				lvNext->token.type = TOKEN_ERROR_TYPE;
 79				lv->next = lvNext;
 80				lv = lvNext;
 81				break;
 82			default:
 83				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 84					state = LEX_EXPECT_IDENTIFIER;
 85				} else {
 86					state = LEX_ERROR;
 87				}
 88				break;
 89			};
 90			break;
 91		case LEX_EXPECT_IDENTIFIER:
 92			switch (token) {
 93			case '+':
 94			case '-':
 95			case '*':
 96			case '/':
 97				lv->token.type = TOKEN_IDENTIFIER_TYPE;
 98				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
 99				lv = _lexOperator(lv, token);
100				state = LEX_ROOT;
101				break;
102			case ')':
103				lv->token.type = TOKEN_IDENTIFIER_TYPE;
104				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
105				state = LEX_EXPECT_OPERATOR;
106				break;
107			default:
108				break;
109			}
110			break;
111		case LEX_EXPECT_DECIMAL:
112			switch (token) {
113			case '0':
114			case '1':
115			case '2':
116			case '3':
117			case '4':
118			case '5':
119			case '6':
120			case '7':
121			case '8':
122			case '9':
123				// TODO: handle overflow
124				next *= 10;
125				next += token - '0';
126				break;
127			case '+':
128			case '-':
129			case '*':
130			case '/':
131				lv->token.type = TOKEN_UINT_TYPE;
132				lv->token.uintValue = next;
133				lv = _lexOperator(lv, token);
134				state = LEX_ROOT;
135				break;
136			case ')':
137				lv->token.type = TOKEN_UINT_TYPE;
138				lv->token.uintValue = next;
139				state = LEX_EXPECT_OPERATOR;
140				break;
141			default:
142				state = LEX_ERROR;
143			}
144			break;
145		case LEX_EXPECT_HEX:
146			switch (token) {
147			case '0':
148			case '1':
149			case '2':
150			case '3':
151			case '4':
152			case '5':
153			case '6':
154			case '7':
155			case '8':
156			case '9':
157				// TODO: handle overflow
158				next *= 16;
159				next += token - '0';
160				break;
161			case 'A':
162			case 'B':
163			case 'C':
164			case 'D':
165			case 'E':
166			case 'F':
167				// TODO: handle overflow
168				next *= 16;
169				next += token - 'A' + 10;
170				break;
171			case 'a':
172			case 'b':
173			case 'c':
174			case 'd':
175			case 'e':
176			case 'f':
177				// TODO: handle overflow
178				next *= 16;
179				next += token - 'a' + 10;
180				break;
181			case '+':
182			case '-':
183			case '*':
184			case '/':
185				lv->token.type = TOKEN_UINT_TYPE;
186				lv->token.uintValue = next;
187				lv = _lexOperator(lv, token);
188				state = LEX_ROOT;
189				break;
190			case ')':
191				lv->token.type = TOKEN_UINT_TYPE;
192				lv->token.uintValue = next;
193				state = LEX_EXPECT_OPERATOR;
194				break;
195			default:
196				state = LEX_ERROR;
197				break;
198			}
199			break;
200		case LEX_EXPECT_PREFIX:
201			switch (token) {
202			case 'X':
203			case 'x':
204				next = 0;
205				state = LEX_EXPECT_HEX;
206				break;
207			default:
208				state = LEX_ERROR;
209				break;
210			}
211			break;
212		case LEX_EXPECT_OPERATOR:
213			switch (token) {
214			case '+':
215			case '-':
216			case '*':
217			case '/':
218				lvNext = malloc(sizeof(struct LexVector));
219				lvNext->next = lv->next;
220				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
221				lv->next = lvNext;
222				lv = _lexOperator(lv->next, token);
223				state = LEX_ROOT;
224				break;
225			default:
226				state = LEX_ERROR;
227			}
228			break;
229		case LEX_ERROR:
230			// This shouldn't be reached
231			break;
232		}
233	}
234
235	switch (state) {
236	case LEX_EXPECT_DECIMAL:
237	case LEX_EXPECT_HEX:
238		lv->token.type = TOKEN_UINT_TYPE;
239		lv->token.uintValue = next;
240		break;
241	case LEX_EXPECT_IDENTIFIER:
242		lv->token.type = TOKEN_IDENTIFIER_TYPE;
243		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
244		break;
245	case LEX_EXPECT_OPERATOR:
246		lvNext = malloc(sizeof(struct LexVector));
247		lvNext->next = lv->next;
248		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
249		lv->next = lvNext;
250		break;
251	case LEX_ERROR:
252	default:
253		lv->token.type = TOKEN_ERROR_TYPE;
254		break;
255	}
256	return adjusted;
257}
258
259static const int _operatorPrecedence[] = {
260	2,
261	1,
262	1,
263	0,
264	0
265};
266
267static struct ParseTree* _parseTreeCreate() {
268	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
269	tree->token.type = TOKEN_ERROR_TYPE;
270	tree->rhs = 0;
271	tree->lhs = 0;
272	return tree;
273}
274
275static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
276	struct ParseTree* newTree = 0;
277	while (lv) {
278		int newPrecedence;
279		switch (lv->token.type) {
280		case TOKEN_IDENTIFIER_TYPE:
281		case TOKEN_UINT_TYPE:
282			if (tree->token.type == TOKEN_ERROR_TYPE) {
283				tree->token = lv->token;
284				lv = lv->next;
285			} else {
286				tree->token.type = TOKEN_ERROR_TYPE;
287				return 0;
288			}
289			break;
290		case TOKEN_OPEN_PAREN_TYPE:
291			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
292			break;
293		case TOKEN_CLOSE_PAREN_TYPE:
294			if (openParens <= 0) {
295				tree->token.type = TOKEN_ERROR_TYPE;
296				return 0;
297			}
298			return lv->next;
299			break;
300		case TOKEN_OPERATOR_TYPE:
301			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
302			if (newPrecedence < precedence) {
303				newTree = _parseTreeCreate();
304				*newTree = *tree;
305				tree->lhs = newTree;
306				tree->rhs = _parseTreeCreate();
307				tree->token = lv->token;
308				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
309				if (tree->token.type == TOKEN_ERROR_TYPE) {
310					tree->token.type = TOKEN_ERROR_TYPE;
311				}
312			} else {
313				return lv;
314			}
315			break;
316		case TOKEN_ERROR_TYPE:
317			tree->token.type = TOKEN_ERROR_TYPE;
318			return 0;
319		}
320	}
321
322	return 0;
323}
324
325void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
326	if (!tree) {
327		return;
328	}
329
330	tree->token.type = TOKEN_ERROR_TYPE;
331	tree->lhs = 0;
332	tree->rhs = 0;
333
334	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
335}
336
337void lexFree(struct LexVector* lv) {
338	while (lv) {
339		struct LexVector* lvNext = lv->next;
340		free(lv);
341		lv = lvNext;
342	}
343}
344
345void parseFree(struct ParseTree* tree) {
346	if (!tree) {
347		return;
348	}
349
350	parseFree(tree->lhs);
351	parseFree(tree->rhs);
352
353	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
354		free(tree->token.identifierValue);
355	}
356	free(tree);
357}