all repos — mgba @ 4638e4a0170795799e9adc8cf97184d969ba417b

mGBA Game Boy Advance Emulator

src/debugger/parser.c (view raw)

  1#include "parser.h"
  2
  3static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
  4	struct LexVector* lvNext = malloc(sizeof(struct LexVector));
  5	lvNext->token.type = TOKEN_OPERATOR_TYPE;
  6	switch (operator) {
  7	case '+':
  8		lvNext->token.operatorValue = OP_ADD;
  9		break;
 10	case '-':
 11		lvNext->token.operatorValue = OP_SUBTRACT;
 12		break;
 13	case '*':
 14		lvNext->token.operatorValue = OP_MULTIPLY;
 15		break;
 16	case '/':
 17		lvNext->token.operatorValue = OP_DIVIDE;
 18		break;
 19	default:
 20		lvNext->token.type = TOKEN_ERROR_TYPE;
 21		break;
 22	}
 23	lvNext->next = lv->next;
 24	lv->next = lvNext;
 25	lv = lvNext;
 26	lvNext = malloc(sizeof(struct LexVector));
 27	lvNext->next = lv->next;
 28	lvNext->token.type = TOKEN_ERROR_TYPE;
 29	lv->next = lvNext;
 30	return lvNext;
 31}
 32
 33size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
 34	if (!string || length < 1) {
 35		return 0;
 36	}
 37
 38	uint32_t next = 0;
 39	size_t adjusted = 0;
 40
 41	enum LexState state = LEX_ROOT;
 42	const char* tokenStart = 0;
 43	struct LexVector* lvNext;
 44
 45	while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
 46		char token = string[0];
 47		++string;
 48		++adjusted;
 49		--length;
 50		switch (state) {
 51		case LEX_ROOT:
 52			tokenStart = string - 1;
 53			switch (token) {
 54			case '1':
 55			case '2':
 56			case '3':
 57			case '4':
 58			case '5':
 59			case '6':
 60			case '7':
 61			case '8':
 62			case '9':
 63				state = LEX_EXPECT_DECIMAL;
 64				next = token - '0';
 65				break;
 66			case '0':
 67				state = LEX_EXPECT_PREFIX;
 68				break;
 69			case '$':
 70				state = LEX_EXPECT_HEX;
 71				next = 0;
 72				break;
 73			case '(':
 74				state = LEX_ROOT;
 75				lv->token.type = TOKEN_OPEN_PAREN_TYPE;
 76				lvNext = malloc(sizeof(struct LexVector));
 77				lvNext->next = lv->next;
 78				lvNext->token.type = TOKEN_ERROR_TYPE;
 79				lv->next = lvNext;
 80				lv = lvNext;
 81				break;
 82			default:
 83				if (tolower(token) >= 'a' && tolower(token <= 'z')) {
 84					state = LEX_EXPECT_IDENTIFIER;
 85				} else {
 86					state = LEX_ERROR;
 87				}
 88				break;
 89			};
 90			break;
 91		case LEX_EXPECT_IDENTIFIER:
 92			switch (token) {
 93			case '+':
 94			case '-':
 95			case '*':
 96			case '/':
 97				lv->token.type = TOKEN_IDENTIFIER_TYPE;
 98				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
 99				lv = _lexOperator(lv, token);
100				state = LEX_ROOT;
101				break;
102			case ')':
103				lv->token.type = TOKEN_IDENTIFIER_TYPE;
104				lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
105				state = LEX_EXPECT_OPERATOR;
106				break;
107			default:
108				break;
109			}
110			break;
111		case LEX_EXPECT_DECIMAL:
112			switch (token) {
113			case '0':
114			case '1':
115			case '2':
116			case '3':
117			case '4':
118			case '5':
119			case '6':
120			case '7':
121			case '8':
122			case '9':
123				// TODO: handle overflow
124				next *= 10;
125				next += token - '0';
126				break;
127			case '+':
128			case '-':
129			case '*':
130			case '/':
131				lv->token.type = TOKEN_UINT_TYPE;
132				lv->token.uintValue = next;
133				lv = _lexOperator(lv, token);
134				state = LEX_ROOT;
135				break;
136			case ')':
137				lv->token.type = TOKEN_UINT_TYPE;
138				lv->token.uintValue = next;
139				state = LEX_EXPECT_OPERATOR;
140				break;
141			default:
142				state = LEX_ERROR;
143			}
144			break;
145		case LEX_EXPECT_HEX:
146			switch (token) {
147			case '0':
148			case '1':
149			case '2':
150			case '3':
151			case '4':
152			case '5':
153			case '6':
154			case '7':
155			case '8':
156			case '9':
157				// TODO: handle overflow
158				next *= 16;
159				next += token - '0';
160				break;
161			case 'A':
162			case 'B':
163			case 'C':
164			case 'D':
165			case 'E':
166			case 'F':
167				// TODO: handle overflow
168				next *= 16;
169				next += token - 'A' + 10;
170				break;
171			case 'a':
172			case 'b':
173			case 'c':
174			case 'd':
175			case 'e':
176			case 'f':
177				// TODO: handle overflow
178				next *= 16;
179				next += token - 'a' + 10;
180				break;
181			case '+':
182			case '-':
183			case '*':
184			case '/':
185				lv->token.type = TOKEN_UINT_TYPE;
186				lv->token.uintValue = next;
187				lv = _lexOperator(lv, token);
188				state = LEX_ROOT;
189				break;
190			case ')':
191				lv->token.type = TOKEN_UINT_TYPE;
192				lv->token.uintValue = next;
193				state = LEX_EXPECT_OPERATOR;
194				break;
195			default:
196				state = LEX_ERROR;
197				break;
198			}
199			break;
200		case LEX_EXPECT_PREFIX:
201			switch (token) {
202			case 'X':
203			case 'x':
204				next = 0;
205				state = LEX_EXPECT_HEX;
206				break;
207			default:
208				state = LEX_ERROR;
209				break;
210			}
211			break;
212		case LEX_EXPECT_OPERATOR:
213			switch (token) {
214			case '+':
215			case '-':
216			case '*':
217			case '/':
218				lvNext = malloc(sizeof(struct LexVector));
219				lvNext->next = lv->next;
220				lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
221				lv->next = lvNext;
222				lv = _lexOperator(lv->next, token);
223				state = LEX_ROOT;
224				break;
225			default:
226				state = LEX_ERROR;
227			}
228			break;
229		case LEX_ERROR:
230			// This shouldn't be reached
231			break;
232		}
233	}
234
235	switch (state) {
236	case LEX_EXPECT_DECIMAL:
237	case LEX_EXPECT_HEX:
238		lv->token.type = TOKEN_UINT_TYPE;
239		lv->token.uintValue = next;
240		break;
241	case LEX_EXPECT_IDENTIFIER:
242		lv->token.type = TOKEN_IDENTIFIER_TYPE;
243		lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
244		break;
245	case LEX_EXPECT_OPERATOR:
246		lvNext = malloc(sizeof(struct LexVector));
247		lvNext->next = lv->next;
248		lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
249		lv->next = lvNext;
250		break;
251	case LEX_ERROR:
252	default:
253		lv->token.type = TOKEN_ERROR_TYPE;
254		break;
255	}
256	return adjusted;
257}
258
259static const int _operatorPrecedence[] = {
260	2,
261	1,
262	1,
263	0
264};
265
266static struct ParseTree* _parseTreeCreate() {
267	struct ParseTree* tree = malloc(sizeof(struct ParseTree));
268	tree->token.type = TOKEN_ERROR_TYPE;
269	tree->rhs = 0;
270	tree->lhs = 0;
271	return tree;
272}
273
274static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
275	struct ParseTree* newTree = 0;
276	while (lv) {
277		int newPrecedence;
278		switch (lv->token.type) {
279		case TOKEN_IDENTIFIER_TYPE:
280		case TOKEN_UINT_TYPE:
281			if (tree->token.type == TOKEN_ERROR_TYPE) {
282				tree->token = lv->token;
283				lv = lv->next;
284			} else {
285				tree->token.type = TOKEN_ERROR_TYPE;
286				return 0;
287			}
288			break;
289		case TOKEN_OPEN_PAREN_TYPE:
290			lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
291			break;
292		case TOKEN_CLOSE_PAREN_TYPE:
293			if (openParens <= 0) {
294				tree->token.type = TOKEN_ERROR_TYPE;
295				return 0;
296			}
297			return lv->next;
298			break;
299		case TOKEN_OPERATOR_TYPE:
300			newPrecedence = _operatorPrecedence[lv->token.operatorValue];
301			if (newPrecedence < precedence) {
302				newTree = _parseTreeCreate();
303				*newTree = *tree;
304				tree->lhs = newTree;
305				tree->rhs = _parseTreeCreate();
306				tree->token = lv->token;
307				lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
308				if (tree->token.type == TOKEN_ERROR_TYPE) {
309					tree->token.type = TOKEN_ERROR_TYPE;
310				}
311			} else {
312				return lv;
313			}
314			break;
315		case TOKEN_ERROR_TYPE:
316			tree->token.type = TOKEN_ERROR_TYPE;
317			return 0;
318		}
319	}
320
321	return 0;
322}
323
324void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
325	if (!tree) {
326		return;
327	}
328
329	tree->token.type = TOKEN_ERROR_TYPE;
330	tree->lhs = 0;
331	tree->rhs = 0;
332
333	_parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
334}
335
336void lexFree(struct LexVector* lv) {
337	while (lv) {
338		struct LexVector* lvNext = lv->next;
339		free(lv);
340		lv = lvNext;
341	}
342}
343
344void parseFree(struct ParseTree* tree) {
345	if (!tree) {
346		return;
347	}
348
349	parseFree(tree->lhs);
350	parseFree(tree->rhs);
351
352	if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
353		free(tree->token.identifierValue);
354	}
355	free(tree);
356}