src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba/internal/debugger/parser.h>
7
8#include <mgba-util/string.h>
9
10static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
11 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
12 lvNext->token.type = TOKEN_OPERATOR_TYPE;
13 switch (operator) {
14 case '+':
15 lvNext->token.operatorValue = OP_ADD;
16 break;
17 case '-':
18 lvNext->token.operatorValue = OP_SUBTRACT;
19 break;
20 case '*':
21 lvNext->token.operatorValue = OP_MULTIPLY;
22 break;
23 case '/':
24 lvNext->token.operatorValue = OP_DIVIDE;
25 break;
26 default:
27 lvNext->token.type = TOKEN_ERROR_TYPE;
28 break;
29 }
30 lvNext->next = lv->next;
31 lv->next = lvNext;
32 lv = lvNext;
33 lvNext = malloc(sizeof(struct LexVector));
34 lvNext->next = lv->next;
35 lvNext->token.type = TOKEN_ERROR_TYPE;
36 lv->next = lvNext;
37 return lvNext;
38}
39
40size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
41 if (!string || length < 1) {
42 return 0;
43 }
44
45 uint32_t next = 0;
46 size_t adjusted = 0;
47
48 enum LexState state = LEX_ROOT;
49 const char* tokenStart = 0;
50 struct LexVector* lvNext;
51
52 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
53 char token = string[0];
54 ++string;
55 ++adjusted;
56 --length;
57 switch (state) {
58 case LEX_ROOT:
59 tokenStart = string - 1;
60 switch (token) {
61 case '1':
62 case '2':
63 case '3':
64 case '4':
65 case '5':
66 case '6':
67 case '7':
68 case '8':
69 case '9':
70 state = LEX_EXPECT_DECIMAL;
71 next = token - '0';
72 break;
73 case '0':
74 state = LEX_EXPECT_PREFIX;
75 next = 0;
76 break;
77 case '$':
78 state = LEX_EXPECT_HEX;
79 next = 0;
80 break;
81 case '(':
82 state = LEX_ROOT;
83 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
84 lvNext = malloc(sizeof(struct LexVector));
85 lvNext->next = lv->next;
86 lvNext->token.type = TOKEN_ERROR_TYPE;
87 lv->next = lvNext;
88 lv = lvNext;
89 break;
90 default:
91 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
92 state = LEX_EXPECT_IDENTIFIER;
93 } else {
94 state = LEX_ERROR;
95 }
96 break;
97 };
98 break;
99 case LEX_EXPECT_IDENTIFIER:
100 switch (token) {
101 case '+':
102 case '-':
103 case '*':
104 case '/':
105 lv->token.type = TOKEN_IDENTIFIER_TYPE;
106 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
107 lv = _lexOperator(lv, token);
108 state = LEX_ROOT;
109 break;
110 case ')':
111 lv->token.type = TOKEN_IDENTIFIER_TYPE;
112 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
113 state = LEX_EXPECT_OPERATOR;
114 break;
115 default:
116 break;
117 }
118 break;
119 case LEX_EXPECT_BINARY:
120 switch (token) {
121 case '0':
122 case '1':
123 // TODO: handle overflow
124 next <<= 1;
125 next += token - '0';
126 break;
127 case '+':
128 case '-':
129 case '*':
130 case '/':
131 lv->token.type = TOKEN_UINT_TYPE;
132 lv->token.uintValue = next;
133 lv = _lexOperator(lv, token);
134 state = LEX_ROOT;
135 break;
136 case ')':
137 lv->token.type = TOKEN_UINT_TYPE;
138 lv->token.uintValue = next;
139 state = LEX_EXPECT_OPERATOR;
140 break;
141 default:
142 state = LEX_ERROR;
143 break;
144 }
145 break;
146 case LEX_EXPECT_DECIMAL:
147 switch (token) {
148 case '0':
149 case '1':
150 case '2':
151 case '3':
152 case '4':
153 case '5':
154 case '6':
155 case '7':
156 case '8':
157 case '9':
158 // TODO: handle overflow
159 next *= 10;
160 next += token - '0';
161 break;
162 case '+':
163 case '-':
164 case '*':
165 case '/':
166 lv->token.type = TOKEN_UINT_TYPE;
167 lv->token.uintValue = next;
168 lv = _lexOperator(lv, token);
169 state = LEX_ROOT;
170 break;
171 case ')':
172 lv->token.type = TOKEN_UINT_TYPE;
173 lv->token.uintValue = next;
174 state = LEX_EXPECT_OPERATOR;
175 break;
176 default:
177 state = LEX_ERROR;
178 }
179 break;
180 case LEX_EXPECT_HEX:
181 switch (token) {
182 case '0':
183 case '1':
184 case '2':
185 case '3':
186 case '4':
187 case '5':
188 case '6':
189 case '7':
190 case '8':
191 case '9':
192 // TODO: handle overflow
193 next *= 16;
194 next += token - '0';
195 break;
196 case 'A':
197 case 'B':
198 case 'C':
199 case 'D':
200 case 'E':
201 case 'F':
202 // TODO: handle overflow
203 next *= 16;
204 next += token - 'A' + 10;
205 break;
206 case 'a':
207 case 'b':
208 case 'c':
209 case 'd':
210 case 'e':
211 case 'f':
212 // TODO: handle overflow
213 next *= 16;
214 next += token - 'a' + 10;
215 break;
216 case '+':
217 case '-':
218 case '*':
219 case '/':
220 lv->token.type = TOKEN_UINT_TYPE;
221 lv->token.uintValue = next;
222 lv = _lexOperator(lv, token);
223 state = LEX_ROOT;
224 break;
225 case ':':
226 lv->token.type = TOKEN_SEGMENT_TYPE;
227 lv->token.uintValue = next;
228 lvNext = malloc(sizeof(struct LexVector));
229 lvNext->next = lv->next;
230 lvNext->token.type = TOKEN_UINT_TYPE;
231 lv->next = lvNext;
232 lv = lvNext;
233 next = 0;
234 state = LEX_EXPECT_HEX;
235 break;
236 case ')':
237 lv->token.type = TOKEN_UINT_TYPE;
238 lv->token.uintValue = next;
239 state = LEX_EXPECT_OPERATOR;
240 break;
241 default:
242 state = LEX_ERROR;
243 break;
244 }
245 break;
246 case LEX_EXPECT_PREFIX:
247 switch (token) {
248 case 'X':
249 case 'x':
250 next = 0;
251 state = LEX_EXPECT_HEX;
252 break;
253 case 'B':
254 case 'b':
255 next = 0;
256 state = LEX_EXPECT_BINARY;
257 break;
258 case '+':
259 case '-':
260 case '*':
261 case '/':
262 lv->token.type = TOKEN_UINT_TYPE;
263 lv->token.uintValue = next;
264 lv = _lexOperator(lv, token);
265 state = LEX_ROOT;
266 break;
267 case ')':
268 lv->token.type = TOKEN_UINT_TYPE;
269 lv->token.uintValue = next;
270 state = LEX_EXPECT_OPERATOR;
271 break;
272 case '0':
273 case '1':
274 case '2':
275 case '3':
276 case '4':
277 case '5':
278 case '6':
279 case '7':
280 case '8':
281 case '9':
282 next = token - '0';
283 state = LEX_EXPECT_DECIMAL;
284 break;
285 default:
286 state = LEX_ERROR;
287 }
288 break;
289 case LEX_EXPECT_OPERATOR:
290 switch (token) {
291 case '+':
292 case '-':
293 case '*':
294 case '/':
295 lvNext = malloc(sizeof(struct LexVector));
296 lvNext->next = lv->next;
297 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
298 lv->next = lvNext;
299 lv = _lexOperator(lv->next, token);
300 state = LEX_ROOT;
301 break;
302 default:
303 state = LEX_ERROR;
304 }
305 break;
306 case LEX_ERROR:
307 // This shouldn't be reached
308 break;
309 }
310 }
311
312 switch (state) {
313 case LEX_EXPECT_BINARY:
314 case LEX_EXPECT_DECIMAL:
315 case LEX_EXPECT_HEX:
316 case LEX_EXPECT_PREFIX:
317 lv->token.type = TOKEN_UINT_TYPE;
318 lv->token.uintValue = next;
319 break;
320 case LEX_EXPECT_IDENTIFIER:
321 lv->token.type = TOKEN_IDENTIFIER_TYPE;
322 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
323 break;
324 case LEX_EXPECT_OPERATOR:
325 lvNext = malloc(sizeof(struct LexVector));
326 lvNext->next = lv->next;
327 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
328 lv->next = lvNext;
329 break;
330 case LEX_ERROR:
331 default:
332 lv->token.type = TOKEN_ERROR_TYPE;
333 break;
334 }
335 return adjusted;
336}
337
338static const int _operatorPrecedence[] = {
339 2,
340 1,
341 1,
342 0,
343 0
344};
345
346static struct ParseTree* _parseTreeCreate() {
347 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
348 tree->token.type = TOKEN_ERROR_TYPE;
349 tree->rhs = 0;
350 tree->lhs = 0;
351 return tree;
352}
353
354static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
355 struct ParseTree* newTree = 0;
356 while (lv) {
357 int newPrecedence;
358 switch (lv->token.type) {
359 case TOKEN_IDENTIFIER_TYPE:
360 case TOKEN_UINT_TYPE:
361 if (tree->token.type == TOKEN_ERROR_TYPE) {
362 tree->token = lv->token;
363 lv = lv->next;
364 } else {
365 tree->token.type = TOKEN_ERROR_TYPE;
366 return 0;
367 }
368 break;
369 case TOKEN_SEGMENT_TYPE:
370 tree->lhs = _parseTreeCreate();
371 tree->lhs->token.type = TOKEN_UINT_TYPE;
372 tree->lhs->token.uintValue = lv->token.uintValue;
373 tree->rhs = _parseTreeCreate();
374 tree->token.type = TOKEN_SEGMENT_TYPE;
375 lv = _parseExpression(tree->rhs, lv->next, precedence, openParens);
376 if (tree->token.type == TOKEN_ERROR_TYPE) {
377 tree->token.type = TOKEN_ERROR_TYPE;
378 }
379 break;
380 case TOKEN_OPEN_PAREN_TYPE:
381 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
382 break;
383 case TOKEN_CLOSE_PAREN_TYPE:
384 if (openParens <= 0) {
385 tree->token.type = TOKEN_ERROR_TYPE;
386 return 0;
387 }
388 return lv->next;
389 break;
390 case TOKEN_OPERATOR_TYPE:
391 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
392 if (newPrecedence < precedence) {
393 newTree = _parseTreeCreate();
394 *newTree = *tree;
395 tree->lhs = newTree;
396 tree->rhs = _parseTreeCreate();
397 tree->token = lv->token;
398 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
399 if (tree->token.type == TOKEN_ERROR_TYPE) {
400 tree->token.type = TOKEN_ERROR_TYPE;
401 }
402 } else {
403 return lv;
404 }
405 break;
406 case TOKEN_ERROR_TYPE:
407 tree->token.type = TOKEN_ERROR_TYPE;
408 return 0;
409 }
410 }
411
412 return 0;
413}
414
415void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
416 if (!tree) {
417 return;
418 }
419
420 tree->token.type = TOKEN_ERROR_TYPE;
421 tree->lhs = 0;
422 tree->rhs = 0;
423
424 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
425}
426
427void lexFree(struct LexVector* lv) {
428 while (lv) {
429 struct LexVector* lvNext = lv->next;
430 free(lv);
431 lv = lvNext;
432 }
433}
434
435void parseFree(struct ParseTree* tree) {
436 if (!tree) {
437 return;
438 }
439
440 parseFree(tree->lhs);
441 parseFree(tree->rhs);
442
443 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
444 free(tree->token.identifierValue);
445 }
446 free(tree);
447}