src/debugger/parser.c (view raw)
1#include "parser.h"
2
3static inline char* _strndup(const char* start, size_t len) {
4#ifdef HAVE_STRNDUP
5 return strndup(start, len);
6#else
7 // This is suboptimal, but anything recent should have strndup
8 char* out = malloc((len + 1) * sizeof(char));
9 strncpy(out, start, len);
10 out[len] = '\0';
11 return out;
12#endif
13}
14
15static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
16 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
17 lvNext->token.type = TOKEN_OPERATOR_TYPE;
18 switch (operator) {
19 case '+':
20 lvNext->token.operatorValue = OP_ADD;
21 break;
22 case '-':
23 lvNext->token.operatorValue = OP_SUBTRACT;
24 break;
25 case '*':
26 lvNext->token.operatorValue = OP_MULTIPLY;
27 break;
28 case '/':
29 lvNext->token.operatorValue = OP_DIVIDE;
30 break;
31 default:
32 lvNext->token.type = TOKEN_ERROR_TYPE;
33 break;
34 }
35 lvNext->next = lv->next;
36 lv->next = lvNext;
37 lv = lvNext;
38 lvNext = malloc(sizeof(struct LexVector));
39 lvNext->next = lv->next;
40 lvNext->token.type = TOKEN_ERROR_TYPE;
41 lv->next = lvNext;
42 return lvNext;
43}
44
45size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
46 if (!string || length < 1) {
47 return 0;
48 }
49
50 uint32_t next = 0;
51 size_t adjusted = 0;
52
53 enum LexState state = LEX_ROOT;
54 const char* tokenStart = 0;
55 struct LexVector* lvNext;
56
57 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
58 char token = string[0];
59 ++string;
60 ++adjusted;
61 --length;
62 switch (state) {
63 case LEX_ROOT:
64 tokenStart = string - 1;
65 switch (token) {
66 case '1':
67 case '2':
68 case '3':
69 case '4':
70 case '5':
71 case '6':
72 case '7':
73 case '8':
74 case '9':
75 state = LEX_EXPECT_DECIMAL;
76 next = token - '0';
77 break;
78 case '0':
79 state = LEX_EXPECT_PREFIX;
80 next = 0;
81 break;
82 case '$':
83 state = LEX_EXPECT_HEX;
84 next = 0;
85 break;
86 case '(':
87 state = LEX_ROOT;
88 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
89 lvNext = malloc(sizeof(struct LexVector));
90 lvNext->next = lv->next;
91 lvNext->token.type = TOKEN_ERROR_TYPE;
92 lv->next = lvNext;
93 lv = lvNext;
94 break;
95 default:
96 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
97 state = LEX_EXPECT_IDENTIFIER;
98 } else {
99 state = LEX_ERROR;
100 }
101 break;
102 };
103 break;
104 case LEX_EXPECT_IDENTIFIER:
105 switch (token) {
106 case '+':
107 case '-':
108 case '*':
109 case '/':
110 lv->token.type = TOKEN_IDENTIFIER_TYPE;
111 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
112 lv = _lexOperator(lv, token);
113 state = LEX_ROOT;
114 break;
115 case ')':
116 lv->token.type = TOKEN_IDENTIFIER_TYPE;
117 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
118 state = LEX_EXPECT_OPERATOR;
119 break;
120 default:
121 break;
122 }
123 break;
124 case LEX_EXPECT_BINARY:
125 switch (token) {
126 case '0':
127 case '1':
128 // TODO: handle overflow
129 next <<= 1;
130 next += token - '0';
131 break;
132 case '+':
133 case '-':
134 case '*':
135 case '/':
136 lv->token.type = TOKEN_UINT_TYPE;
137 lv->token.uintValue = next;
138 lv = _lexOperator(lv, token);
139 state = LEX_ROOT;
140 break;
141 case ')':
142 lv->token.type = TOKEN_UINT_TYPE;
143 lv->token.uintValue = next;
144 state = LEX_EXPECT_OPERATOR;
145 break;
146 default:
147 state = LEX_ERROR;
148 break;
149 }
150 break;
151 case LEX_EXPECT_DECIMAL:
152 switch (token) {
153 case '0':
154 case '1':
155 case '2':
156 case '3':
157 case '4':
158 case '5':
159 case '6':
160 case '7':
161 case '8':
162 case '9':
163 // TODO: handle overflow
164 next *= 10;
165 next += token - '0';
166 break;
167 case '+':
168 case '-':
169 case '*':
170 case '/':
171 lv->token.type = TOKEN_UINT_TYPE;
172 lv->token.uintValue = next;
173 lv = _lexOperator(lv, token);
174 state = LEX_ROOT;
175 break;
176 case ')':
177 lv->token.type = TOKEN_UINT_TYPE;
178 lv->token.uintValue = next;
179 state = LEX_EXPECT_OPERATOR;
180 break;
181 default:
182 state = LEX_ERROR;
183 }
184 break;
185 case LEX_EXPECT_HEX:
186 switch (token) {
187 case '0':
188 case '1':
189 case '2':
190 case '3':
191 case '4':
192 case '5':
193 case '6':
194 case '7':
195 case '8':
196 case '9':
197 // TODO: handle overflow
198 next *= 16;
199 next += token - '0';
200 break;
201 case 'A':
202 case 'B':
203 case 'C':
204 case 'D':
205 case 'E':
206 case 'F':
207 // TODO: handle overflow
208 next *= 16;
209 next += token - 'A' + 10;
210 break;
211 case 'a':
212 case 'b':
213 case 'c':
214 case 'd':
215 case 'e':
216 case 'f':
217 // TODO: handle overflow
218 next *= 16;
219 next += token - 'a' + 10;
220 break;
221 case '+':
222 case '-':
223 case '*':
224 case '/':
225 lv->token.type = TOKEN_UINT_TYPE;
226 lv->token.uintValue = next;
227 lv = _lexOperator(lv, token);
228 state = LEX_ROOT;
229 break;
230 case ')':
231 lv->token.type = TOKEN_UINT_TYPE;
232 lv->token.uintValue = next;
233 state = LEX_EXPECT_OPERATOR;
234 break;
235 default:
236 state = LEX_ERROR;
237 break;
238 }
239 break;
240 case LEX_EXPECT_PREFIX:
241 switch (token) {
242 case 'X':
243 case 'x':
244 next = 0;
245 state = LEX_EXPECT_HEX;
246 break;
247 case 'B':
248 case 'b':
249 next = 0;
250 state = LEX_EXPECT_BINARY;
251 break;
252 case '+':
253 case '-':
254 case '*':
255 case '/':
256 lv->token.type = TOKEN_UINT_TYPE;
257 lv->token.uintValue = next;
258 lv = _lexOperator(lv, token);
259 state = LEX_ROOT;
260 break;
261 case ')':
262 lv->token.type = TOKEN_UINT_TYPE;
263 lv->token.uintValue = next;
264 state = LEX_EXPECT_OPERATOR;
265 break;
266 }
267 break;
268 case LEX_EXPECT_OPERATOR:
269 switch (token) {
270 case '+':
271 case '-':
272 case '*':
273 case '/':
274 lvNext = malloc(sizeof(struct LexVector));
275 lvNext->next = lv->next;
276 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
277 lv->next = lvNext;
278 lv = _lexOperator(lv->next, token);
279 state = LEX_ROOT;
280 break;
281 default:
282 state = LEX_ERROR;
283 }
284 break;
285 case LEX_ERROR:
286 // This shouldn't be reached
287 break;
288 }
289 }
290
291 switch (state) {
292 case LEX_EXPECT_BINARY:
293 case LEX_EXPECT_DECIMAL:
294 case LEX_EXPECT_HEX:
295 case LEX_EXPECT_PREFIX:
296 lv->token.type = TOKEN_UINT_TYPE;
297 lv->token.uintValue = next;
298 break;
299 case LEX_EXPECT_IDENTIFIER:
300 lv->token.type = TOKEN_IDENTIFIER_TYPE;
301 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart);
302 break;
303 case LEX_EXPECT_OPERATOR:
304 lvNext = malloc(sizeof(struct LexVector));
305 lvNext->next = lv->next;
306 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
307 lv->next = lvNext;
308 break;
309 case LEX_ERROR:
310 default:
311 lv->token.type = TOKEN_ERROR_TYPE;
312 break;
313 }
314 return adjusted;
315}
316
317static const int _operatorPrecedence[] = {
318 2,
319 1,
320 1,
321 0,
322 0
323};
324
325static struct ParseTree* _parseTreeCreate() {
326 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
327 tree->token.type = TOKEN_ERROR_TYPE;
328 tree->rhs = 0;
329 tree->lhs = 0;
330 return tree;
331}
332
333static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
334 struct ParseTree* newTree = 0;
335 while (lv) {
336 int newPrecedence;
337 switch (lv->token.type) {
338 case TOKEN_IDENTIFIER_TYPE:
339 case TOKEN_UINT_TYPE:
340 if (tree->token.type == TOKEN_ERROR_TYPE) {
341 tree->token = lv->token;
342 lv = lv->next;
343 } else {
344 tree->token.type = TOKEN_ERROR_TYPE;
345 return 0;
346 }
347 break;
348 case TOKEN_OPEN_PAREN_TYPE:
349 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
350 break;
351 case TOKEN_CLOSE_PAREN_TYPE:
352 if (openParens <= 0) {
353 tree->token.type = TOKEN_ERROR_TYPE;
354 return 0;
355 }
356 return lv->next;
357 break;
358 case TOKEN_OPERATOR_TYPE:
359 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
360 if (newPrecedence < precedence) {
361 newTree = _parseTreeCreate();
362 *newTree = *tree;
363 tree->lhs = newTree;
364 tree->rhs = _parseTreeCreate();
365 tree->token = lv->token;
366 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
367 if (tree->token.type == TOKEN_ERROR_TYPE) {
368 tree->token.type = TOKEN_ERROR_TYPE;
369 }
370 } else {
371 return lv;
372 }
373 break;
374 case TOKEN_ERROR_TYPE:
375 tree->token.type = TOKEN_ERROR_TYPE;
376 return 0;
377 }
378 }
379
380 return 0;
381}
382
383void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
384 if (!tree) {
385 return;
386 }
387
388 tree->token.type = TOKEN_ERROR_TYPE;
389 tree->lhs = 0;
390 tree->rhs = 0;
391
392 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
393}
394
395void lexFree(struct LexVector* lv) {
396 while (lv) {
397 struct LexVector* lvNext = lv->next;
398 free(lv);
399 lv = lvNext;
400 }
401}
402
403void parseFree(struct ParseTree* tree) {
404 if (!tree) {
405 return;
406 }
407
408 parseFree(tree->lhs);
409 parseFree(tree->rhs);
410
411 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
412 free(tree->token.identifierValue);
413 }
414 free(tree);
415}