src/debugger/parser.c (view raw)
1#include "parser.h"
2
3#include "util/string.h"
4
5static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
6 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
7 lvNext->token.type = TOKEN_OPERATOR_TYPE;
8 switch (operator) {
9 case '+':
10 lvNext->token.operatorValue = OP_ADD;
11 break;
12 case '-':
13 lvNext->token.operatorValue = OP_SUBTRACT;
14 break;
15 case '*':
16 lvNext->token.operatorValue = OP_MULTIPLY;
17 break;
18 case '/':
19 lvNext->token.operatorValue = OP_DIVIDE;
20 break;
21 default:
22 lvNext->token.type = TOKEN_ERROR_TYPE;
23 break;
24 }
25 lvNext->next = lv->next;
26 lv->next = lvNext;
27 lv = lvNext;
28 lvNext = malloc(sizeof(struct LexVector));
29 lvNext->next = lv->next;
30 lvNext->token.type = TOKEN_ERROR_TYPE;
31 lv->next = lvNext;
32 return lvNext;
33}
34
35size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
36 if (!string || length < 1) {
37 return 0;
38 }
39
40 uint32_t next = 0;
41 size_t adjusted = 0;
42
43 enum LexState state = LEX_ROOT;
44 const char* tokenStart = 0;
45 struct LexVector* lvNext;
46
47 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
48 char token = string[0];
49 ++string;
50 ++adjusted;
51 --length;
52 switch (state) {
53 case LEX_ROOT:
54 tokenStart = string - 1;
55 switch (token) {
56 case '1':
57 case '2':
58 case '3':
59 case '4':
60 case '5':
61 case '6':
62 case '7':
63 case '8':
64 case '9':
65 state = LEX_EXPECT_DECIMAL;
66 next = token - '0';
67 break;
68 case '0':
69 state = LEX_EXPECT_PREFIX;
70 next = 0;
71 break;
72 case '$':
73 state = LEX_EXPECT_HEX;
74 next = 0;
75 break;
76 case '(':
77 state = LEX_ROOT;
78 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
79 lvNext = malloc(sizeof(struct LexVector));
80 lvNext->next = lv->next;
81 lvNext->token.type = TOKEN_ERROR_TYPE;
82 lv->next = lvNext;
83 lv = lvNext;
84 break;
85 default:
86 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
87 state = LEX_EXPECT_IDENTIFIER;
88 } else {
89 state = LEX_ERROR;
90 }
91 break;
92 };
93 break;
94 case LEX_EXPECT_IDENTIFIER:
95 switch (token) {
96 case '+':
97 case '-':
98 case '*':
99 case '/':
100 lv->token.type = TOKEN_IDENTIFIER_TYPE;
101 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
102 lv = _lexOperator(lv, token);
103 state = LEX_ROOT;
104 break;
105 case ')':
106 lv->token.type = TOKEN_IDENTIFIER_TYPE;
107 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
108 state = LEX_EXPECT_OPERATOR;
109 break;
110 default:
111 break;
112 }
113 break;
114 case LEX_EXPECT_BINARY:
115 switch (token) {
116 case '0':
117 case '1':
118 // TODO: handle overflow
119 next <<= 1;
120 next += token - '0';
121 break;
122 case '+':
123 case '-':
124 case '*':
125 case '/':
126 lv->token.type = TOKEN_UINT_TYPE;
127 lv->token.uintValue = next;
128 lv = _lexOperator(lv, token);
129 state = LEX_ROOT;
130 break;
131 case ')':
132 lv->token.type = TOKEN_UINT_TYPE;
133 lv->token.uintValue = next;
134 state = LEX_EXPECT_OPERATOR;
135 break;
136 default:
137 state = LEX_ERROR;
138 break;
139 }
140 break;
141 case LEX_EXPECT_DECIMAL:
142 switch (token) {
143 case '0':
144 case '1':
145 case '2':
146 case '3':
147 case '4':
148 case '5':
149 case '6':
150 case '7':
151 case '8':
152 case '9':
153 // TODO: handle overflow
154 next *= 10;
155 next += token - '0';
156 break;
157 case '+':
158 case '-':
159 case '*':
160 case '/':
161 lv->token.type = TOKEN_UINT_TYPE;
162 lv->token.uintValue = next;
163 lv = _lexOperator(lv, token);
164 state = LEX_ROOT;
165 break;
166 case ')':
167 lv->token.type = TOKEN_UINT_TYPE;
168 lv->token.uintValue = next;
169 state = LEX_EXPECT_OPERATOR;
170 break;
171 default:
172 state = LEX_ERROR;
173 }
174 break;
175 case LEX_EXPECT_HEX:
176 switch (token) {
177 case '0':
178 case '1':
179 case '2':
180 case '3':
181 case '4':
182 case '5':
183 case '6':
184 case '7':
185 case '8':
186 case '9':
187 // TODO: handle overflow
188 next *= 16;
189 next += token - '0';
190 break;
191 case 'A':
192 case 'B':
193 case 'C':
194 case 'D':
195 case 'E':
196 case 'F':
197 // TODO: handle overflow
198 next *= 16;
199 next += token - 'A' + 10;
200 break;
201 case 'a':
202 case 'b':
203 case 'c':
204 case 'd':
205 case 'e':
206 case 'f':
207 // TODO: handle overflow
208 next *= 16;
209 next += token - 'a' + 10;
210 break;
211 case '+':
212 case '-':
213 case '*':
214 case '/':
215 lv->token.type = TOKEN_UINT_TYPE;
216 lv->token.uintValue = next;
217 lv = _lexOperator(lv, token);
218 state = LEX_ROOT;
219 break;
220 case ')':
221 lv->token.type = TOKEN_UINT_TYPE;
222 lv->token.uintValue = next;
223 state = LEX_EXPECT_OPERATOR;
224 break;
225 default:
226 state = LEX_ERROR;
227 break;
228 }
229 break;
230 case LEX_EXPECT_PREFIX:
231 switch (token) {
232 case 'X':
233 case 'x':
234 next = 0;
235 state = LEX_EXPECT_HEX;
236 break;
237 case 'B':
238 case 'b':
239 next = 0;
240 state = LEX_EXPECT_BINARY;
241 break;
242 case '+':
243 case '-':
244 case '*':
245 case '/':
246 lv->token.type = TOKEN_UINT_TYPE;
247 lv->token.uintValue = next;
248 lv = _lexOperator(lv, token);
249 state = LEX_ROOT;
250 break;
251 case ')':
252 lv->token.type = TOKEN_UINT_TYPE;
253 lv->token.uintValue = next;
254 state = LEX_EXPECT_OPERATOR;
255 break;
256 }
257 break;
258 case LEX_EXPECT_OPERATOR:
259 switch (token) {
260 case '+':
261 case '-':
262 case '*':
263 case '/':
264 lvNext = malloc(sizeof(struct LexVector));
265 lvNext->next = lv->next;
266 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
267 lv->next = lvNext;
268 lv = _lexOperator(lv->next, token);
269 state = LEX_ROOT;
270 break;
271 default:
272 state = LEX_ERROR;
273 }
274 break;
275 case LEX_ERROR:
276 // This shouldn't be reached
277 break;
278 }
279 }
280
281 switch (state) {
282 case LEX_EXPECT_BINARY:
283 case LEX_EXPECT_DECIMAL:
284 case LEX_EXPECT_HEX:
285 case LEX_EXPECT_PREFIX:
286 lv->token.type = TOKEN_UINT_TYPE;
287 lv->token.uintValue = next;
288 break;
289 case LEX_EXPECT_IDENTIFIER:
290 lv->token.type = TOKEN_IDENTIFIER_TYPE;
291 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
292 break;
293 case LEX_EXPECT_OPERATOR:
294 lvNext = malloc(sizeof(struct LexVector));
295 lvNext->next = lv->next;
296 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
297 lv->next = lvNext;
298 break;
299 case LEX_ERROR:
300 default:
301 lv->token.type = TOKEN_ERROR_TYPE;
302 break;
303 }
304 return adjusted;
305}
306
307static const int _operatorPrecedence[] = {
308 2,
309 1,
310 1,
311 0,
312 0
313};
314
315static struct ParseTree* _parseTreeCreate() {
316 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
317 tree->token.type = TOKEN_ERROR_TYPE;
318 tree->rhs = 0;
319 tree->lhs = 0;
320 return tree;
321}
322
323static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
324 struct ParseTree* newTree = 0;
325 while (lv) {
326 int newPrecedence;
327 switch (lv->token.type) {
328 case TOKEN_IDENTIFIER_TYPE:
329 case TOKEN_UINT_TYPE:
330 if (tree->token.type == TOKEN_ERROR_TYPE) {
331 tree->token = lv->token;
332 lv = lv->next;
333 } else {
334 tree->token.type = TOKEN_ERROR_TYPE;
335 return 0;
336 }
337 break;
338 case TOKEN_OPEN_PAREN_TYPE:
339 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
340 break;
341 case TOKEN_CLOSE_PAREN_TYPE:
342 if (openParens <= 0) {
343 tree->token.type = TOKEN_ERROR_TYPE;
344 return 0;
345 }
346 return lv->next;
347 break;
348 case TOKEN_OPERATOR_TYPE:
349 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
350 if (newPrecedence < precedence) {
351 newTree = _parseTreeCreate();
352 *newTree = *tree;
353 tree->lhs = newTree;
354 tree->rhs = _parseTreeCreate();
355 tree->token = lv->token;
356 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
357 if (tree->token.type == TOKEN_ERROR_TYPE) {
358 tree->token.type = TOKEN_ERROR_TYPE;
359 }
360 } else {
361 return lv;
362 }
363 break;
364 case TOKEN_ERROR_TYPE:
365 tree->token.type = TOKEN_ERROR_TYPE;
366 return 0;
367 }
368 }
369
370 return 0;
371}
372
373void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
374 if (!tree) {
375 return;
376 }
377
378 tree->token.type = TOKEN_ERROR_TYPE;
379 tree->lhs = 0;
380 tree->rhs = 0;
381
382 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
383}
384
385void lexFree(struct LexVector* lv) {
386 while (lv) {
387 struct LexVector* lvNext = lv->next;
388 free(lv);
389 lv = lvNext;
390 }
391}
392
393void parseFree(struct ParseTree* tree) {
394 if (!tree) {
395 return;
396 }
397
398 parseFree(tree->lhs);
399 parseFree(tree->rhs);
400
401 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
402 free(tree->token.identifierValue);
403 }
404 free(tree);
405}