src/debugger/parser.c (view raw)
1#include "parser.h"
2
3static inline char* _strndup(const char* start, size_t len) {
4#ifdef HAVE_STRNDUP
5 return strndup(start, len);
6#else
7 // This is suboptimal, but anything recent should have strndup
8 char* out = malloc((len + 1) * sizeof(char));
9 strncpy(out, start, len);
10 out[len] = '\0';
11 return out;
12#endif
13}
14
15static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
16 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
17 lvNext->token.type = TOKEN_OPERATOR_TYPE;
18 switch (operator) {
19 case '+':
20 lvNext->token.operatorValue = OP_ADD;
21 break;
22 case '-':
23 lvNext->token.operatorValue = OP_SUBTRACT;
24 break;
25 case '*':
26 lvNext->token.operatorValue = OP_MULTIPLY;
27 break;
28 case '/':
29 lvNext->token.operatorValue = OP_DIVIDE;
30 break;
31 default:
32 lvNext->token.type = TOKEN_ERROR_TYPE;
33 break;
34 }
35 lvNext->next = lv->next;
36 lv->next = lvNext;
37 lv = lvNext;
38 lvNext = malloc(sizeof(struct LexVector));
39 lvNext->next = lv->next;
40 lvNext->token.type = TOKEN_ERROR_TYPE;
41 lv->next = lvNext;
42 return lvNext;
43}
44
45size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
46 if (!string || length < 1) {
47 return 0;
48 }
49
50 uint32_t next = 0;
51 size_t adjusted = 0;
52
53 enum LexState state = LEX_ROOT;
54 const char* tokenStart = 0;
55 struct LexVector* lvNext;
56
57 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
58 char token = string[0];
59 ++string;
60 ++adjusted;
61 --length;
62 switch (state) {
63 case LEX_ROOT:
64 tokenStart = string - 1;
65 switch (token) {
66 case '1':
67 case '2':
68 case '3':
69 case '4':
70 case '5':
71 case '6':
72 case '7':
73 case '8':
74 case '9':
75 state = LEX_EXPECT_DECIMAL;
76 next = token - '0';
77 break;
78 case '0':
79 state = LEX_EXPECT_PREFIX;
80 next = 0;
81 break;
82 case '$':
83 state = LEX_EXPECT_HEX;
84 next = 0;
85 break;
86 case '(':
87 state = LEX_ROOT;
88 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
89 lvNext = malloc(sizeof(struct LexVector));
90 lvNext->next = lv->next;
91 lvNext->token.type = TOKEN_ERROR_TYPE;
92 lv->next = lvNext;
93 lv = lvNext;
94 break;
95 default:
96 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
97 state = LEX_EXPECT_IDENTIFIER;
98 } else {
99 state = LEX_ERROR;
100 }
101 break;
102 };
103 break;
104 case LEX_EXPECT_IDENTIFIER:
105 switch (token) {
106 case '+':
107 case '-':
108 case '*':
109 case '/':
110 lv->token.type = TOKEN_IDENTIFIER_TYPE;
111 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
112 lv = _lexOperator(lv, token);
113 state = LEX_ROOT;
114 break;
115 case ')':
116 lv->token.type = TOKEN_IDENTIFIER_TYPE;
117 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
118 state = LEX_EXPECT_OPERATOR;
119 break;
120 default:
121 break;
122 }
123 break;
124 case LEX_EXPECT_DECIMAL:
125 switch (token) {
126 case '0':
127 case '1':
128 case '2':
129 case '3':
130 case '4':
131 case '5':
132 case '6':
133 case '7':
134 case '8':
135 case '9':
136 // TODO: handle overflow
137 next *= 10;
138 next += token - '0';
139 break;
140 case '+':
141 case '-':
142 case '*':
143 case '/':
144 lv->token.type = TOKEN_UINT_TYPE;
145 lv->token.uintValue = next;
146 lv = _lexOperator(lv, token);
147 state = LEX_ROOT;
148 break;
149 case ')':
150 lv->token.type = TOKEN_UINT_TYPE;
151 lv->token.uintValue = next;
152 state = LEX_EXPECT_OPERATOR;
153 break;
154 default:
155 state = LEX_ERROR;
156 }
157 break;
158 case LEX_EXPECT_HEX:
159 switch (token) {
160 case '0':
161 case '1':
162 case '2':
163 case '3':
164 case '4':
165 case '5':
166 case '6':
167 case '7':
168 case '8':
169 case '9':
170 // TODO: handle overflow
171 next *= 16;
172 next += token - '0';
173 break;
174 case 'A':
175 case 'B':
176 case 'C':
177 case 'D':
178 case 'E':
179 case 'F':
180 // TODO: handle overflow
181 next *= 16;
182 next += token - 'A' + 10;
183 break;
184 case 'a':
185 case 'b':
186 case 'c':
187 case 'd':
188 case 'e':
189 case 'f':
190 // TODO: handle overflow
191 next *= 16;
192 next += token - 'a' + 10;
193 break;
194 case '+':
195 case '-':
196 case '*':
197 case '/':
198 lv->token.type = TOKEN_UINT_TYPE;
199 lv->token.uintValue = next;
200 lv = _lexOperator(lv, token);
201 state = LEX_ROOT;
202 break;
203 case ')':
204 lv->token.type = TOKEN_UINT_TYPE;
205 lv->token.uintValue = next;
206 state = LEX_EXPECT_OPERATOR;
207 break;
208 default:
209 state = LEX_ERROR;
210 break;
211 }
212 break;
213 case LEX_EXPECT_PREFIX:
214 switch (token) {
215 case 'X':
216 case 'x':
217 next = 0;
218 state = LEX_EXPECT_HEX;
219 break;
220 case '+':
221 case '-':
222 case '*':
223 case '/':
224 lv->token.type = TOKEN_UINT_TYPE;
225 lv->token.uintValue = next;
226 lv = _lexOperator(lv, token);
227 state = LEX_ROOT;
228 break;
229 case ')':
230 lv->token.type = TOKEN_UINT_TYPE;
231 lv->token.uintValue = next;
232 state = LEX_EXPECT_OPERATOR;
233 break;
234 }
235 break;
236 case LEX_EXPECT_OPERATOR:
237 switch (token) {
238 case '+':
239 case '-':
240 case '*':
241 case '/':
242 lvNext = malloc(sizeof(struct LexVector));
243 lvNext->next = lv->next;
244 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
245 lv->next = lvNext;
246 lv = _lexOperator(lv->next, token);
247 state = LEX_ROOT;
248 break;
249 default:
250 state = LEX_ERROR;
251 }
252 break;
253 case LEX_ERROR:
254 // This shouldn't be reached
255 break;
256 }
257 }
258
259 switch (state) {
260 case LEX_EXPECT_DECIMAL:
261 case LEX_EXPECT_HEX:
262 case LEX_EXPECT_PREFIX:
263 lv->token.type = TOKEN_UINT_TYPE;
264 lv->token.uintValue = next;
265 break;
266 case LEX_EXPECT_IDENTIFIER:
267 lv->token.type = TOKEN_IDENTIFIER_TYPE;
268 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart);
269 break;
270 case LEX_EXPECT_OPERATOR:
271 lvNext = malloc(sizeof(struct LexVector));
272 lvNext->next = lv->next;
273 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
274 lv->next = lvNext;
275 break;
276 case LEX_ERROR:
277 default:
278 lv->token.type = TOKEN_ERROR_TYPE;
279 break;
280 }
281 return adjusted;
282}
283
284static const int _operatorPrecedence[] = {
285 2,
286 1,
287 1,
288 0,
289 0
290};
291
292static struct ParseTree* _parseTreeCreate() {
293 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
294 tree->token.type = TOKEN_ERROR_TYPE;
295 tree->rhs = 0;
296 tree->lhs = 0;
297 return tree;
298}
299
300static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
301 struct ParseTree* newTree = 0;
302 while (lv) {
303 int newPrecedence;
304 switch (lv->token.type) {
305 case TOKEN_IDENTIFIER_TYPE:
306 case TOKEN_UINT_TYPE:
307 if (tree->token.type == TOKEN_ERROR_TYPE) {
308 tree->token = lv->token;
309 lv = lv->next;
310 } else {
311 tree->token.type = TOKEN_ERROR_TYPE;
312 return 0;
313 }
314 break;
315 case TOKEN_OPEN_PAREN_TYPE:
316 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
317 break;
318 case TOKEN_CLOSE_PAREN_TYPE:
319 if (openParens <= 0) {
320 tree->token.type = TOKEN_ERROR_TYPE;
321 return 0;
322 }
323 return lv->next;
324 break;
325 case TOKEN_OPERATOR_TYPE:
326 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
327 if (newPrecedence < precedence) {
328 newTree = _parseTreeCreate();
329 *newTree = *tree;
330 tree->lhs = newTree;
331 tree->rhs = _parseTreeCreate();
332 tree->token = lv->token;
333 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
334 if (tree->token.type == TOKEN_ERROR_TYPE) {
335 tree->token.type = TOKEN_ERROR_TYPE;
336 }
337 } else {
338 return lv;
339 }
340 break;
341 case TOKEN_ERROR_TYPE:
342 tree->token.type = TOKEN_ERROR_TYPE;
343 return 0;
344 }
345 }
346
347 return 0;
348}
349
350void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
351 if (!tree) {
352 return;
353 }
354
355 tree->token.type = TOKEN_ERROR_TYPE;
356 tree->lhs = 0;
357 tree->rhs = 0;
358
359 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
360}
361
362void lexFree(struct LexVector* lv) {
363 while (lv) {
364 struct LexVector* lvNext = lv->next;
365 free(lv);
366 lv = lvNext;
367 }
368}
369
370void parseFree(struct ParseTree* tree) {
371 if (!tree) {
372 return;
373 }
374
375 parseFree(tree->lhs);
376 parseFree(tree->rhs);
377
378 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
379 free(tree->token.identifierValue);
380 }
381 free(tree);
382}