src/debugger/parser.c (view raw)
1#include "parser.h"
2
3static inline char* _strndup(const char* start, size_t len) {
4#ifdef HAVE_STRNDUP
5 return strndup(start, len);
6#else
7 // This is suboptimal, but anything recent should have strndup
8 char* out = malloc((len + 1) * sizeof(char));
9 strncpy(out, start, len);
10 out[len] = '\0';
11 return out;
12#endif
13}
14
15static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
16 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
17 lvNext->token.type = TOKEN_OPERATOR_TYPE;
18 switch (operator) {
19 case '+':
20 lvNext->token.operatorValue = OP_ADD;
21 break;
22 case '-':
23 lvNext->token.operatorValue = OP_SUBTRACT;
24 break;
25 case '*':
26 lvNext->token.operatorValue = OP_MULTIPLY;
27 break;
28 case '/':
29 lvNext->token.operatorValue = OP_DIVIDE;
30 break;
31 default:
32 lvNext->token.type = TOKEN_ERROR_TYPE;
33 break;
34 }
35 lvNext->next = lv->next;
36 lv->next = lvNext;
37 lv = lvNext;
38 lvNext = malloc(sizeof(struct LexVector));
39 lvNext->next = lv->next;
40 lvNext->token.type = TOKEN_ERROR_TYPE;
41 lv->next = lvNext;
42 return lvNext;
43}
44
45size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
46 if (!string || length < 1) {
47 return 0;
48 }
49
50 uint32_t next = 0;
51 size_t adjusted = 0;
52
53 enum LexState state = LEX_ROOT;
54 const char* tokenStart = 0;
55 struct LexVector* lvNext;
56
57 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
58 char token = string[0];
59 ++string;
60 ++adjusted;
61 --length;
62 switch (state) {
63 case LEX_ROOT:
64 tokenStart = string - 1;
65 switch (token) {
66 case '1':
67 case '2':
68 case '3':
69 case '4':
70 case '5':
71 case '6':
72 case '7':
73 case '8':
74 case '9':
75 state = LEX_EXPECT_DECIMAL;
76 next = token - '0';
77 break;
78 case '0':
79 state = LEX_EXPECT_PREFIX;
80 break;
81 case '$':
82 state = LEX_EXPECT_HEX;
83 next = 0;
84 break;
85 case '(':
86 state = LEX_ROOT;
87 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
88 lvNext = malloc(sizeof(struct LexVector));
89 lvNext->next = lv->next;
90 lvNext->token.type = TOKEN_ERROR_TYPE;
91 lv->next = lvNext;
92 lv = lvNext;
93 break;
94 default:
95 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
96 state = LEX_EXPECT_IDENTIFIER;
97 } else {
98 state = LEX_ERROR;
99 }
100 break;
101 };
102 break;
103 case LEX_EXPECT_IDENTIFIER:
104 switch (token) {
105 case '+':
106 case '-':
107 case '*':
108 case '/':
109 lv->token.type = TOKEN_IDENTIFIER_TYPE;
110 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
111 lv = _lexOperator(lv, token);
112 state = LEX_ROOT;
113 break;
114 case ')':
115 lv->token.type = TOKEN_IDENTIFIER_TYPE;
116 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1);
117 state = LEX_EXPECT_OPERATOR;
118 break;
119 default:
120 break;
121 }
122 break;
123 case LEX_EXPECT_DECIMAL:
124 switch (token) {
125 case '0':
126 case '1':
127 case '2':
128 case '3':
129 case '4':
130 case '5':
131 case '6':
132 case '7':
133 case '8':
134 case '9':
135 // TODO: handle overflow
136 next *= 10;
137 next += token - '0';
138 break;
139 case '+':
140 case '-':
141 case '*':
142 case '/':
143 lv->token.type = TOKEN_UINT_TYPE;
144 lv->token.uintValue = next;
145 lv = _lexOperator(lv, token);
146 state = LEX_ROOT;
147 break;
148 case ')':
149 lv->token.type = TOKEN_UINT_TYPE;
150 lv->token.uintValue = next;
151 state = LEX_EXPECT_OPERATOR;
152 break;
153 default:
154 state = LEX_ERROR;
155 }
156 break;
157 case LEX_EXPECT_HEX:
158 switch (token) {
159 case '0':
160 case '1':
161 case '2':
162 case '3':
163 case '4':
164 case '5':
165 case '6':
166 case '7':
167 case '8':
168 case '9':
169 // TODO: handle overflow
170 next *= 16;
171 next += token - '0';
172 break;
173 case 'A':
174 case 'B':
175 case 'C':
176 case 'D':
177 case 'E':
178 case 'F':
179 // TODO: handle overflow
180 next *= 16;
181 next += token - 'A' + 10;
182 break;
183 case 'a':
184 case 'b':
185 case 'c':
186 case 'd':
187 case 'e':
188 case 'f':
189 // TODO: handle overflow
190 next *= 16;
191 next += token - 'a' + 10;
192 break;
193 case '+':
194 case '-':
195 case '*':
196 case '/':
197 lv->token.type = TOKEN_UINT_TYPE;
198 lv->token.uintValue = next;
199 lv = _lexOperator(lv, token);
200 state = LEX_ROOT;
201 break;
202 case ')':
203 lv->token.type = TOKEN_UINT_TYPE;
204 lv->token.uintValue = next;
205 state = LEX_EXPECT_OPERATOR;
206 break;
207 default:
208 state = LEX_ERROR;
209 break;
210 }
211 break;
212 case LEX_EXPECT_PREFIX:
213 switch (token) {
214 case 'X':
215 case 'x':
216 next = 0;
217 state = LEX_EXPECT_HEX;
218 break;
219 default:
220 state = LEX_ERROR;
221 break;
222 }
223 break;
224 case LEX_EXPECT_OPERATOR:
225 switch (token) {
226 case '+':
227 case '-':
228 case '*':
229 case '/':
230 lvNext = malloc(sizeof(struct LexVector));
231 lvNext->next = lv->next;
232 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
233 lv->next = lvNext;
234 lv = _lexOperator(lv->next, token);
235 state = LEX_ROOT;
236 break;
237 default:
238 state = LEX_ERROR;
239 }
240 break;
241 case LEX_ERROR:
242 // This shouldn't be reached
243 break;
244 }
245 }
246
247 switch (state) {
248 case LEX_EXPECT_DECIMAL:
249 case LEX_EXPECT_HEX:
250 lv->token.type = TOKEN_UINT_TYPE;
251 lv->token.uintValue = next;
252 break;
253 case LEX_EXPECT_IDENTIFIER:
254 lv->token.type = TOKEN_IDENTIFIER_TYPE;
255 lv->token.identifierValue = _strndup(tokenStart, string - tokenStart);
256 break;
257 case LEX_EXPECT_OPERATOR:
258 lvNext = malloc(sizeof(struct LexVector));
259 lvNext->next = lv->next;
260 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
261 lv->next = lvNext;
262 break;
263 case LEX_ERROR:
264 default:
265 lv->token.type = TOKEN_ERROR_TYPE;
266 break;
267 }
268 return adjusted;
269}
270
271static const int _operatorPrecedence[] = {
272 2,
273 1,
274 1,
275 0,
276 0
277};
278
279static struct ParseTree* _parseTreeCreate() {
280 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
281 tree->token.type = TOKEN_ERROR_TYPE;
282 tree->rhs = 0;
283 tree->lhs = 0;
284 return tree;
285}
286
287static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
288 struct ParseTree* newTree = 0;
289 while (lv) {
290 int newPrecedence;
291 switch (lv->token.type) {
292 case TOKEN_IDENTIFIER_TYPE:
293 case TOKEN_UINT_TYPE:
294 if (tree->token.type == TOKEN_ERROR_TYPE) {
295 tree->token = lv->token;
296 lv = lv->next;
297 } else {
298 tree->token.type = TOKEN_ERROR_TYPE;
299 return 0;
300 }
301 break;
302 case TOKEN_OPEN_PAREN_TYPE:
303 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
304 break;
305 case TOKEN_CLOSE_PAREN_TYPE:
306 if (openParens <= 0) {
307 tree->token.type = TOKEN_ERROR_TYPE;
308 return 0;
309 }
310 return lv->next;
311 break;
312 case TOKEN_OPERATOR_TYPE:
313 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
314 if (newPrecedence < precedence) {
315 newTree = _parseTreeCreate();
316 *newTree = *tree;
317 tree->lhs = newTree;
318 tree->rhs = _parseTreeCreate();
319 tree->token = lv->token;
320 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
321 if (tree->token.type == TOKEN_ERROR_TYPE) {
322 tree->token.type = TOKEN_ERROR_TYPE;
323 }
324 } else {
325 return lv;
326 }
327 break;
328 case TOKEN_ERROR_TYPE:
329 tree->token.type = TOKEN_ERROR_TYPE;
330 return 0;
331 }
332 }
333
334 return 0;
335}
336
337void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
338 if (!tree) {
339 return;
340 }
341
342 tree->token.type = TOKEN_ERROR_TYPE;
343 tree->lhs = 0;
344 tree->rhs = 0;
345
346 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
347}
348
349void lexFree(struct LexVector* lv) {
350 while (lv) {
351 struct LexVector* lvNext = lv->next;
352 free(lv);
353 lv = lvNext;
354 }
355}
356
357void parseFree(struct ParseTree* tree) {
358 if (!tree) {
359 return;
360 }
361
362 parseFree(tree->lhs);
363 parseFree(tree->rhs);
364
365 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
366 free(tree->token.identifierValue);
367 }
368 free(tree);
369}