src/debugger/parser.c (view raw)
1#include "parser.h"
2
3static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
4 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
5 lvNext->token.type = TOKEN_OPERATOR_TYPE;
6 switch (operator) {
7 case '+':
8 lvNext->token.operatorValue = OP_ADD;
9 break;
10 case '-':
11 lvNext->token.operatorValue = OP_SUBTRACT;
12 break;
13 case '*':
14 lvNext->token.operatorValue = OP_MULTIPLY;
15 break;
16 case '/':
17 lvNext->token.operatorValue = OP_DIVIDE;
18 break;
19 default:
20 lvNext->token.type = TOKEN_ERROR_TYPE;
21 break;
22 }
23 lvNext->next = lv->next;
24 lv->next = lvNext;
25 lv = lvNext;
26 lvNext = malloc(sizeof(struct LexVector));
27 lvNext->next = lv->next;
28 lvNext->token.type = TOKEN_ERROR_TYPE;
29 lv->next = lvNext;
30 return lvNext;
31}
32
33size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
34 if (!string || length < 1) {
35 return 0;
36 }
37
38 uint32_t next = 0;
39 size_t adjusted = 0;
40
41 enum LexState state = LEX_ROOT;
42 const char* tokenStart = 0;
43 struct LexVector* lvNext;
44
45 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
46 char token = string[0];
47 ++string;
48 ++adjusted;
49 --length;
50 switch (state) {
51 case LEX_ROOT:
52 tokenStart = string - 1;
53 switch (token) {
54 case '1':
55 case '2':
56 case '3':
57 case '4':
58 case '5':
59 case '6':
60 case '7':
61 case '8':
62 case '9':
63 state = LEX_EXPECT_DECIMAL;
64 next = token - '0';
65 break;
66 case '0':
67 state = LEX_EXPECT_PREFIX;
68 break;
69 case '$':
70 state = LEX_EXPECT_HEX;
71 next = 0;
72 break;
73 case '(':
74 state = LEX_ROOT;
75 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
76 lvNext = malloc(sizeof(struct LexVector));
77 lvNext->next = lv->next;
78 lvNext->token.type = TOKEN_ERROR_TYPE;
79 lv->next = lvNext;
80 lv = lvNext;
81 break;
82 default:
83 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
84 state = LEX_EXPECT_IDENTIFIER;
85 } else {
86 state = LEX_ERROR;
87 }
88 break;
89 };
90 break;
91 case LEX_EXPECT_IDENTIFIER:
92 switch (token) {
93 case '+':
94 case '-':
95 case '*':
96 case '/':
97 lv->token.type = TOKEN_IDENTIFIER_TYPE;
98 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
99 lv = _lexOperator(lv, token);
100 state = LEX_ROOT;
101 break;
102 case ')':
103 lv->token.type = TOKEN_IDENTIFIER_TYPE;
104 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
105 state = LEX_EXPECT_OPERATOR;
106 break;
107 default:
108 break;
109 }
110 break;
111 case LEX_EXPECT_DECIMAL:
112 switch (token) {
113 case '0':
114 case '1':
115 case '2':
116 case '3':
117 case '4':
118 case '5':
119 case '6':
120 case '7':
121 case '8':
122 case '9':
123 // TODO: handle overflow
124 next *= 10;
125 next += token - '0';
126 break;
127 case '+':
128 case '-':
129 case '*':
130 case '/':
131 lv->token.type = TOKEN_UINT_TYPE;
132 lv->token.uintValue = next;
133 lv = _lexOperator(lv, token);
134 state = LEX_ROOT;
135 break;
136 case ')':
137 lv->token.type = TOKEN_UINT_TYPE;
138 lv->token.uintValue = next;
139 state = LEX_EXPECT_OPERATOR;
140 break;
141 default:
142 state = LEX_ERROR;
143 }
144 break;
145 case LEX_EXPECT_HEX:
146 switch (token) {
147 case '0':
148 case '1':
149 case '2':
150 case '3':
151 case '4':
152 case '5':
153 case '6':
154 case '7':
155 case '8':
156 case '9':
157 // TODO: handle overflow
158 next *= 16;
159 next += token - '0';
160 break;
161 case 'A':
162 case 'B':
163 case 'C':
164 case 'D':
165 case 'E':
166 case 'F':
167 // TODO: handle overflow
168 next *= 16;
169 next += token - 'A' + 10;
170 break;
171 case 'a':
172 case 'b':
173 case 'c':
174 case 'd':
175 case 'e':
176 case 'f':
177 // TODO: handle overflow
178 next *= 16;
179 next += token - 'a' + 10;
180 break;
181 case '+':
182 case '-':
183 case '*':
184 case '/':
185 lv->token.type = TOKEN_UINT_TYPE;
186 lv->token.uintValue = next;
187 lv = _lexOperator(lv, token);
188 state = LEX_ROOT;
189 break;
190 case ')':
191 lv->token.type = TOKEN_UINT_TYPE;
192 lv->token.uintValue = next;
193 state = LEX_EXPECT_OPERATOR;
194 break;
195 default:
196 state = LEX_ERROR;
197 break;
198 }
199 break;
200 case LEX_EXPECT_PREFIX:
201 switch (token) {
202 case 'X':
203 case 'x':
204 next = 0;
205 state = LEX_EXPECT_HEX;
206 break;
207 default:
208 state = LEX_ERROR;
209 break;
210 }
211 break;
212 case LEX_EXPECT_OPERATOR:
213 switch (token) {
214 case '+':
215 case '-':
216 case '*':
217 case '/':
218 lvNext = malloc(sizeof(struct LexVector));
219 lvNext->next = lv->next;
220 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
221 lv->next = lvNext;
222 lv = _lexOperator(lv->next, token);
223 state = LEX_ROOT;
224 break;
225 default:
226 state = LEX_ERROR;
227 }
228 break;
229 case LEX_ERROR:
230 // This shouldn't be reached
231 break;
232 }
233 }
234
235 switch (state) {
236 case LEX_EXPECT_DECIMAL:
237 case LEX_EXPECT_HEX:
238 lv->token.type = TOKEN_UINT_TYPE;
239 lv->token.uintValue = next;
240 break;
241 case LEX_EXPECT_IDENTIFIER:
242 lv->token.type = TOKEN_IDENTIFIER_TYPE;
243 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
244 break;
245 case LEX_EXPECT_OPERATOR:
246 lvNext = malloc(sizeof(struct LexVector));
247 lvNext->next = lv->next;
248 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
249 lv->next = lvNext;
250 break;
251 case LEX_ERROR:
252 default:
253 lv->token.type = TOKEN_ERROR_TYPE;
254 break;
255 }
256 return adjusted;
257}
258
259static const int _operatorPrecedence[] = {
260 2,
261 1,
262 1,
263 0,
264 0
265};
266
267static struct ParseTree* _parseTreeCreate() {
268 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
269 tree->token.type = TOKEN_ERROR_TYPE;
270 tree->rhs = 0;
271 tree->lhs = 0;
272 return tree;
273}
274
275static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
276 struct ParseTree* newTree = 0;
277 while (lv) {
278 int newPrecedence;
279 switch (lv->token.type) {
280 case TOKEN_IDENTIFIER_TYPE:
281 case TOKEN_UINT_TYPE:
282 if (tree->token.type == TOKEN_ERROR_TYPE) {
283 tree->token = lv->token;
284 lv = lv->next;
285 } else {
286 tree->token.type = TOKEN_ERROR_TYPE;
287 return 0;
288 }
289 break;
290 case TOKEN_OPEN_PAREN_TYPE:
291 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
292 break;
293 case TOKEN_CLOSE_PAREN_TYPE:
294 if (openParens <= 0) {
295 tree->token.type = TOKEN_ERROR_TYPE;
296 return 0;
297 }
298 return lv->next;
299 break;
300 case TOKEN_OPERATOR_TYPE:
301 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
302 if (newPrecedence < precedence) {
303 newTree = _parseTreeCreate();
304 *newTree = *tree;
305 tree->lhs = newTree;
306 tree->rhs = _parseTreeCreate();
307 tree->token = lv->token;
308 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
309 if (tree->token.type == TOKEN_ERROR_TYPE) {
310 tree->token.type = TOKEN_ERROR_TYPE;
311 }
312 } else {
313 return lv;
314 }
315 break;
316 case TOKEN_ERROR_TYPE:
317 tree->token.type = TOKEN_ERROR_TYPE;
318 return 0;
319 }
320 }
321
322 return 0;
323}
324
325void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
326 if (!tree) {
327 return;
328 }
329
330 tree->token.type = TOKEN_ERROR_TYPE;
331 tree->lhs = 0;
332 tree->rhs = 0;
333
334 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
335}
336
337void lexFree(struct LexVector* lv) {
338 while (lv) {
339 struct LexVector* lvNext = lv->next;
340 free(lv);
341 lv = lvNext;
342 }
343}
344
345void parseFree(struct ParseTree* tree) {
346 if (!tree) {
347 return;
348 }
349
350 parseFree(tree->lhs);
351 parseFree(tree->rhs);
352
353 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
354 free(tree->token.identifierValue);
355 }
356 free(tree);
357}