src/debugger/parser.c (view raw)
1#include "parser.h"
2
3static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
4 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
5 lvNext->token.type = TOKEN_OPERATOR_TYPE;
6 switch (operator) {
7 case '+':
8 lvNext->token.operatorValue = OP_ADD;
9 break;
10 case '-':
11 lvNext->token.operatorValue = OP_SUBTRACT;
12 break;
13 case '*':
14 lvNext->token.operatorValue = OP_MULTIPLY;
15 break;
16 case '/':
17 lvNext->token.operatorValue = OP_DIVIDE;
18 break;
19 default:
20 lvNext->token.type = TOKEN_ERROR_TYPE;
21 break;
22 }
23 lvNext->next = lv->next;
24 lv->next = lvNext;
25 lv = lvNext;
26 lvNext = malloc(sizeof(struct LexVector));
27 lvNext->next = lv->next;
28 lvNext->token.type = TOKEN_ERROR_TYPE;
29 lv->next = lvNext;
30 return lvNext;
31}
32
33size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
34 if (!string || length < 1) {
35 return 0;
36 }
37
38 uint32_t next = 0;
39 size_t adjusted = 0;
40
41 enum LexState state = LEX_ROOT;
42 const char* tokenStart = 0;
43 struct LexVector* lvNext;
44
45 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
46 char token = string[0];
47 ++string;
48 ++adjusted;
49 --length;
50 switch (state) {
51 case LEX_ROOT:
52 tokenStart = string - 1;
53 switch (token) {
54 case '1':
55 case '2':
56 case '3':
57 case '4':
58 case '5':
59 case '6':
60 case '7':
61 case '8':
62 case '9':
63 state = LEX_EXPECT_DECIMAL;
64 next = token - '0';
65 break;
66 case '0':
67 state = LEX_EXPECT_PREFIX;
68 break;
69 case '$':
70 state = LEX_EXPECT_HEX;
71 next = 0;
72 break;
73 case '(':
74 state = LEX_ROOT;
75 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
76 lvNext = malloc(sizeof(struct LexVector));
77 lvNext->next = lv->next;
78 lvNext->token.type = TOKEN_ERROR_TYPE;
79 lv->next = lvNext;
80 lv = lvNext;
81 break;
82 default:
83 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
84 state = LEX_EXPECT_IDENTIFIER;
85 } else {
86 state = LEX_ERROR;
87 }
88 break;
89 };
90 break;
91 case LEX_EXPECT_IDENTIFIER:
92 switch (token) {
93 case '+':
94 case '-':
95 case '*':
96 case '/':
97 lv->token.type = TOKEN_IDENTIFIER_TYPE;
98 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
99 lv = _lexOperator(lv, token);
100 state = LEX_ROOT;
101 break;
102 case ')':
103 lv->token.type = TOKEN_IDENTIFIER_TYPE;
104 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
105 state = LEX_EXPECT_OPERATOR;
106 break;
107 default:
108 break;
109 }
110 break;
111 case LEX_EXPECT_DECIMAL:
112 switch (token) {
113 case '0':
114 case '1':
115 case '2':
116 case '3':
117 case '4':
118 case '5':
119 case '6':
120 case '7':
121 case '8':
122 case '9':
123 // TODO: handle overflow
124 next *= 10;
125 next += token - '0';
126 break;
127 case '+':
128 case '-':
129 case '*':
130 case '/':
131 lv->token.type = TOKEN_UINT_TYPE;
132 lv->token.uintValue = next;
133 lv = _lexOperator(lv, token);
134 state = LEX_ROOT;
135 break;
136 case ')':
137 lv->token.type = TOKEN_UINT_TYPE;
138 lv->token.uintValue = next;
139 state = LEX_EXPECT_OPERATOR;
140 break;
141 default:
142 state = LEX_ERROR;
143 }
144 break;
145 case LEX_EXPECT_HEX:
146 switch (token) {
147 case '0':
148 case '1':
149 case '2':
150 case '3':
151 case '4':
152 case '5':
153 case '6':
154 case '7':
155 case '8':
156 case '9':
157 // TODO: handle overflow
158 next *= 16;
159 next += token - '0';
160 break;
161 case 'A':
162 case 'B':
163 case 'C':
164 case 'D':
165 case 'E':
166 case 'F':
167 // TODO: handle overflow
168 next *= 16;
169 next += token - 'A' + 10;
170 break;
171 case 'a':
172 case 'b':
173 case 'c':
174 case 'd':
175 case 'e':
176 case 'f':
177 // TODO: handle overflow
178 next *= 16;
179 next += token - 'a' + 10;
180 break;
181 case '+':
182 case '-':
183 case '*':
184 case '/':
185 lv->token.type = TOKEN_UINT_TYPE;
186 lv->token.uintValue = next;
187 lv = _lexOperator(lv, token);
188 state = LEX_ROOT;
189 break;
190 case ')':
191 lv->token.type = TOKEN_UINT_TYPE;
192 lv->token.uintValue = next;
193 state = LEX_EXPECT_OPERATOR;
194 break;
195 default:
196 state = LEX_ERROR;
197 break;
198 }
199 break;
200 case LEX_EXPECT_PREFIX:
201 switch (token) {
202 case 'X':
203 case 'x':
204 next = 0;
205 state = LEX_EXPECT_HEX;
206 break;
207 default:
208 state = LEX_ERROR;
209 break;
210 }
211 break;
212 case LEX_EXPECT_OPERATOR:
213 switch (token) {
214 case '+':
215 case '-':
216 case '*':
217 case '/':
218 lvNext = malloc(sizeof(struct LexVector));
219 lvNext->next = lv->next;
220 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
221 lv->next = lvNext;
222 lv = _lexOperator(lv->next, token);
223 state = LEX_ROOT;
224 break;
225 default:
226 state = LEX_ERROR;
227 }
228 break;
229 case LEX_ERROR:
230 // This shouldn't be reached
231 break;
232 }
233 }
234
235 switch (state) {
236 case LEX_EXPECT_DECIMAL:
237 case LEX_EXPECT_HEX:
238 lv->token.type = TOKEN_UINT_TYPE;
239 lv->token.uintValue = next;
240 break;
241 case LEX_EXPECT_IDENTIFIER:
242 lv->token.type = TOKEN_IDENTIFIER_TYPE;
243 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
244 break;
245 case LEX_EXPECT_OPERATOR:
246 lvNext = malloc(sizeof(struct LexVector));
247 lvNext->next = lv->next;
248 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
249 lv->next = lvNext;
250 break;
251 case LEX_ERROR:
252 default:
253 lv->token.type = TOKEN_ERROR_TYPE;
254 break;
255 }
256 return adjusted;
257}
258
259static const int _operatorPrecedence[] = {
260 2,
261 1,
262 1,
263 0
264};
265
266static struct ParseTree* _parseTreeCreate() {
267 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
268 tree->token.type = TOKEN_ERROR_TYPE;
269 tree->rhs = 0;
270 tree->lhs = 0;
271 return tree;
272}
273
274static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
275 struct ParseTree* newTree = 0;
276 while (lv) {
277 int newPrecedence;
278 switch (lv->token.type) {
279 case TOKEN_IDENTIFIER_TYPE:
280 case TOKEN_UINT_TYPE:
281 if (tree->token.type == TOKEN_ERROR_TYPE) {
282 tree->token = lv->token;
283 lv = lv->next;
284 } else {
285 tree->token.type = TOKEN_ERROR_TYPE;
286 return 0;
287 }
288 break;
289 case TOKEN_OPEN_PAREN_TYPE:
290 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
291 break;
292 case TOKEN_CLOSE_PAREN_TYPE:
293 if (openParens <= 0) {
294 tree->token.type = TOKEN_ERROR_TYPE;
295 return 0;
296 }
297 return lv->next;
298 break;
299 case TOKEN_OPERATOR_TYPE:
300 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
301 if (newPrecedence < precedence) {
302 newTree = _parseTreeCreate();
303 *newTree = *tree;
304 tree->lhs = newTree;
305 tree->rhs = _parseTreeCreate();
306 tree->token = lv->token;
307 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
308 if (tree->token.type == TOKEN_ERROR_TYPE) {
309 tree->token.type = TOKEN_ERROR_TYPE;
310 }
311 } else {
312 return lv;
313 }
314 break;
315 case TOKEN_ERROR_TYPE:
316 tree->token.type = TOKEN_ERROR_TYPE;
317 return 0;
318 }
319 }
320
321 return 0;
322}
323
324void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
325 if (!tree) {
326 return;
327 }
328
329 tree->token.type = TOKEN_ERROR_TYPE;
330 tree->lhs = 0;
331 tree->rhs = 0;
332
333 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
334}
335
336void lexFree(struct LexVector* lv) {
337 while (lv) {
338 struct LexVector* lvNext = lv->next;
339 free(lv);
340 lv = lvNext;
341 }
342}
343
344void parseFree(struct ParseTree* tree) {
345 if (!tree) {
346 return;
347 }
348
349 parseFree(tree->lhs);
350 parseFree(tree->rhs);
351
352 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
353 free(tree->token.identifierValue);
354 }
355 free(tree);
356}