src/debugger/parser.c (view raw)
1/* Copyright (c) 2013-2014 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include "parser.h"
7
8#include "util/string.h"
9
10static struct LexVector* _lexOperator(struct LexVector* lv, char operator) {
11 struct LexVector* lvNext = malloc(sizeof(struct LexVector));
12 lvNext->token.type = TOKEN_OPERATOR_TYPE;
13 switch (operator) {
14 case '+':
15 lvNext->token.operatorValue = OP_ADD;
16 break;
17 case '-':
18 lvNext->token.operatorValue = OP_SUBTRACT;
19 break;
20 case '*':
21 lvNext->token.operatorValue = OP_MULTIPLY;
22 break;
23 case '/':
24 lvNext->token.operatorValue = OP_DIVIDE;
25 break;
26 default:
27 lvNext->token.type = TOKEN_ERROR_TYPE;
28 break;
29 }
30 lvNext->next = lv->next;
31 lv->next = lvNext;
32 lv = lvNext;
33 lvNext = malloc(sizeof(struct LexVector));
34 lvNext->next = lv->next;
35 lvNext->token.type = TOKEN_ERROR_TYPE;
36 lv->next = lvNext;
37 return lvNext;
38}
39
40size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
41 if (!string || length < 1) {
42 return 0;
43 }
44
45 uint32_t next = 0;
46 size_t adjusted = 0;
47
48 enum LexState state = LEX_ROOT;
49 const char* tokenStart = 0;
50 struct LexVector* lvNext;
51
52 while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
53 char token = string[0];
54 ++string;
55 ++adjusted;
56 --length;
57 switch (state) {
58 case LEX_ROOT:
59 tokenStart = string - 1;
60 switch (token) {
61 case '1':
62 case '2':
63 case '3':
64 case '4':
65 case '5':
66 case '6':
67 case '7':
68 case '8':
69 case '9':
70 state = LEX_EXPECT_DECIMAL;
71 next = token - '0';
72 break;
73 case '0':
74 state = LEX_EXPECT_PREFIX;
75 next = 0;
76 break;
77 case '$':
78 state = LEX_EXPECT_HEX;
79 next = 0;
80 break;
81 case '(':
82 state = LEX_ROOT;
83 lv->token.type = TOKEN_OPEN_PAREN_TYPE;
84 lvNext = malloc(sizeof(struct LexVector));
85 lvNext->next = lv->next;
86 lvNext->token.type = TOKEN_ERROR_TYPE;
87 lv->next = lvNext;
88 lv = lvNext;
89 break;
90 default:
91 if (tolower(token) >= 'a' && tolower(token <= 'z')) {
92 state = LEX_EXPECT_IDENTIFIER;
93 } else {
94 state = LEX_ERROR;
95 }
96 break;
97 };
98 break;
99 case LEX_EXPECT_IDENTIFIER:
100 switch (token) {
101 case '+':
102 case '-':
103 case '*':
104 case '/':
105 lv->token.type = TOKEN_IDENTIFIER_TYPE;
106 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
107 lv = _lexOperator(lv, token);
108 state = LEX_ROOT;
109 break;
110 case ')':
111 lv->token.type = TOKEN_IDENTIFIER_TYPE;
112 lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1);
113 state = LEX_EXPECT_OPERATOR;
114 break;
115 default:
116 break;
117 }
118 break;
119 case LEX_EXPECT_BINARY:
120 switch (token) {
121 case '0':
122 case '1':
123 // TODO: handle overflow
124 next <<= 1;
125 next += token - '0';
126 break;
127 case '+':
128 case '-':
129 case '*':
130 case '/':
131 lv->token.type = TOKEN_UINT_TYPE;
132 lv->token.uintValue = next;
133 lv = _lexOperator(lv, token);
134 state = LEX_ROOT;
135 break;
136 case ')':
137 lv->token.type = TOKEN_UINT_TYPE;
138 lv->token.uintValue = next;
139 state = LEX_EXPECT_OPERATOR;
140 break;
141 default:
142 state = LEX_ERROR;
143 break;
144 }
145 break;
146 case LEX_EXPECT_DECIMAL:
147 switch (token) {
148 case '0':
149 case '1':
150 case '2':
151 case '3':
152 case '4':
153 case '5':
154 case '6':
155 case '7':
156 case '8':
157 case '9':
158 // TODO: handle overflow
159 next *= 10;
160 next += token - '0';
161 break;
162 case '+':
163 case '-':
164 case '*':
165 case '/':
166 lv->token.type = TOKEN_UINT_TYPE;
167 lv->token.uintValue = next;
168 lv = _lexOperator(lv, token);
169 state = LEX_ROOT;
170 break;
171 case ')':
172 lv->token.type = TOKEN_UINT_TYPE;
173 lv->token.uintValue = next;
174 state = LEX_EXPECT_OPERATOR;
175 break;
176 default:
177 state = LEX_ERROR;
178 }
179 break;
180 case LEX_EXPECT_HEX:
181 switch (token) {
182 case '0':
183 case '1':
184 case '2':
185 case '3':
186 case '4':
187 case '5':
188 case '6':
189 case '7':
190 case '8':
191 case '9':
192 // TODO: handle overflow
193 next *= 16;
194 next += token - '0';
195 break;
196 case 'A':
197 case 'B':
198 case 'C':
199 case 'D':
200 case 'E':
201 case 'F':
202 // TODO: handle overflow
203 next *= 16;
204 next += token - 'A' + 10;
205 break;
206 case 'a':
207 case 'b':
208 case 'c':
209 case 'd':
210 case 'e':
211 case 'f':
212 // TODO: handle overflow
213 next *= 16;
214 next += token - 'a' + 10;
215 break;
216 case '+':
217 case '-':
218 case '*':
219 case '/':
220 lv->token.type = TOKEN_UINT_TYPE;
221 lv->token.uintValue = next;
222 lv = _lexOperator(lv, token);
223 state = LEX_ROOT;
224 break;
225 case ')':
226 lv->token.type = TOKEN_UINT_TYPE;
227 lv->token.uintValue = next;
228 state = LEX_EXPECT_OPERATOR;
229 break;
230 default:
231 state = LEX_ERROR;
232 break;
233 }
234 break;
235 case LEX_EXPECT_PREFIX:
236 switch (token) {
237 case 'X':
238 case 'x':
239 next = 0;
240 state = LEX_EXPECT_HEX;
241 break;
242 case 'B':
243 case 'b':
244 next = 0;
245 state = LEX_EXPECT_BINARY;
246 break;
247 case '+':
248 case '-':
249 case '*':
250 case '/':
251 lv->token.type = TOKEN_UINT_TYPE;
252 lv->token.uintValue = next;
253 lv = _lexOperator(lv, token);
254 state = LEX_ROOT;
255 break;
256 case ')':
257 lv->token.type = TOKEN_UINT_TYPE;
258 lv->token.uintValue = next;
259 state = LEX_EXPECT_OPERATOR;
260 break;
261 case '0':
262 case '1':
263 case '2':
264 case '3':
265 case '4':
266 case '5':
267 case '6':
268 case '7':
269 case '8':
270 case '9':
271 next = token - '0';
272 state = LEX_EXPECT_DECIMAL;
273 break;
274 default:
275 state = LEX_ERROR;
276 }
277 break;
278 case LEX_EXPECT_OPERATOR:
279 switch (token) {
280 case '+':
281 case '-':
282 case '*':
283 case '/':
284 lvNext = malloc(sizeof(struct LexVector));
285 lvNext->next = lv->next;
286 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
287 lv->next = lvNext;
288 lv = _lexOperator(lv->next, token);
289 state = LEX_ROOT;
290 break;
291 default:
292 state = LEX_ERROR;
293 }
294 break;
295 case LEX_ERROR:
296 // This shouldn't be reached
297 break;
298 }
299 }
300
301 switch (state) {
302 case LEX_EXPECT_BINARY:
303 case LEX_EXPECT_DECIMAL:
304 case LEX_EXPECT_HEX:
305 case LEX_EXPECT_PREFIX:
306 lv->token.type = TOKEN_UINT_TYPE;
307 lv->token.uintValue = next;
308 break;
309 case LEX_EXPECT_IDENTIFIER:
310 lv->token.type = TOKEN_IDENTIFIER_TYPE;
311 lv->token.identifierValue = strndup(tokenStart, string - tokenStart);
312 break;
313 case LEX_EXPECT_OPERATOR:
314 lvNext = malloc(sizeof(struct LexVector));
315 lvNext->next = lv->next;
316 lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
317 lv->next = lvNext;
318 break;
319 case LEX_ERROR:
320 default:
321 lv->token.type = TOKEN_ERROR_TYPE;
322 break;
323 }
324 return adjusted;
325}
326
327static const int _operatorPrecedence[] = {
328 2,
329 1,
330 1,
331 0,
332 0
333};
334
335static struct ParseTree* _parseTreeCreate() {
336 struct ParseTree* tree = malloc(sizeof(struct ParseTree));
337 tree->token.type = TOKEN_ERROR_TYPE;
338 tree->rhs = 0;
339 tree->lhs = 0;
340 return tree;
341}
342
343static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) {
344 struct ParseTree* newTree = 0;
345 while (lv) {
346 int newPrecedence;
347 switch (lv->token.type) {
348 case TOKEN_IDENTIFIER_TYPE:
349 case TOKEN_UINT_TYPE:
350 if (tree->token.type == TOKEN_ERROR_TYPE) {
351 tree->token = lv->token;
352 lv = lv->next;
353 } else {
354 tree->token.type = TOKEN_ERROR_TYPE;
355 return 0;
356 }
357 break;
358 case TOKEN_OPEN_PAREN_TYPE:
359 lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1);
360 break;
361 case TOKEN_CLOSE_PAREN_TYPE:
362 if (openParens <= 0) {
363 tree->token.type = TOKEN_ERROR_TYPE;
364 return 0;
365 }
366 return lv->next;
367 break;
368 case TOKEN_OPERATOR_TYPE:
369 newPrecedence = _operatorPrecedence[lv->token.operatorValue];
370 if (newPrecedence < precedence) {
371 newTree = _parseTreeCreate();
372 *newTree = *tree;
373 tree->lhs = newTree;
374 tree->rhs = _parseTreeCreate();
375 tree->token = lv->token;
376 lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens);
377 if (tree->token.type == TOKEN_ERROR_TYPE) {
378 tree->token.type = TOKEN_ERROR_TYPE;
379 }
380 } else {
381 return lv;
382 }
383 break;
384 case TOKEN_ERROR_TYPE:
385 tree->token.type = TOKEN_ERROR_TYPE;
386 return 0;
387 }
388 }
389
390 return 0;
391}
392
393void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
394 if (!tree) {
395 return;
396 }
397
398 tree->token.type = TOKEN_ERROR_TYPE;
399 tree->lhs = 0;
400 tree->rhs = 0;
401
402 _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN], 0);
403}
404
405void lexFree(struct LexVector* lv) {
406 while (lv) {
407 struct LexVector* lvNext = lv->next;
408 free(lv);
409 lv = lvNext;
410 }
411}
412
413void parseFree(struct ParseTree* tree) {
414 if (!tree) {
415 return;
416 }
417
418 parseFree(tree->lhs);
419 parseFree(tree->rhs);
420
421 if (tree->token.type == TOKEN_IDENTIFIER_TYPE) {
422 free(tree->token.identifierValue);
423 }
424 free(tree);
425}