Proper parser, without identifier support currently
Jeffrey Pfau jeffrey@endrift.com
Sat, 12 Jul 2014 17:12:35 -0700
4 files changed,
408 insertions(+),
335 deletions(-)
M
CMakeLists.txt
→
CMakeLists.txt
@@ -45,7 +45,8 @@
if(USE_CLI_DEBUGGER AND NOT WIN32) # Win32 doesn't have a usable command line, nor libedit, so this is useless on Windows add_definitions(-DUSE_CLI_DEBUGGER) - set(DEBUGGER_SRC "${DEBUGGER_SRC};${CMAKE_SOURCE_DIR}/src/debugger/cli-debugger.c") + list(APPEND DEBUGGER_SRC "${CMAKE_SOURCE_DIR}/src/debugger/cli-debugger.c") + list(APPEND DEBUGGER_SRC "${CMAKE_SOURCE_DIR}/src/debugger/parser.c") set(DEBUGGER_LIB "edit") else() set(DEBUGGER_LIB "")
M
src/debugger/cli-debugger.c
→
src/debugger/cli-debugger.c
@@ -1,5 +1,6 @@
#include "cli-debugger.h" #include "decoder.h" +#include "parser.h" #include <signal.h>@@ -10,9 +11,9 @@
struct DebugVector { struct DebugVector* next; enum DVType { - ERROR_TYPE, - INT_TYPE, - CHAR_TYPE + DV_ERROR_TYPE, + DV_INT_TYPE, + DV_CHAR_TYPE } type; union { int32_t intValue;@@ -132,14 +133,14 @@ } else {
wordSize = WORD_SIZE_THUMB; } - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { address = debugger->d.cpu->gprs[ARM_PC] - wordSize; } else { address = dv->intValue; dv = dv->next; } - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { size = 1; } else { size = dv->intValue;@@ -212,7 +213,7 @@ debugger->d.state = DEBUGGER_SHUTDOWN;
} static void _readByte(struct CLIDebugger* debugger, struct DebugVector* dv) { - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { printf("%s\n", ERROR_MISSING_ARGS); return; }@@ -222,7 +223,7 @@ printf(" 0x%02X\n", value);
} static void _readHalfword(struct CLIDebugger* debugger, struct DebugVector* dv) { - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { printf("%s\n", ERROR_MISSING_ARGS); return; }@@ -232,7 +233,7 @@ printf(" 0x%04X\n", value);
} static void _readWord(struct CLIDebugger* debugger, struct DebugVector* dv) { - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { printf("%s\n", ERROR_MISSING_ARGS); return; }@@ -242,7 +243,7 @@ printf(" 0x%08X\n", value);
} static void _setBreakpoint(struct CLIDebugger* debugger, struct DebugVector* dv) { - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { printf("%s\n", ERROR_MISSING_ARGS); return; }@@ -251,7 +252,7 @@ ARMDebuggerSetBreakpoint(&debugger->d, address);
} static void _clearBreakpoint(struct CLIDebugger* debugger, struct DebugVector* dv) { - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { printf("%s\n", ERROR_MISSING_ARGS); return; }@@ -260,7 +261,7 @@ ARMDebuggerClearBreakpoint(&debugger->d, address);
} static void _setWatchpoint(struct CLIDebugger* debugger, struct DebugVector* dv) { - if (!dv || dv->type != INT_TYPE) { + if (!dv || dv->type != DV_INT_TYPE) { printf("%s\n", ERROR_MISSING_ARGS); return; }@@ -273,334 +274,43 @@ UNUSED(signal);
ARMDebuggerEnter(&_activeDebugger->d, DEBUGGER_ENTER_MANUAL); } -enum _DVParseState { - PARSE_ERROR = -1, - PARSE_ROOT = 0, - PARSE_EXPECT_REGISTER, - PARSE_EXPECT_REGISTER_2, - PARSE_EXPECT_LR, - PARSE_EXPECT_PC, - PARSE_EXPECT_SP, - PARSE_EXPECT_DECIMAL, - PARSE_EXPECT_HEX, - PARSE_EXPECT_PREFIX, - PARSE_EXPECT_SUFFIX, -}; - -enum Operation { - ASSIGN, - ADD, - SUBTRACT, - MULTIPLY, - DIVIDE -}; - -static void _performOperation(enum Operation operation, uint32_t next, struct DebugVector* dv) { +static uint32_t _performOperation(enum Operation operation, uint32_t current, uint32_t next, struct DebugVector* dv) { switch (operation) { - case ASSIGN: - dv->intValue = next; + case OP_ASSIGN: + current = next; break; - case ADD: - dv->intValue += next; + case OP_ADD: + current += next; break; - case SUBTRACT: - dv->intValue -= next; + case OP_SUBTRACT: + current -= next; break; - case MULTIPLY: - dv->intValue *= next; + case OP_MULTIPLY: + current *= next; break; - case DIVIDE: + case OP_DIVIDE: if (next != 0) { - dv->intValue /= next; + current /= next; } else { - dv->type = ERROR_TYPE; - return; + dv->type = DV_ERROR_TYPE; + return 0; } break; } + return current; } -static size_t _parseExpression(struct CLIDebugger* debugger, const char* string, size_t length, struct DebugVector* dv) { - if (!string || length < 1) { - return 0; - } - - uint32_t next = 0; - size_t adjusted = 0; - - enum _DVParseState state = PARSE_ROOT; - enum Operation operation = ASSIGN; - - while (length > 0 && string[0] && string[0] != ' ' && state != PARSE_ERROR) { - char token = string[0]; - ++string; - ++adjusted; - --length; - switch (state) { - case PARSE_ROOT: - switch (token) { - case 'r': - state = PARSE_EXPECT_REGISTER; - break; - case 'p': - state = PARSE_EXPECT_PC; - break; - case 's': - state = PARSE_EXPECT_SP; - break; - case 'l': - state = PARSE_EXPECT_LR; - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - state = PARSE_EXPECT_DECIMAL; - next = token - '0'; - break; - case '0': - state = PARSE_EXPECT_PREFIX; - break; - case '$': - state = PARSE_EXPECT_HEX; - next = 0; - break; - default: - state = PARSE_ERROR; - break; - }; - break; - case PARSE_EXPECT_LR: - switch (token) { - case 'r': - next = debugger->d.cpu->gprs[ARM_LR]; - state = PARSE_EXPECT_SUFFIX; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_PC: - switch (token) { - case 'c': - next = debugger->d.cpu->gprs[ARM_PC]; - state = PARSE_EXPECT_SUFFIX; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_SP: - switch (token) { - case 'p': - next = debugger->d.cpu->gprs[ARM_SP]; - state = PARSE_EXPECT_SUFFIX; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_REGISTER: - switch (token) { - case '0': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - next = debugger->d.cpu->gprs[token - '0']; - state = PARSE_EXPECT_SUFFIX; - break; - case '1': - state = PARSE_EXPECT_REGISTER_2; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_REGISTER_2: - switch (token) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - next = debugger->d.cpu->gprs[token - '0' + 10]; - state = PARSE_EXPECT_SUFFIX; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_DECIMAL: - switch (token) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - // TODO: handle overflow - next *= 10; - next += token - '0'; - break; - case '+': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = ADD; - break; - case '-': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = SUBTRACT; - break; - case '*': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = MULTIPLY; - break; - case '/': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = DIVIDE; - break; - default: - state = PARSE_ERROR; - } - break; - case PARSE_EXPECT_HEX: - switch (token) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - // TODO: handle overflow - next *= 16; - next += token - '0'; - break; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - // TODO: handle overflow - next *= 16; - next += token - 'A' + 10; - break; - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - // TODO: handle overflow - next *= 16; - next += token - 'a' + 10; - break; - case '+': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = ADD; - break; - case '-': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = SUBTRACT; - break; - case '*': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = MULTIPLY; - break; - case '/': - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - operation = DIVIDE; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_PREFIX: - switch (token) { - case 'X': - case 'x': - next = 0; - state = PARSE_EXPECT_HEX; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_EXPECT_SUFFIX: - _performOperation(operation, next, dv); - next = 0; - state = PARSE_ROOT; - switch (token) { - case '+': - operation = ADD; - break; - case '-': - operation = SUBTRACT; - break; - case '*': - operation = MULTIPLY; - break; - case '/': - operation = DIVIDE; - break; - default: - state = PARSE_ERROR; - break; - } - break; - case PARSE_ERROR: - // This shouldn't be reached - break; - } +static uint32_t _evaluateParseTree(struct ParseTree* tree, struct DebugVector* dv) { + switch (tree->token.type) { + case TOKEN_UINT_TYPE: + return tree->token.uintValue; + case TOKEN_OPERATOR_TYPE: + return _performOperation(tree->token.operatorValue, _evaluateParseTree(tree->lhs, dv), _evaluateParseTree(tree->rhs, dv), dv); + case TOKEN_IDENTIFIER_TYPE: + case TOKEN_ERROR_TYPE: + dv->type = DV_ERROR_TYPE; } - - if (state == PARSE_ERROR) { - dv->type = ERROR_TYPE; - } else { - _performOperation(operation, next, dv); - } - return adjusted; + return 0; } static struct DebugVector* _DVParse(struct CLIDebugger* debugger, const char* string, size_t length) {@@ -608,25 +318,39 @@ if (!string || length < 1) {
return 0; } - struct DebugVector dvTemp = { .type = INT_TYPE }; + struct DebugVector dvTemp = { .type = DV_INT_TYPE }; - size_t adjusted = _parseExpression(debugger, string, length, &dvTemp); + struct LexVector lv = { .next = 0 }; + size_t adjusted = lexExpression(&lv, string, length); if (adjusted > length) { - dvTemp.type = ERROR_TYPE; + dvTemp.type = DV_ERROR_TYPE; + lexFree(lv.next); } + + struct ParseTree tree; + parseLexedExpression(&tree, &lv); + if (tree.token.type == TOKEN_ERROR_TYPE) { + dvTemp.type = DV_ERROR_TYPE; + } else { + dvTemp.intValue = _evaluateParseTree(&tree, &dvTemp); + } + + parseFree(tree.lhs); + parseFree(tree.rhs); + length -= adjusted; string += adjusted; struct DebugVector* dv = malloc(sizeof(struct DebugVector)); - if (dvTemp.type == ERROR_TYPE) { - dv->type = ERROR_TYPE; + if (dvTemp.type == DV_ERROR_TYPE) { + dv->type = DV_ERROR_TYPE; dv->next = 0; } else { *dv = dvTemp; if (string[0] == ' ') { dv->next = _DVParse(debugger, string + 1, length - 1); - if (dv->next && dv->next->type == ERROR_TYPE) { - dv->type = ERROR_TYPE; + if (dv->next && dv->next->type == DV_ERROR_TYPE) { + dv->type = DV_ERROR_TYPE; } } }@@ -649,7 +373,7 @@ struct DebugVector* dv = 0;
if (firstSpace) { cmdLength = firstSpace - line; dv = _DVParse(debugger, firstSpace + 1, count - cmdLength - 1); - if (dv && dv->type == ERROR_TYPE) { + if (dv && dv->type == DV_ERROR_TYPE) { printf("Parse error\n"); _DVFree(dv); return 0;
A
src/debugger/parser.c
@@ -0,0 +1,293 @@
+#include "parser.h" + +static struct LexVector* _lexOperator(struct LexVector* lv, char operator) { + struct LexVector* lvNext = malloc(sizeof(struct LexVector)); + lvNext->token.type = TOKEN_OPERATOR_TYPE; + switch (operator) { + case '+': + lvNext->token.operatorValue = OP_ADD; + break; + case '-': + lvNext->token.operatorValue = OP_SUBTRACT; + break; + case '*': + lvNext->token.operatorValue = OP_MULTIPLY; + break; + case '/': + lvNext->token.operatorValue = OP_DIVIDE; + break; + default: + lvNext->token.type = TOKEN_ERROR_TYPE; + break; + } + lvNext->next = lv->next; + lv->next = lvNext; + lv = lvNext; + lvNext = malloc(sizeof(struct LexVector)); + lvNext->next = lv->next; + lv->next = lvNext; + return lvNext; +} + +size_t lexExpression(struct LexVector* lv, const char* string, size_t length) { + if (!string || length < 1) { + return 0; + } + + uint32_t next = 0; + size_t adjusted = 0; + + enum LexState state = LEX_ROOT; + const char* tokenStart = 0; + + while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) { + char token = string[0]; + ++string; + ++adjusted; + --length; + switch (state) { + case LEX_ROOT: + tokenStart = string - 1; + switch (token) { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + state = LEX_EXPECT_DECIMAL; + next = token - '0'; + break; + case '0': + state = LEX_EXPECT_PREFIX; + break; + case '$': + state = LEX_EXPECT_HEX; + next = 0; + break; + default: + if (tolower(token) >= 'a' && tolower(token <= 'z')) { + state = LEX_EXPECT_IDENTIFIER; + } else { + state = LEX_ERROR; + } + break; + }; + break; + case LEX_EXPECT_IDENTIFIER: + switch (token) { + case '+': + case '-': + case '*': + case '/': + lv->token.type = TOKEN_IDENTIFIER_TYPE; + lv->token.identifierValue = strndup(tokenStart, string - tokenStart); + lv = _lexOperator(lv, token); + state = LEX_ROOT; + break; + default: + break; + } + break; + case LEX_EXPECT_DECIMAL: + switch (token) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + // TODO: handle overflow + next *= 10; + next += token - '0'; + break; + case '+': + case '-': + case '*': + case '/': + lv->token.type = TOKEN_UINT_TYPE; + lv->token.uintValue = next; + lv = _lexOperator(lv, token); + state = LEX_ROOT; + break; + default: + state = LEX_ERROR; + } + break; + case LEX_EXPECT_HEX: + switch (token) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + // TODO: handle overflow + next *= 16; + next += token - '0'; + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + // TODO: handle overflow + next *= 16; + next += token - 'A' + 10; + break; + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + // TODO: handle overflow + next *= 16; + next += token - 'a' + 10; + break; + case '+': + case '-': + case '*': + case '/': + lv->token.type = TOKEN_UINT_TYPE; + lv->token.uintValue = next; + lv = _lexOperator(lv, token); + state = LEX_ROOT; + break; + default: + state = LEX_ERROR; + break; + } + break; + case LEX_EXPECT_PREFIX: + switch (token) { + case 'X': + case 'x': + next = 0; + state = LEX_EXPECT_HEX; + break; + default: + state = LEX_ERROR; + break; + } + break; + case LEX_ERROR: + // This shouldn't be reached + break; + } + } + + switch (state) { + case LEX_EXPECT_DECIMAL: + case LEX_EXPECT_HEX: + lv->token.type = TOKEN_UINT_TYPE; + lv->token.uintValue = next; + break; + case LEX_EXPECT_IDENTIFIER: + lv->token.type = TOKEN_IDENTIFIER_TYPE; + lv->token.identifierValue = strndup(tokenStart, string - tokenStart); + break; + case LEX_ERROR: + default: + lv->token.type = TOKEN_ERROR_TYPE; + break; + } + return adjusted; +} + +static const int _operatorPrecedence[] = { + 2, + 1, + 1, + 0 +}; + +static struct ParseTree* _parseTreeCreate() { + struct ParseTree* tree = malloc(sizeof(struct ParseTree)); + tree->token.type = TOKEN_ERROR_TYPE; + tree->rhs = 0; + tree->lhs = 0; + return tree; +} + +static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence) { + struct ParseTree* newTree = 0; + while (lv) { + int newPrecedence; + switch (lv->token.type) { + case TOKEN_IDENTIFIER_TYPE: + case TOKEN_UINT_TYPE: + if (tree->token.type == TOKEN_ERROR_TYPE) { + tree->token = lv->token; + lv = lv->next; + } else { + tree->token.type = TOKEN_ERROR_TYPE; + return 0; + } + break; + case TOKEN_OPERATOR_TYPE: + newPrecedence = _operatorPrecedence[lv->token.operatorValue]; + if (newPrecedence < precedence) { + newTree = _parseTreeCreate(); + *newTree = *tree; + tree->lhs = newTree; + tree->rhs = _parseTreeCreate(); + tree->token = lv->token; + lv = _parseExpression(tree->rhs, lv->next, newPrecedence); + if (tree->token.type == TOKEN_ERROR_TYPE) { + tree->token.type = TOKEN_ERROR_TYPE; + } + } else { + return lv; + } + break; + case TOKEN_ERROR_TYPE: + tree->token.type = TOKEN_ERROR_TYPE; + return 0; + } + } + + return 0; +} + +void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) { + if (!tree) { + return; + } + + tree->token.type = TOKEN_ERROR_TYPE; + tree->lhs = 0; + tree->rhs = 0; + + _parseExpression(tree, lv, _operatorPrecedence[OP_ASSIGN]); +} + +void lexFree(struct LexVector* lv) { + while (lv) { + struct LexVector* lvNext = lv->next; + free(lv); + lv = lvNext; + } +} + +void parseFree(struct ParseTree* tree) { + if (!tree) { + return; + } + + parseFree(tree->lhs); + parseFree(tree->rhs); + free(tree); +}
A
src/debugger/parser.h
@@ -0,0 +1,55 @@
+#ifndef PARSER_H +#define PARSER_H + +#include "common.h" +#include "debugger.h" + +enum LexState { + LEX_ERROR = -1, + LEX_ROOT = 0, + LEX_EXPECT_IDENTIFIER, + LEX_EXPECT_DECIMAL, + LEX_EXPECT_HEX, + LEX_EXPECT_PREFIX, +}; + +enum Operation { + OP_ASSIGN, + OP_ADD, + OP_SUBTRACT, + OP_MULTIPLY, + OP_DIVIDE +}; + +struct Token { + enum TokenType { + TOKEN_ERROR_TYPE, + TOKEN_UINT_TYPE, + TOKEN_IDENTIFIER_TYPE, + TOKEN_OPERATOR_TYPE, + } type; + union { + uint32_t uintValue; + char* identifierValue; + enum Operation operatorValue; + }; +}; + +struct LexVector { + struct LexVector* next; + struct Token token; +}; + +struct ParseTree { + struct Token token; + struct ParseTree* lhs; + struct ParseTree* rhs; +}; + +size_t lexExpression(struct LexVector* lv, const char* string, size_t length); +void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv); + +void lexFree(struct LexVector* lv); +void parseFree(struct ParseTree* tree); + +#endif