Util: Text codecs now properly handle null bytes
Jeffrey Pfau jeffrey@endrift.com
Tue, 01 Nov 2016 22:28:26 -0700
2 files changed, 96 insertions(+), 17 deletions(-)
M src/util/test/text-codec.c → src/util/test/text-codec.c
@@ -824,6 +824,80 @@ TextCodecDeinit(&codec);
vf->close(vf); } +M_TEST_DEFINE(nullBytes) { + static const char file[] = + "00=A\n" + "0000=a\n" + "0001=b\n" + "01=B\n" + "0100=c"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, false)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "A", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "a", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "aA", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "b", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, 
&iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "c", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "cB", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + M_TEST_SUITE_DEFINE(TextCodec, cmocka_unit_test(emptyCodec), cmocka_unit_test(singleEntry),@@ -834,4 +908,5 @@ cmocka_unit_test(longEntryReverse),
cmocka_unit_test(overlappingEntry), cmocka_unit_test(overlappingEntryReverse), cmocka_unit_test(raggedEntry), - cmocka_unit_test(controlCodes)) + cmocka_unit_test(controlCodes), + cmocka_unit_test(nullBytes))
M src/util/text-codec.c → src/util/text-codec.c
@@ -32,10 +32,11 @@ TableInit(&node->children, 32, _cleanTree);
return node; } -static void _insertLeafNullTerminated(struct TextCodecNode* node, uint8_t* word, uint8_t* output) { - if (!word[0]) { - node->leafLength = strlen((char*) output); - node->leaf = (uint8_t*) strdup((char*) output); +static void _insertLeaf(struct TextCodecNode* node, uint8_t* word, size_t wordLength, uint8_t* output, size_t outputLength) { + if (!wordLength) { + node->leafLength = outputLength; + node->leaf = malloc(outputLength); + memcpy(node->leaf, output, outputLength); return; } struct TextCodecNode* subnode = TableLookup(&node->children, word[0]);@@ -43,7 +44,7 @@ if (!subnode) {
subnode = _createNode(); TableInsert(&node->children, word[0], subnode); } - _insertLeafNullTerminated(subnode, &word[1], output); + _insertLeaf(subnode, &word[1], wordLength - 1, output, outputLength); } bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReverse) {@@ -59,17 +60,20 @@ uint8_t wordBuffer[5];
ssize_t length; while ((length = vf->readline(vf, lineBuffer, sizeof(lineBuffer))) > 0) { memset(wordBuffer, 0, sizeof(wordBuffer)); - if (lineBuffer[length - 1] == '\n') { - lineBuffer[length - 1] = '\0'; + if (lineBuffer[length - 1] == '\n' || lineBuffer[length - 1] == '\r') { + --length; + } + if (!length) { + continue; } if (lineBuffer[length - 1] == '\r') { - lineBuffer[length - 1] = '\0'; + --length; } - if (length > 1 && lineBuffer[length - 2] == '\r') { - lineBuffer[length - 2] = '\0'; + if (!length) { + continue; } size_t i; - for (i = 0; i < sizeof(wordBuffer) - 1; ++i) { + for (i = 0; i < sizeof(wordBuffer) - 1 && i < (size_t) length; ++i) { if (!hex8(&lineBuffer[i * 2], &wordBuffer[i])) { break; }@@ -102,19 +106,19 @@ }
if (i == 0) { return false; } - lineBuffer[1] = '\0'; - _insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) lineBuffer); + _insertLeaf(codec->forwardRoot, wordBuffer, i, (uint8_t*) lineBuffer, 1); if (codec->reverseRoot) { - _insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) lineBuffer, wordBuffer); + _insertLeaf(codec->reverseRoot, (uint8_t*) lineBuffer, 1, wordBuffer, i); } } } else { if (lineBuffer[i * 2] != '=') { return false; } - _insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) &lineBuffer[i * 2 + 1]); + size_t offset = i * 2 + 1; + _insertLeaf(codec->forwardRoot, wordBuffer, i, (uint8_t*) &lineBuffer[offset], length - offset); if (codec->reverseRoot) { - _insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) &lineBuffer[i * 2 + 1], wordBuffer); + _insertLeaf(codec->reverseRoot, (uint8_t*) &lineBuffer[offset], length - offset, wordBuffer, i); } } }