Util: Text codec with TBL loader
Jeffrey Pfau jeffrey@endrift.com
Tue, 01 Nov 2016 00:07:30 -0700
4 files changed,
994 insertions(+),
0 deletions(-)
A
src/util/test/text-codec.c
@@ -0,0 +1,778 @@
+/* Copyright (c) 2013-2016 Jeffrey Pfau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "util/test/suite.h" + +#include "util/text-codec.h" +#include "util/vfs.h" + +M_TEST_DEFINE(emptyCodec) { + struct VFile* vf = VFileMemChunk(NULL, 0); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, true)); + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(singleEntry) { + static const char file[] = "41=B"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, false)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "BB", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "BB", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(singleEntryReverse) { + static const char file[] = "41=B"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, true)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "A", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "AA", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "A", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "A", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'B', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "AA", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(twoEntry) { + static const char file[] = + "41=B\n" + "43=D"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, false)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "D", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "BB", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "DD", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "BD", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "DB", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "D", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "D", 1); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(longEntry) { + static const char file[] = + "01=Ab\n" + "02=cd"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, false)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "cd", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "AbAb", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "cdcd", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "Abcd", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "cdAb", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "cd", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "cd", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(longEntryReverse) { + static const char file[] = + "01=Ab\n" + "02=cd"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, true)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "\1", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "\2", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\1\1", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\2\2", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\1\2", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\2\1", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'e', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "\1", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'e', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "\2", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'e', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 0); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'e', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "\1", 1); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'e', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 0); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'e', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "\2", 1); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(overlappingEntry) { + static const char file[] = + "FF01=Ab\n" + "FF02=Ac"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, false)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ac", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ac", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "AbAb", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "AcAc", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "AbAc", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 2, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "AcAb", 4); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 0); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0xFF, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 3, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "Ab", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(overlappingEntryReverse) { + static const char file[] = + "FF01=Ab\n" + "FF02=Ac"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, true)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\1", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\1", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\2", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\2", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "\xFF\1\xFF\1", 4); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "\xFF\2\xFF\2", 4); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "\xFF\1\xFF\2", 4); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'c', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 4); + assert_memory_equal(output, "\xFF\2\xFF\1", 4); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\1", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 0); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\1", 2); + + len = 0; + TextCodecStartEncode(&codec, &iter); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'b', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'A', output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 'd', output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "\xFF\1", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_DEFINE(raggedEntry) { + static const char file[] = + "4142=bc\n" + "41=B\n" + "42=C"; + struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1); + struct TextCodec codec; + assert_true(TextCodecLoadTBL(&codec, vf, false)); + struct TextCodecIterator iter; + uint8_t output[16] = {}; + size_t len; + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 1); + assert_memory_equal(output, "B", 1); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "BB", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "CB", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "bc", 2); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 3); + assert_memory_equal(output, "bcB", 3); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 3); + assert_memory_equal(output, "Bbc", 3); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 3); + assert_memory_equal(output, "bcC", 3); + + len = 0; + TextCodecStartDecode(&codec, &iter); + len += TextCodecAdvance(&iter, 0x41, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x43, output + len, sizeof(output) - len); + len += TextCodecAdvance(&iter, 0x42, output + len, sizeof(output) - len); + len += TextCodecFinish(&iter, output + len, sizeof(output) - len); + assert_int_equal(len, 2); + assert_memory_equal(output, "BC", 2); + + TextCodecDeinit(&codec); + vf->close(vf); +} + +M_TEST_SUITE_DEFINE(TextCodec, + cmocka_unit_test(emptyCodec), + cmocka_unit_test(singleEntry), + cmocka_unit_test(singleEntryReverse), + cmocka_unit_test(twoEntry), + cmocka_unit_test(longEntry), + cmocka_unit_test(longEntryReverse), + cmocka_unit_test(overlappingEntry), + cmocka_unit_test(overlappingEntryReverse), + cmocka_unit_test(raggedEntry))
M
src/util/test/util.c
→
src/util/test/util.c
@@ -5,10 +5,12 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "util/test/suite.h" +M_TEST_SUITE_DECLARE(TextCodec); M_TEST_SUITE_DECLARE(VFS); int TestRunUtil(void) { int failures = 0; + failures += M_TEST_SUITE_RUN(TextCodec); failures += M_TEST_SUITE_RUN(VFS); return failures; }
A
src/util/text-codec.c
@@ -0,0 +1,182 @@
+/* Copyright (c) 2013-2016 Jeffrey Pfau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "text-codec.h" + +#include "util/string.h" +#include "util/table.h" +#include "util/vfs.h" + +struct TextCodecNode { + uint8_t* leaf; + size_t leafLength; + struct Table children; +}; + +static void _cleanTree(void* value) { + struct TextCodecNode* node = value; + if (node->leaf) { + free(node->leaf); + } + TableDeinit(&node->children); + free(node); +} + +static struct TextCodecNode* _createNode(void) { + struct TextCodecNode* node = malloc(sizeof(*node)); + node->leaf = NULL; + node->leafLength = 0; + TableInit(&node->children, 32, _cleanTree); + return node; +} + +static void _insertLeafNullTerminated(struct TextCodecNode* node, uint8_t* word, uint8_t* output) { + if (!word[0]) { + node->leafLength = strlen((char*) output); + node->leaf = (uint8_t*) strdup((char*) output); + return; + } + struct TextCodecNode* subnode = TableLookup(&node->children, word[0]); + if (!subnode) { + subnode = _createNode(); + TableInsert(&node->children, word[0], subnode); + } + _insertLeafNullTerminated(subnode, &word[1], output); +} + +bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReverse) { + codec->forwardRoot = _createNode(); + if (createReverse) { + codec->reverseRoot = _createNode(); + } else { + codec->reverseRoot = NULL; + } + + char lineBuffer[128]; + uint8_t wordBuffer[5]; + ssize_t length; + while ((length = vf->readline(vf, lineBuffer, sizeof(lineBuffer))) > 0) { + memset(wordBuffer, 0, sizeof(wordBuffer)); + if (lineBuffer[length - 1] == '\n') { + lineBuffer[length - 1] = '\0'; + } + if (lineBuffer[length - 1] == '\r') { + lineBuffer[length - 1] = '\0'; + } + if (length > 1 && lineBuffer[length - 2] == '\r') { + lineBuffer[length - 2] = '\0'; + } + size_t i; + for (i = 0; i < sizeof(wordBuffer) - 1; ++i) { + if (!hex8(&lineBuffer[i * 2], &wordBuffer[i])) { + break; + } + } + if (!i) { + uint8_t value; + if (!hex8(lineBuffer, &value)) { + switch (lineBuffer[0]) { + case '*': + wordBuffer[0] = '\n'; + break; + case '\\': + wordBuffer[0] = '\x30'; + break; + case '/': + wordBuffer[0] = '\x31'; + break; + default: + return false; + } + size_t start = 1; + if (lineBuffer[1] == '=') { + start = 2; + } + for (i = 0; i < sizeof(wordBuffer) - 1; ++i) { + if (!hex8(&lineBuffer[start + i * 2], &wordBuffer[i])) { + break; + } + } + if (i == 0) { + return false; + } + lineBuffer[1] = '\0'; + _insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) lineBuffer); + if (codec->reverseRoot) { + _insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) lineBuffer, wordBuffer); + } + } + } else { + if (lineBuffer[i * 2] != '=') { + return false; + } + _insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) &lineBuffer[i * 2 + 1]); + if (codec->reverseRoot) { + _insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) &lineBuffer[i * 2 + 1], wordBuffer); + } + } + } + return length == 0; +} + +void TextCodecDeinit(struct TextCodec* codec) { + if (codec->forwardRoot) { + _cleanTree(codec->forwardRoot); + codec->forwardRoot = NULL; + } + if (codec->reverseRoot) { + _cleanTree(codec->reverseRoot); + codec->reverseRoot = NULL; + } +} + +void TextCodecStartDecode(struct TextCodec* codec, struct TextCodecIterator* iter) { + iter->root = codec->forwardRoot; + iter->current = iter->root; +} + +void TextCodecStartEncode(struct TextCodec* codec, struct TextCodecIterator* iter) { + iter->root = codec->reverseRoot; + iter->current = iter->root; +} + +static size_t _TextCodecFinishInternal(struct TextCodecNode* node, uint8_t* output, size_t outputLength) { + if (outputLength > node->leafLength) { + outputLength = node->leafLength; + } + if (node->leafLength == 0) { + return 0; + } + memcpy(output, node->leaf, outputLength); + return node->leafLength; +} + +size_t TextCodecAdvance(struct TextCodecIterator* iter, uint8_t byte, uint8_t* output, size_t outputLength) { + struct TextCodecNode* node = TableLookup(&iter->current->children, byte); + if (!node) { + ssize_t size = _TextCodecFinishInternal(iter->current, output, outputLength); + if (size < 0) { + size = 0; + } + output += size; + outputLength -= size; + if (!outputLength) { + return size; + } + if (iter->current == iter->root) { + return 0; + } + iter->current = iter->root; + return TextCodecAdvance(iter, byte, output, outputLength) + size; + } + iter->current = node; + return 0; +} + +size_t TextCodecFinish(struct TextCodecIterator* iter, uint8_t* output, size_t outputLength) { + struct TextCodecNode* node = iter->current; + iter->current = iter->root; + return _TextCodecFinishInternal(node, output, outputLength); +}
A
src/util/text-codec.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2013-2016 Jeffrey Pfau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef TEXT_CODEC_H +#define TEXT_CODEC_H + +#include "util/common.h" + +struct TextCodecNode; +struct TextCodec { + struct TextCodecNode* forwardRoot; + struct TextCodecNode* reverseRoot; +}; + +struct TextCodecIterator { + struct TextCodecNode* root; + struct TextCodecNode* current; +}; + +struct VFile; +bool TextCodecLoadTBL(struct TextCodec*, struct VFile*, bool createReverse); +void TextCodecDeinit(struct TextCodec*); + +void TextCodecStartDecode(struct TextCodec*, struct TextCodecIterator*); +void TextCodecStartEncode(struct TextCodec*, struct TextCodecIterator*); + +size_t TextCodecAdvance(struct TextCodecIterator*, uint8_t byte, uint8_t* output, size_t outputLength); +size_t TextCodecFinish(struct TextCodecIterator*, uint8_t* output, size_t outputLength); + +#endif