src/util/text-codec.c (view raw)
1/* Copyright (c) 2013-2016 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include "text-codec.h"
7
8#include "util/string.h"
9#include "util/table.h"
10#include "util/vfs.h"
11
12struct TextCodecNode {
13 uint8_t* leaf;
14 size_t leafLength;
15 struct Table children;
16};
17
18static void _cleanTree(void* value) {
19 struct TextCodecNode* node = value;
20 if (node->leaf) {
21 free(node->leaf);
22 }
23 TableDeinit(&node->children);
24 free(node);
25}
26
27static struct TextCodecNode* _createNode(void) {
28 struct TextCodecNode* node = malloc(sizeof(*node));
29 node->leaf = NULL;
30 node->leafLength = 0;
31 TableInit(&node->children, 32, _cleanTree);
32 return node;
33}
34
35static void _insertLeafNullTerminated(struct TextCodecNode* node, uint8_t* word, uint8_t* output) {
36 if (!word[0]) {
37 node->leafLength = strlen((char*) output);
38 node->leaf = (uint8_t*) strdup((char*) output);
39 return;
40 }
41 struct TextCodecNode* subnode = TableLookup(&node->children, word[0]);
42 if (!subnode) {
43 subnode = _createNode();
44 TableInsert(&node->children, word[0], subnode);
45 }
46 _insertLeafNullTerminated(subnode, &word[1], output);
47}
48
49bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReverse) {
50 codec->forwardRoot = _createNode();
51 if (createReverse) {
52 codec->reverseRoot = _createNode();
53 } else {
54 codec->reverseRoot = NULL;
55 }
56
57 char lineBuffer[128];
58 uint8_t wordBuffer[5];
59 ssize_t length;
60 while ((length = vf->readline(vf, lineBuffer, sizeof(lineBuffer))) > 0) {
61 memset(wordBuffer, 0, sizeof(wordBuffer));
62 if (lineBuffer[length - 1] == '\n') {
63 lineBuffer[length - 1] = '\0';
64 }
65 if (lineBuffer[length - 1] == '\r') {
66 lineBuffer[length - 1] = '\0';
67 }
68 if (length > 1 && lineBuffer[length - 2] == '\r') {
69 lineBuffer[length - 2] = '\0';
70 }
71 size_t i;
72 for (i = 0; i < sizeof(wordBuffer) - 1; ++i) {
73 if (!hex8(&lineBuffer[i * 2], &wordBuffer[i])) {
74 break;
75 }
76 }
77 if (!i) {
78 uint8_t value;
79 if (!hex8(lineBuffer, &value)) {
80 switch (lineBuffer[0]) {
81 case '*':
82 lineBuffer[0] = '\n';
83 break;
84 case '\\':
85 lineBuffer[0] = '\x1E';
86 break;
87 case '/':
88 lineBuffer[0] = '\x1F';
89 break;
90 default:
91 return false;
92 }
93 size_t start = 1;
94 if (lineBuffer[1] == '=') {
95 start = 2;
96 }
97 for (i = 0; i < sizeof(wordBuffer) - 1; ++i) {
98 if (!hex8(&lineBuffer[start + i * 2], &wordBuffer[i])) {
99 break;
100 }
101 }
102 if (i == 0) {
103 return false;
104 }
105 lineBuffer[1] = '\0';
106 _insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) lineBuffer);
107 if (codec->reverseRoot) {
108 _insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) lineBuffer, wordBuffer);
109 }
110 }
111 } else {
112 if (lineBuffer[i * 2] != '=') {
113 return false;
114 }
115 _insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) &lineBuffer[i * 2 + 1]);
116 if (codec->reverseRoot) {
117 _insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) &lineBuffer[i * 2 + 1], wordBuffer);
118 }
119 }
120 }
121 return length == 0;
122}
123
124void TextCodecDeinit(struct TextCodec* codec) {
125 if (codec->forwardRoot) {
126 _cleanTree(codec->forwardRoot);
127 codec->forwardRoot = NULL;
128 }
129 if (codec->reverseRoot) {
130 _cleanTree(codec->reverseRoot);
131 codec->reverseRoot = NULL;
132 }
133}
134
135void TextCodecStartDecode(struct TextCodec* codec, struct TextCodecIterator* iter) {
136 iter->root = codec->forwardRoot;
137 iter->current = iter->root;
138}
139
140void TextCodecStartEncode(struct TextCodec* codec, struct TextCodecIterator* iter) {
141 iter->root = codec->reverseRoot;
142 iter->current = iter->root;
143}
144
145static size_t _TextCodecFinishInternal(struct TextCodecNode* node, uint8_t* output, size_t outputLength) {
146 if (outputLength > node->leafLength) {
147 outputLength = node->leafLength;
148 }
149 if (node->leafLength == 0) {
150 return 0;
151 }
152 memcpy(output, node->leaf, outputLength);
153 return node->leafLength;
154}
155
156size_t TextCodecAdvance(struct TextCodecIterator* iter, uint8_t byte, uint8_t* output, size_t outputLength) {
157 struct TextCodecNode* node = TableLookup(&iter->current->children, byte);
158 if (!node) {
159 ssize_t size = _TextCodecFinishInternal(iter->current, output, outputLength);
160 if (size < 0) {
161 size = 0;
162 }
163 output += size;
164 outputLength -= size;
165 if (!outputLength) {
166 return size;
167 }
168 if (iter->current == iter->root) {
169 return 0;
170 }
171 iter->current = iter->root;
172 return TextCodecAdvance(iter, byte, output, outputLength) + size;
173 }
174 iter->current = node;
175 return 0;
176}
177
178size_t TextCodecFinish(struct TextCodecIterator* iter, uint8_t* output, size_t outputLength) {
179 struct TextCodecNode* node = iter->current;
180 iter->current = iter->root;
181 return _TextCodecFinishInternal(node, output, outputLength);
182}