src/util/string.c (view raw)
1/* Copyright (c) 2013-2015 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba-util/string.h>
7
8#include <mgba-util/vector.h>
9
10#include <string.h>
11
12DEFINE_VECTOR(StringList, char*);
13
14#ifndef HAVE_STRNDUP
// Fallback strndup for platforms that lack one.
// Copies at most len characters from start into a freshly allocated,
// NUL-terminated buffer. Copying stops early at the first NUL in start.
// Returns NULL if the allocation fails; otherwise the caller owns the
// returned buffer and must free() it.
char* strndup(const char* start, size_t len) {
	// Measure what will actually be copied so that an oversized len
	// (including SIZE_MAX, where len + 1 would wrap) cannot overflow
	// the allocation size or over-allocate
	size_t copyLen = 0;
	while (copyLen < len && start[copyLen]) {
		++copyLen;
	}
	char* out = malloc(copyLen + 1);
	if (!out) {
		return NULL;
	}
	memcpy(out, start, copyLen);
	out[copyLen] = '\0';
	return out;
}
22#endif
23
24#ifndef HAVE_STRDUP
// Fallback strdup for platforms that lack one.
// Returns a freshly allocated copy of the NUL-terminated string str, or
// NULL if the allocation fails. The caller owns the result and must
// free() it.
char* strdup(const char* str) {
	// Size includes the terminator so a single memcpy copies it too
	size_t size = strlen(str) + 1;
	char* out = malloc(size);
	if (!out) {
		return NULL;
	}
	memcpy(out, str, size);
	return out;
}
32#endif
33
// Finds the last occurrence of needle that lies entirely within the first
// len bytes of haystack.
// Returns a pointer to the start of that occurrence, or NULL if there is
// none. An empty needle matches at every position, so the result is then
// haystack + len.
char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
	char* last = NULL;
	size_t needleLen = strlen(needle);
	if (needleLen > len) {
		return NULL;
	}
	// Bounding the start offset (rather than counting len down) keeps the
	// loop finite even when needleLen is 0
	size_t lastStart = len - needleLen;
	size_t i;
	for (i = 0; i <= lastStart; ++i) {
		if (strncmp(needle, &haystack[i], needleLen) == 0) {
			last = (char*) &haystack[i];
		}
	}
	return last;
}
45
// Reports whether the string s1 finishes with the string end.
// An empty end always matches.
bool endswith(const char* restrict s1, const char* restrict end) {
	const size_t fullLen = strlen(s1);
	const size_t suffixLen = strlen(end);
	// A suffix longer than the string itself can never match
	return fullLen >= suffixLen && strcmp(s1 + (fullLen - suffixLen), end) == 0;
}
54
// Reports whether the string s1 begins with the string start.
// An empty start always matches.
bool startswith(const char* restrict s1, const char* restrict start) {
	const size_t fullLen = strlen(s1);
	const size_t prefixLen = strlen(start);
	// A prefix longer than the string itself can never match
	return fullLen >= prefixLen && strncmp(s1, start, prefixLen) == 0;
}
63
// Decodes one code point from a UTF-16 stream held in native byte order.
// *unicode is advanced past every code unit consumed and *length (counted
// in bytes) is reduced by two per unit. When fewer than two bytes remain
// where a unit is needed, *length is set to 0 and 0 is returned.
// For any value in the surrogate range a second unit is always consumed;
// 0 is returned unless the pair is a high surrogate followed by a low one.
uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
	if (*length < 2) {
		*length = 0;
		return 0;
	}
	const uint16_t lead = *(*unicode)++;
	*length -= 2;

	// Anything outside D800..DFFF is a complete code point on its own
	if (lead < 0xD800 || lead >= 0xE000) {
		return lead;
	}

	if (*length < 2) {
		*length = 0;
		return 0;
	}
	const uint16_t trail = *(*unicode)++;
	*length -= 2;

	const int leadIsHigh = lead < 0xDC00;
	const int trailIsLow = trail >= 0xDC00 && trail < 0xE000;
	if (!leadIsHigh || !trailIsLow) {
		return 0;
	}
	return ((uint32_t) (lead - 0xD800) << 10) + (uint32_t) (trail - 0xDC00) + 0x10000;
}
93
// Decodes one code point from a UTF-8 byte stream.
// *unicode is advanced past every byte consumed and *length is reduced to
// match. Returns 0 when the input is empty, when the lead byte is not a
// valid 1-4 byte lead, or when a continuation byte is malformed. If the
// sequence is cut short by the end of input, *length is set to 0 and 0 is
// returned. Overlong encodings and surrogate values are not rejected.
uint32_t utf8Char(const char** unicode, size_t* length) {
	if (*length == 0) {
		return 0;
	}
	// Bytes are handled as unsigned: where plain char is signed, a lead
	// byte >= 0x80 would be sign-extended and leak high bits into the
	// decoded value
	uint8_t byte = (uint8_t) **unicode;
	--*length;
	++*unicode;
	if (!(byte & 0x80)) {
		return byte;
	}
	// tops[n] is the lead-byte prefix for a sequence with n + 1
	// continuation bytes; tops[n + 1] is the mask that isolates it
	static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
	size_t numBytes;
	for (numBytes = 0; numBytes < 3; ++numBytes) {
		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
			break;
		}
	}
	if (numBytes == 3) {
		// Stray continuation byte or a lead byte of 0xF8 and above
		return 0;
	}
	uint32_t unichar = byte & ~tops[numBytes];
	++numBytes;
	if (*length < numBytes) {
		*length = 0;
		return 0;
	}
	size_t i;
	for (i = 0; i < numBytes; ++i) {
		unichar <<= 6;
		byte = (uint8_t) **unicode;
		--*length;
		++*unicode;
		if ((byte & 0xC0) != 0x80) {
			return 0;
		}
		unichar |= byte & 0x3F;
	}
	return unichar;
}
134
// Encodes the code point unichar as UTF-8 into buffer and returns the
// number of bytes written (1 to 4). Values above 0x10FFFF are replaced by
// U+FFFD first. No terminator is written; buffer must have room for up to
// four bytes.
size_t toUtf8(uint32_t unichar, char* buffer) {
	if (unichar > 0x10FFFF) {
		unichar = 0xFFFD;
	}

	size_t total;
	uint32_t leadPrefix;
	if (unichar < 0x80) {
		buffer[0] = unichar;
		return 1;
	} else if (unichar < 0x800) {
		total = 2;
		leadPrefix = 0xC0;
	} else if (unichar < 0x10000) {
		total = 3;
		leadPrefix = 0xE0;
	} else if (unichar < 0x200000) {
		total = 4;
		leadPrefix = 0xF0;
	} else {
		// This shouldn't be possible
		return 0;
	}

	// Fill continuation bytes from the end, six payload bits at a time;
	// whatever is left over belongs in the lead byte
	size_t i;
	for (i = total - 1; i > 0; --i) {
		buffer[i] = (unichar & 0x3F) | 0x80;
		unichar >>= 6;
	}
	buffer[0] = unichar | leadPrefix;
	return total;
}
165
// Compares a UTF-16 string against a UTF-8 string code point by code point.
// Both lengths are given in bytes. Returns -1 if the UTF-16 string orders
// first, 1 if the UTF-8 string orders first, and 0 if they decode to the
// same sequence. When one string is a prefix of the other, the shorter
// one orders first.
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		// Decode first, then compare, so the final pair of code points
		// is checked before the lengths are consulted
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
186
// Converts length bytes of UTF-16 into a newly allocated, NUL-terminated
// UTF-8 string. Each code point is decoded with utf16Char and encoded with
// toUtf8, so input that utf16Char rejects contributes a single zero byte.
// Returns 0 if memory cannot be allocated; otherwise the caller owns the
// result and must free() it.
char* utf16to8(const uint16_t* utf16, size_t length) {
	// Worst case: every 2-byte unit expands to 3 bytes (a surrogate pair
	// turns 4 bytes into 4), an odd trailing byte yields 1 byte, and one
	// more byte holds the terminator
	size_t units = length / 2;
	if (units > (SIZE_MAX - 2) / 3) {
		return 0;
	}
	char* utf8 = malloc(units * 3 + 2);
	if (!utf8) {
		return 0;
	}

	size_t utf8Length = 0;
	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		utf8Length += toUtf8(unichar, &utf8[utf8Length]);
	}
	utf8[utf8Length] = '\0';

	// Give back the unused tail; if shrinking fails the original buffer
	// is still valid and correctly terminated
	char* trimmed = realloc(utf8, utf8Length + 1);
	return trimmed ? trimmed : utf8;
}
233
// Converts a single hexadecimal digit character (0-9, a-f, A-F) to its
// numeric value 0-15. Returns -1 for any other character.
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
268
// Parses exactly eight hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits on success. On a non-hex
// character, returns 0 and leaves *out untouched.
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining = 8;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
284
// Parses exactly six hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits on success. On a non-hex
// character, returns 0 and leaves *out untouched.
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining = 6;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
300
// Parses exactly four hexadecimal digits starting at line into *out.
// *out is cleared up front, so it reads 0 if parsing fails. Returns a
// pointer just past the digits on success, or 0 on a non-hex character.
const char* hex16(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	int remaining = 4;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
317
// Parses exactly three hexadecimal digits starting at line into *out.
// *out is cleared up front, so it reads 0 if parsing fails. Returns a
// pointer just past the digits on success, or 0 on a non-hex character.
const char* hex12(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	int remaining = 3;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
334
// Parses exactly two hexadecimal digits starting at line into *out.
// *out is cleared up front, so it reads 0 if parsing fails. Returns a
// pointer just past the digits on success, or 0 on a non-hex character.
const char* hex8(const char* line, uint8_t* out) {
	*out = 0;
	uint8_t accum = 0;
	int remaining = 2;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint8_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
351
// Parses a single hexadecimal digit at line into *out.
// *out is cleared up front, so it reads 0 if parsing fails. Returns a
// pointer just past the digit on success, or 0 on a non-hex character.
const char* hex4(const char* line, uint8_t* out) {
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return 0;
	}
	*out = (uint8_t) nybble;
	// Step past the consumed digit, matching the other hexN parsers,
	// which all return the position after what they read
	return line + 1;
}
365
// Strips trailing whitespace (as classified by isspace) from string in
// place by overwriting it with NUL bytes. An empty or all-whitespace
// string ends up empty.
void rtrim(char* string) {
	size_t len = strlen(string);
	// The length is checked before each read so an all-whitespace string
	// never touches memory in front of the buffer. The cast is needed
	// because isspace is only defined for unsigned char values and EOF.
	while (len > 0 && isspace((unsigned char) string[len - 1])) {
		--len;
		string[len] = '\0';
	}
}
375}