src/util/string.c (view raw)
1/* Copyright (c) 2013-2015 Jeffrey Pfau
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6#include <mgba-util/string.h>
7
8#include <string.h>
9
#ifndef HAVE_STRNDUP
/* Fallback strndup for platforms whose libc does not provide one.
 *
 * Returns a newly allocated, NUL-terminated copy of at most len characters
 * of start, stopping early at the first NUL. The caller owns the result and
 * must free() it. Returns NULL if the allocation fails.
 */
char* strndup(const char* start, size_t len) {
	// Measure the real length first so a huge len neither over-allocates nor
	// overflows the len + 1 size computation
	size_t copyLen = 0;
	while (copyLen < len && start[copyLen]) {
		++copyLen;
	}
	char* out = malloc(copyLen + 1);
	if (!out) {
		return NULL;
	}
	memcpy(out, start, copyLen);
	out[copyLen] = '\0';
	return out;
}
#endif
19
#ifndef HAVE_STRDUP
/* Fallback strdup for platforms whose libc does not provide one.
 *
 * Returns a newly allocated copy of str, including its terminator. The
 * caller owns the result and must free() it. Returns NULL if the allocation
 * fails.
 */
char* strdup(const char* str) {
	size_t size = strlen(str) + 1;
	char* out = malloc(size);
	if (!out) {
		return NULL;
	}
	// size already counts the terminator, so one copy does everything
	memcpy(out, str, size);
	return out;
}
#endif
29
/* Finds the last occurrence of needle that lies entirely within the first
 * len bytes of haystack.
 *
 * Returns a pointer to the start of that occurrence, or NULL if there is
 * none (including when needle is longer than len). An empty needle matches
 * at the very end of the window, so haystack + len is returned.
 */
char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
	size_t needleLen = strlen(needle);
	if (needleLen > len) {
		return NULL;
	}
	if (needleLen == 0) {
		// Handled explicitly: an unsigned "len >= 0" style loop would never terminate
		return (char*) haystack + len;
	}
	// Walk candidate start offsets right to left so the first hit is the last occurrence
	size_t start = len - needleLen;
	for (;;) {
		if (strncmp(needle, haystack + start, needleLen) == 0) {
			return (char*) haystack + start;
		}
		if (start == 0) {
			break;
		}
		--start;
	}
	return NULL;
}
41
/* Reports whether the string s1 finishes with the string end.
 * An empty end always matches.
 */
bool endswith(const char* restrict s1, const char* restrict end) {
	const size_t suffixLen = strlen(end);
	const size_t totalLen = strlen(s1);
	if (suffixLen > totalLen) {
		// The suffix cannot fit inside a shorter string
		return false;
	}
	const char* tail = s1 + (totalLen - suffixLen);
	return !strcmp(tail, end);
}
50
/* Reports whether the string s1 begins with the string start.
 * An empty start always matches.
 */
bool startswith(const char* restrict s1, const char* restrict start) {
	const size_t prefixLen = strlen(start);
	if (strlen(s1) < prefixLen) {
		// The prefix cannot fit inside a shorter string
		return false;
	}
	return !strncmp(s1, start, prefixLen);
}
59
/* Decodes one code point from a UTF-16 stream of native-endian code units.
 *
 * *unicode is advanced past every code unit consumed and *length, which is
 * counted in bytes, is reduced by two per code unit. If fewer than two bytes
 * are available for a code unit that is needed, *length is set to 0 and 0 is
 * returned. A malformed surrogate pair also yields 0, after both of its code
 * units have been consumed.
 */
uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
	if (*length < 2) {
		*length = 0;
		return 0;
	}
	const uint16_t* cursor = *unicode;
	uint32_t first = *cursor++;
	*unicode = cursor;
	*length -= 2;
	if (first < 0xD800 || first > 0xDFFF) {
		// Not a surrogate: the code unit is the code point
		return first;
	}

	if (*length < 2) {
		*length = 0;
		return 0;
	}
	uint32_t second = *cursor++;
	*unicode = cursor;
	*length -= 2;

	// The first unit must be a high surrogate and the second a low surrogate
	if (first >= 0xDC00 || second < 0xDC00 || second > 0xDFFF) {
		return 0;
	}
	return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
}
89
/* Decodes one code point from a UTF-8 byte stream.
 *
 * *unicode is advanced past every byte consumed and *length is reduced by
 * the same amount. Returns 0 when *length is 0, when the lead byte is not a
 * valid 1- to 4-byte lead, or when a continuation byte is malformed (the bad
 * byte is consumed). If the sequence is cut short by *length, *length is set
 * to 0 and 0 is returned.
 */
uint32_t utf8Char(const char** unicode, size_t* length) {
	if (*length == 0) {
		return 0;
	}
	// Read bytes as unsigned: plain char may be signed, and sign-extension
	// would otherwise leak set high bits into the decoded code point
	uint8_t byte = (uint8_t) **unicode;
	--*length;
	++*unicode;
	if (!(byte & 0x80)) {
		return byte;
	}
	// tops[n] is the lead-byte tag of an (n + 2)-byte sequence and
	// tops[n + 1] is the mask that isolates that tag
	static const uint8_t tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
	size_t numBytes;
	for (numBytes = 0; numBytes < 3; ++numBytes) {
		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
			break;
		}
	}
	if (numBytes == 3) {
		// Stray continuation byte or a lead byte of 0xF8 and above
		return 0;
	}
	// Strip the tag bits to keep only the payload bits of the lead byte
	uint32_t unichar = byte & ~tops[numBytes] & 0xFF;
	++numBytes;
	if (*length < numBytes) {
		*length = 0;
		return 0;
	}
	size_t i;
	for (i = 0; i < numBytes; ++i) {
		unichar <<= 6;
		byte = (uint8_t) **unicode;
		--*length;
		++*unicode;
		if ((byte & 0xC0) != 0x80) {
			return 0;
		}
		unichar |= byte & 0x3F;
	}
	return unichar;
}
130
/* Encodes one code point as UTF-8 into buffer and returns the number of
 * bytes written (1 to 4). No terminator is written. Values above 0x10FFFF
 * are replaced with U+FFFD before encoding.
 */
size_t toUtf8(uint32_t unichar, char* buffer) {
	uint32_t cp = unichar > 0x10FFFF ? 0xFFFD : unichar;
	if (cp < 0x80) {
		buffer[0] = cp;
		return 1;
	}

	// Pick the sequence length and the matching lead-byte tag
	size_t total;
	uint32_t lead;
	if (cp < 0x800) {
		total = 2;
		lead = 0xC0;
	} else if (cp < 0x10000) {
		total = 3;
		lead = 0xE0;
	} else {
		total = 4;
		lead = 0xF0;
	}

	// Fill continuation bytes from the tail, six payload bits at a time
	size_t i;
	for (i = total - 1; i > 0; --i) {
		buffer[i] = (cp & 0x3F) | 0x80;
		cp >>= 6;
	}
	// Whatever bits remain belong in the lead byte
	buffer[0] = cp | lead;
	return total;
}
161
/* Compares a UTF-16 string against a UTF-8 string code point by code point.
 *
 * Both lengths are in bytes. Returns -1 if the UTF-16 string orders first,
 * 1 if the UTF-8 string orders first, and 0 if they are equal. When one
 * string is a prefix of the other, the shorter one orders first.
 */
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so the final pair is checked even
		// when decoding it exhausts one or both inputs
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
182
/* Converts a UTF-16 buffer into a newly allocated, NUL-terminated UTF-8
 * string.
 *
 * length is the size of the input in bytes. Each code point is decoded with
 * utf16Char and re-encoded with toUtf8. The caller owns the result and must
 * free() it. Returns 0 if memory cannot be allocated.
 */
char* utf16to8(const uint16_t* utf16, size_t length) {
	char buffer[4];
	// First guess: one output byte per input code unit, plus the terminator.
	// The buffer grows below whenever that guess turns out to be too small.
	size_t capacity = length / 2 + 1;
	char* utf8 = malloc(capacity);
	if (!utf8) {
		return 0;
	}
	size_t utf8Length = 0;
	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		size_t bytes = toUtf8(unichar, buffer);
		// utf8Length < capacity always holds here; keep one byte spare for
		// the terminator once the encoded bytes are appended
		if (bytes >= capacity - utf8Length) {
			if (capacity > ((size_t) -1 - sizeof(buffer)) / 2) {
				// Doubling would overflow size_t
				free(utf8);
				return 0;
			}
			size_t newCapacity = capacity * 2 + sizeof(buffer);
			char* grown = realloc(utf8, newCapacity);
			if (!grown) {
				free(utf8);
				return 0;
			}
			utf8 = grown;
			capacity = newCapacity;
		}
		memcpy(utf8 + utf8Length, buffer, bytes);
		utf8Length += bytes;
	}
	utf8[utf8Length] = '\0';

	// Give back the unused tail; if shrinking fails the larger buffer is
	// still a valid result
	char* trimmed = realloc(utf8, utf8Length + 1);
	return trimmed ? trimmed : utf8;
}
229
/* Converts a single hexadecimal digit character, in either case, to its
 * value 0-15. Returns -1 for any other character.
 */
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
264
/* Parses exactly eight hexadecimal digits from line into *out.
 *
 * Returns a pointer to the character after the last digit consumed, or 0 if
 * a non-hex character is encountered, in which case *out is left untouched.
 */
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining;
	for (remaining = 8; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		// Make room for the new digit in the low four bits
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
280
/* Parses exactly six hexadecimal digits from line into *out.
 *
 * Returns a pointer to the character after the last digit consumed, or 0 if
 * a non-hex character is encountered, in which case *out is left untouched.
 */
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining;
	for (remaining = 6; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		// Make room for the new digit in the low four bits
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
296
/* Parses exactly four hexadecimal digits from line into *out.
 *
 * Returns a pointer to the character after the last digit consumed, or 0 if
 * a non-hex character is encountered, in which case *out has been set to 0.
 */
const char* hex16(const char* line, uint16_t* out) {
	// Clear the output up front so a failed parse leaves a defined value
	*out = 0;
	uint16_t accum = 0;
	int remaining;
	for (remaining = 4; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
313
/* Parses exactly three hexadecimal digits from line into *out.
 *
 * Returns a pointer to the character after the last digit consumed, or 0 if
 * a non-hex character is encountered, in which case *out has been set to 0.
 */
const char* hex12(const char* line, uint16_t* out) {
	// Clear the output up front so a failed parse leaves a defined value
	*out = 0;
	uint16_t accum = 0;
	int remaining;
	for (remaining = 3; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
330
/* Parses exactly two hexadecimal digits from line into *out.
 *
 * Returns a pointer to the character after the last digit consumed, or 0 if
 * a non-hex character is encountered, in which case *out has been set to 0.
 */
const char* hex8(const char* line, uint8_t* out) {
	// Clear the output up front so a failed parse leaves a defined value
	*out = 0;
	uint8_t accum = 0;
	int remaining;
	for (remaining = 2; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
347
/* Parses a single hexadecimal digit from line into *out.
 *
 * Returns a pointer to the character after the digit, matching the other
 * hexN parsers, or 0 if the character is not a hex digit, in which case
 * *out has been set to 0.
 */
const char* hex4(const char* line, uint8_t* out) {
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return 0;
	}
	*out = nybble;
	// Step past the consumed digit so callers can keep parsing from the result
	return line + 1;
}
361
/* Removes trailing whitespace, as classified by isspace, from string in
 * place by overwriting it with NUL bytes. An empty or all-whitespace string
 * ends up empty.
 */
void rtrim(char* string) {
	size_t len = strlen(string);
	// Check the bound before touching the character so nothing before the
	// start of the buffer is ever read. The unsigned char cast is needed
	// because isspace is undefined for negative values other than EOF.
	while (len > 0 && isspace((unsigned char) string[len - 1])) {
		--len;
		string[len] = '\0';
	}
}
371}