all repos — mgba @ 57c127589067d0b6e94083043134c2e5ac732449

mGBA Game Boy Advance Emulator

src/util/string.c (view raw)

  1/* Copyright (c) 2013-2015 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include "util/string.h"
  7
  8#include <string.h>
  9
 10#ifndef HAVE_STRNDUP
 // Fallback for platforms that lack strndup. Copies at most len bytes of start
 // (stopping early at a NUL) into a newly malloc'd, NUL-terminated buffer that
 // the caller must free. Returns NULL if the allocation fails or if len + 1
 // would overflow size_t.
 char* strndup(const char* start, size_t len) {
 	// This is suboptimal, but anything recent should have strndup
 	size_t size = len + 1;
 	if (!size) {
 		// len + 1 wrapped around to zero
 		return NULL;
 	}
 	char* out = malloc(size);
 	if (!out) {
 		return NULL;
 	}
 	// strncpy stops at the first NUL in start and zero-fills the remainder
 	strncpy(out, start, len);
 	out[len] = '\0';
 	return out;
 }
 18#endif
 19
 20#ifndef HAVE_STRDUP
 // Fallback for platforms that lack strdup. Returns a newly malloc'd copy of
 // the NUL-terminated string str, which the caller must free, or NULL if the
 // allocation fails.
 char* strdup(const char* str) {
 	// Include the terminator in the copied size so one memcpy does everything
 	size_t size = strlen(str) + 1;
 	char* out = malloc(size);
 	if (!out) {
 		return NULL;
 	}
 	memcpy(out, str, size);
 	return out;
 }
 28#endif
 29
 // Finds the last occurrence of the NUL-terminated string needle that starts
 // at an offset where needle still fits inside the first len bytes of haystack.
 // Returns a pointer to that occurrence, or NULL if there is none (including
 // when needle is longer than len). An empty needle matches at haystack + len.
 char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
 	size_t needleLen = strlen(needle);
 	if (needleLen > len) {
 		return NULL;
 	}
 	// Walk candidate offsets from the highest one down, so the first hit is
 	// the last match. Counting down to zero explicitly avoids the unsigned
 	// wraparound that made an empty needle loop forever.
 	size_t pos = len - needleLen;
 	for (;;) {
 		if (strncmp(needle, haystack + pos, needleLen) == 0) {
 			return (char*) (haystack + pos);
 		}
 		if (pos == 0) {
 			break;
 		}
 		--pos;
 	}
 	return NULL;
 }
 41
 // Decodes one code point from a UTF-16 stream. *unicode is advanced past each
 // 16-bit unit that is read and *length (a byte count) is reduced by 2 per
 // unit. Returns the code point, or 0 when fewer than 2 bytes remain (in which
 // case *length is set to 0) or when the surrogate sequence is malformed.
 uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
 	if (*length < 2) {
 		*length = 0;
 		return 0;
 	}
 	const uint16_t first = *(*unicode)++;
 	*length -= 2;

 	// Anything outside the surrogate range stands for itself
 	if (first < 0xD800 || first >= 0xE000) {
 		return first;
 	}

 	// A surrogate needs a second unit
 	if (*length < 2) {
 		*length = 0;
 		return 0;
 	}
 	const uint16_t second = *(*unicode)++;
 	*length -= 2;

 	// The first unit must be a high surrogate and the second a low surrogate;
 	// both units have already been consumed when this is rejected
 	if (first >= 0xDC00 || second < 0xDC00 || second >= 0xE000) {
 		return 0;
 	}

 	const uint32_t highBits = (uint32_t) (first - 0xD800);
 	const uint32_t lowBits = (uint32_t) (second - 0xDC00);
 	return (highBits << 10) + lowBits + 0x10000;
 }
 71
 // Decodes one code point from a UTF-8 stream. *unicode is advanced past each
 // byte that is read and *length is reduced by the same amount. Returns the
 // code point, or 0 when the input is empty, the lead byte is not a valid
 // 1- to 4-byte lead, the sequence is truncated (in which case *length is set
 // to 0), or a continuation byte is malformed.
 uint32_t utf8Char(const char** unicode, size_t* length) {
 	if (*length == 0) {
 		return 0;
 	}
 	// Bytes are read as unsigned: with a signed char, values >= 0x80 would be
 	// sign-extended and leak set bits into the upper part of the result
 	uint8_t byte = (uint8_t) **unicode;
 	--*length;
 	++*unicode;
 	if (!(byte & 0x80)) {
 		return byte;
 	}

 	// tops[n] is the lead-byte prefix of an (n + 2)-byte sequence, and
 	// tops[n + 1] is the mask that isolates that prefix
 	static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
 	size_t numBytes;
 	for (numBytes = 0; numBytes < 3; ++numBytes) {
 		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
 			break;
 		}
 	}
 	if (numBytes == 3) {
 		// Stray continuation byte or a lead for a sequence longer than 4 bytes
 		return 0;
 	}
 	uint32_t unichar = byte & ~tops[numBytes];

 	// From here on numBytes counts the continuation bytes still to read
 	++numBytes;
 	if (*length < numBytes) {
 		*length = 0;
 		return 0;
 	}
 	size_t i;
 	for (i = 0; i < numBytes; ++i) {
 		unichar <<= 6;
 		byte = (uint8_t) **unicode;
 		--*length;
 		++*unicode;
 		if ((byte & 0xC0) != 0x80) {
 			return 0;
 		}
 		unichar |= byte & 0x3F;
 	}
 	return unichar;
 }
112
// Encodes unichar as UTF-8 into buffer, which must have room for 4 bytes, and
// returns the number of bytes written. Values above 0x10FFFF are replaced by
// U+FFFD first. No NUL terminator is written.
static size_t _toUtf8(uint32_t unichar, char* buffer) {
	uint32_t rest = unichar > 0x10FFFF ? 0xFFFD : unichar;

	// Pick the sequence length and the marker bits of the lead byte
	size_t count;
	uint32_t lead;
	if (rest < 0x80) {
		buffer[0] = rest;
		return 1;
	} else if (rest < 0x800) {
		count = 2;
		lead = 0xC0;
	} else if (rest < 0x10000) {
		count = 3;
		lead = 0xE0;
	} else {
		// After the clamp above everything else fits in four bytes
		count = 4;
		lead = 0xF0;
	}

	// Fill continuation bytes back to front, six payload bits at a time
	size_t i;
	for (i = count - 1; i > 0; --i) {
		buffer[i] = (rest & 0x3F) | 0x80;
		rest >>= 6;
	}
	buffer[0] = rest | lead;
	return count;
}
143
// Compares a UTF-16 string with a UTF-8 string code point by code point.
// utf16Length and utf8Length are byte counts. Returns -1 if the UTF-16 string
// orders first, 1 if the UTF-8 string orders first, and 0 if they are equal.
// When all code points decoded from both inputs are equal, the input that
// runs out first orders first.
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so that the final pair is checked even
		// when decoding it exhausted one or both inputs
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
164
// Converts length bytes of UTF-16 into a newly allocated, NUL-terminated
// UTF-8 string that the caller must free. Units that utf16Char rejects are
// emitted as a single 0 byte, as is a trailing odd byte. Returns 0 if the
// allocation fails or the worst-case output size would overflow size_t.
char* utf16to8(const uint16_t* utf16, size_t length) {
	// Worst case: every 16-bit unit becomes 3 UTF-8 bytes (a surrogate pair
	// turns 4 input bytes into 4 output bytes, which is less), a trailing odd
	// byte yields 1 byte, plus 1 for the terminator. Sizing for this up front
	// means no write can ever land outside the allocation.
	size_t units = length / 2;
	if (units > (SIZE_MAX - 2) / 3) {
		return 0;
	}
	size_t capacity = units * 3 + (length & 1) + 1;
	char* utf8 = malloc(capacity);
	if (!utf8) {
		return 0;
	}

	size_t utf8Length = 0;
	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		utf8Length += _toUtf8(unichar, utf8 + utf8Length);
	}
	utf8[utf8Length] = '\0';

	// Give back the unused tail; if shrinking fails the original buffer is
	// still valid and terminated, so hand that back instead
	char* shrunk = realloc(utf8, utf8Length + 1);
	return shrunk ? shrunk : utf8;
}
211
// Returns the value (0-15) of the hexadecimal digit character digit, accepting
// both upper- and lower-case letters, or -1 if it is not a hex digit.
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
246
// Parses exactly 8 hex digits from line into *out. Returns the position just
// past the digits, or 0 if any of the 8 characters is not a hex digit, in
// which case *out is left untouched.
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining = 8;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
262
// Parses exactly 6 hex digits from line into *out. Returns the position just
// past the digits, or 0 if any of the 6 characters is not a hex digit, in
// which case *out is left untouched.
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining = 6;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
278
// Parses exactly 4 hex digits from line into *out. Returns the position just
// past the digits, or 0 if any of the 4 characters is not a hex digit, in
// which case *out has been set to 0.
const char* hex16(const char* line, uint16_t* out) {
	// Cleared first so a failed parse leaves a defined value behind
	*out = 0;
	uint16_t accum = 0;
	int remaining = 4;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
295
// Parses exactly 3 hex digits from line into *out. Returns the position just
// past the digits, or 0 if any of the 3 characters is not a hex digit, in
// which case *out has been set to 0.
const char* hex12(const char* line, uint16_t* out) {
	// Cleared first so a failed parse leaves a defined value behind
	*out = 0;
	uint16_t accum = 0;
	int remaining = 3;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
312
// Parses exactly 2 hex digits from line into *out. Returns the position just
// past the digits, or 0 if either character is not a hex digit, in which case
// *out has been set to 0.
const char* hex8(const char* line, uint8_t* out) {
	// Cleared first so a failed parse leaves a defined value behind
	*out = 0;
	uint8_t accum = 0;
	int remaining = 2;
	while (remaining-- > 0) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint8_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
329
// Parses a single hex digit from line into *out. Returns the position just
// past the digit, matching the other hexN parsers in this file, or 0 if the
// character is not a hex digit, in which case *out has been set to 0.
const char* hex4(const char* line, uint8_t* out) {
	// Cleared first so a failed parse leaves a defined value behind
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return 0;
	}
	*out = (uint8_t) nybble;
	// Step over the digit that was consumed; previously the unadvanced
	// pointer was returned, so a caller chaining parsers re-read the digit
	return line + 1;
}