all repos — mgba @ bf716b947a113056d315ee75802e40c6a5df3d65

mGBA Game Boy Advance Emulator

src/util/string.c (view raw)

  1/* Copyright (c) 2013-2015 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba-util/string.h>
  7
  8#include <mgba-util/vector.h>
  9
 10#include <string.h>
 11
// Instantiates the StringList container with char* elements through the
// DEFINE_VECTOR macro. NOTE(review): the macro is not defined in this file;
// presumably it comes from <mgba-util/vector.h> -- confirm what it expands to.
DEFINE_VECTOR(StringList, char*);
 13
 14#ifndef HAVE_STRNDUP
 15char* strndup(const char* start, size_t len) {
 16	// This is suboptimal, but anything recent should have strndup
 17	char* out = malloc((len + 1) * sizeof(char));
 18	strncpy(out, start, len);
 19	out[len] = '\0';
 20	return out;
 21}
 22#endif
 23
 24#ifndef HAVE_STRDUP
 25char* strdup(const char* str) {
 26	size_t len = strlen(str);
 27	char* out = malloc(len + 1);
 28	strncpy(out, str, len);
 29	out[len] = '\0';
 30	return out;
 31}
 32#endif
 33
 34char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
 35	char* last = 0;
 36	const char* next = haystack;
 37	size_t needleLen = strlen(needle);
 38	for (; len >= needleLen; --len, ++next) {
 39		if (strncmp(needle, next, needleLen) == 0) {
 40			last = (char*) next;
 41		}
 42	}
 43	return last;
 44}
 45
 46bool endswith(const char* restrict s1, const char* restrict end) {
 47	size_t len = strlen(s1);
 48	size_t endLen = strlen(end);
 49	if (len < endLen) {
 50		return false;
 51	}
 52	return strcmp(&s1[len - endLen], end) == 0;
 53}
 54
 55bool startswith(const char* restrict s1, const char* restrict start) {
 56	size_t len = strlen(s1);
 57	size_t startLen = strlen(start);
 58	if (len < startLen) {
 59		return false;
 60	}
 61	return strncmp(s1, start, startLen) == 0;
 62}
 63
 64uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
 65	if (*length < 2) {
 66		*length = 0;
 67		return 0;
 68	}
 69	uint32_t unichar = **unicode;
 70	++*unicode;
 71	*length -= 2;
 72	if (unichar < 0xD800 || unichar >= 0xE000) {
 73		return unichar;
 74	}
 75	if (*length < 2) {
 76		*length = 0;
 77		return 0;
 78	}
 79	uint16_t highSurrogate = unichar;
 80	uint16_t lowSurrogate = **unicode;
 81	++*unicode;
 82	*length -= 2;
 83	if (highSurrogate >= 0xDC00) {
 84		return 0;
 85	}
 86	if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) {
 87		return 0;
 88	}
 89	highSurrogate -= 0xD800;
 90	lowSurrogate -= 0xDC00;
 91	return (highSurrogate << 10) + lowSurrogate + 0x10000;
 92}
 93
 94uint32_t utf8Char(const char** unicode, size_t* length) {
 95	if (*length == 0) {
 96		return 0;
 97	}
 98	char byte = **unicode;
 99	--*length;
100	++*unicode;
101	if (!(byte & 0x80)) {
102		return byte;
103	}
104	uint32_t unichar;
105	static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
106	size_t numBytes;
107	for (numBytes = 0; numBytes < 3; ++numBytes) {
108		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
109			break;
110		}
111	}
112	unichar = byte & ~tops[numBytes];
113	if (numBytes == 3) {
114		return 0;
115	}
116	++numBytes;
117	if (*length < numBytes) {
118		*length = 0;
119		return 0;
120	}
121	size_t i;
122	for (i = 0; i < numBytes; ++i) {
123		unichar <<= 6;
124		byte = **unicode;
125		--*length;
126		++*unicode;
127		if ((byte & 0xC0) != 0x80) {
128			return 0;
129		}
130		unichar |= byte & 0x3F;
131	}
132	return unichar;
133}
134
/* Encodes the code point unichar as UTF-8 into buffer and returns the number
 * of bytes written (1 to 4). No terminator is written, so buffer needs room
 * for up to four bytes. Values above 0x10FFFF are encoded as U+FFFD. */
size_t toUtf8(uint32_t unichar, char* buffer) {
	// Prefix bits of the lead byte, indexed by sequence length - 1
	static const uint32_t leadPrefix[4] = { 0x00, 0xC0, 0xE0, 0xF0 };

	uint32_t codepoint = unichar > 0x10FFFF ? 0xFFFD : unichar;
	size_t count;
	if (codepoint < 0x80) {
		count = 1;
	} else if (codepoint < 0x800) {
		count = 2;
	} else if (codepoint < 0x10000) {
		count = 3;
	} else {
		count = 4;
	}

	// Fill continuation bytes from the end, six payload bits apiece
	size_t index;
	for (index = count - 1; index > 0; --index) {
		buffer[index] = (codepoint & 0x3F) | 0x80;
		codepoint >>= 6;
	}
	// Whatever bits are left belong in the lead byte
	buffer[0] = codepoint | leadPrefix[count - 1];
	return count;
}
165
/* Compares a UTF-16 string against a UTF-8 string code point by code point.
 *
 * utf16Length and utf8Length are both byte counts. Returns -1 if the UTF-16
 * string orders first, 1 if the UTF-8 string orders first, and 0 if they
 * decode to the same sequence. When one string is a prefix of the other, the
 * shorter one orders first. */
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so the final pair is checked as well;
		// testing at the top of the loop would let the loop exit first
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
186
/* Converts a UTF-16 buffer (length is in bytes) into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * Each code point is decoded with utf16Char and re-encoded with toUtf8.
 * Returns NULL if an allocation fails; otherwise the caller owns the result
 * and must free() it. Empty input yields an empty string. */
char* utf16to8(const uint16_t* utf16, size_t length) {
	char* utf8 = NULL;
	size_t utf8TotalBytes = 0; // Capacity of the utf8 buffer
	size_t utf8Length = 0; // Bytes of utf8 in use
	char buffer[4];
	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		size_t bytes = toUtf8(unichar, buffer);
		if (bytes == 0) {
			continue;
		}
		size_t needed = utf8Length + bytes;
		if (needed > utf8TotalBytes) {
			// First allocation: guess from the remaining input plus what is
			// about to be written. Afterwards, double until the data fits.
			size_t newTotal = utf8TotalBytes ? utf8TotalBytes : length + bytes;
			while (newTotal < needed) {
				newTotal *= 2;
			}
			char* grown = realloc(utf8, newTotal);
			if (!grown) {
				free(utf8);
				return NULL;
			}
			utf8 = grown;
			utf8TotalBytes = newTotal;
		}
		memcpy(utf8 + utf8Length, buffer, bytes);
		utf8Length = needed;
	}

	// Resize to the exact length plus room for the terminator
	char* newUTF8 = realloc(utf8, utf8Length + 1);
	if (!newUTF8) {
		free(utf8);
		return NULL;
	}
	newUTF8[utf8Length] = '\0';
	return newUTF8;
}
233
/* Converts one hexadecimal digit character (0-9, a-f or A-F) to its value in
 * the range 0..15. Returns -1 for any other character. */
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
268
/* Parses exactly eight hex digits at line into a 32-bit value.
 *
 * On success stores the value in *out and returns a pointer just past the
 * digits. If any of the eight characters is not a hex digit, returns a null
 * pointer and leaves *out untouched. */
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	unsigned remaining = 8;
	while (remaining--) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | (uint32_t) nybble;
		++line;
	}
	*out = accum;
	return line;
}
284
/* Parses exactly six hex digits at line into the low 24 bits of a 32-bit
 * value.
 *
 * On success stores the value in *out and returns a pointer just past the
 * digits. If any of the six characters is not a hex digit, returns a null
 * pointer and leaves *out untouched. */
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	unsigned remaining = 6;
	while (remaining--) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | (uint32_t) nybble;
		++line;
	}
	*out = accum;
	return line;
}
300
/* Parses exactly four hex digits at line into a 16-bit value.
 *
 * *out is cleared up front. On success it receives the parsed value and the
 * function returns a pointer just past the digits. If any of the four
 * characters is not a hex digit, returns a null pointer with *out left at 0. */
const char* hex16(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	unsigned remaining = 4;
	while (remaining--) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return NULL;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
317
/* Parses exactly three hex digits at line into the low 12 bits of a 16-bit
 * value.
 *
 * *out is cleared up front. On success it receives the parsed value and the
 * function returns a pointer just past the digits. If any of the three
 * characters is not a hex digit, returns a null pointer with *out left at 0. */
const char* hex12(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	unsigned remaining = 3;
	while (remaining--) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return NULL;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
334
/* Parses exactly two hex digits at line into an 8-bit value.
 *
 * *out is cleared up front. On success it receives the parsed value and the
 * function returns a pointer just past the digits. If either character is
 * not a hex digit, returns a null pointer with *out left at 0. */
const char* hex8(const char* line, uint8_t* out) {
	*out = 0;
	uint8_t accum = 0;
	unsigned remaining = 2;
	while (remaining--) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return NULL;
		}
		accum = (uint8_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
351
/* Parses a single hex digit at line into the low 4 bits of an 8-bit value.
 *
 * *out is cleared up front. On success it receives the digit's value and the
 * function returns a pointer just past the digit, matching the other hexN
 * parsers in this file. If the character is not a hex digit, returns a null
 * pointer with *out left at 0. */
const char* hex4(const char* line, uint8_t* out) {
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return NULL;
	}
	*out = (uint8_t) nybble;
	// Step over the consumed digit so callers can keep parsing from the result
	return line + 1;
}
365
/* Strips trailing whitespace (as classified by isspace) from the
 * NUL-terminated string in place by overwriting it with NUL bytes. A string
 * made up entirely of whitespace becomes empty. */
void rtrim(char* string) {
	size_t len = strlen(string);
	// Check the bound before touching the character so we never read
	// string[-1]; isspace requires a value representable as unsigned char
	while (len > 0 && isspace((unsigned char) string[len - 1])) {
		--len;
		string[len] = '\0';
	}
}
375}