all repos — mgba @ 7dc9eb4d5641d5980a2cc0eb8f1e459b4bfdfb4a

mGBA Game Boy Advance Emulator

src/util/string.c (view raw)

  1/* Copyright (c) 2013-2015 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba-util/string.h>
  7
  8#include <string.h>
  9
 10#ifndef HAVE_STRNDUP
 11char* strndup(const char* start, size_t len) {
 12	// This is suboptimal, but anything recent should have strndup
 13	char* out = malloc((len + 1) * sizeof(char));
 14	strncpy(out, start, len);
 15	out[len] = '\0';
 16	return out;
 17}
 18#endif
 19
 20#ifndef HAVE_STRDUP
 21char* strdup(const char* str) {
 22	size_t len = strlen(str);
 23	char* out = malloc(len + 1);
 24	strncpy(out, str, len);
 25	out[len] = '\0';
 26	return out;
 27}
 28#endif
 29
 30char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
 31	char* last = 0;
 32	const char* next = haystack;
 33	size_t needleLen = strlen(needle);
 34	for (; len >= needleLen; --len, ++next) {
 35		if (strncmp(needle, next, needleLen) == 0) {
 36			last = (char*) next;
 37		}
 38	}
 39	return last;
 40}
 41
 42bool endswith(const char* restrict s1, const char* restrict end) {
 43	size_t len = strlen(s1);
 44	size_t endLen = strlen(end);
 45	if (len < endLen) {
 46		return false;
 47	}
 48	return strcmp(&s1[len - endLen], end) == 0;
 49}
 50
 51bool startswith(const char* restrict s1, const char* restrict start) {
 52	size_t len = strlen(s1);
 53	size_t startLen = strlen(start);
 54	if (len < startLen) {
 55		return false;
 56	}
 57	return strncmp(s1, start, startLen) == 0;
 58}
 59
 60uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
 61	if (*length < 2) {
 62		*length = 0;
 63		return 0;
 64	}
 65	uint32_t unichar = **unicode;
 66	++*unicode;
 67	*length -= 2;
 68	if (unichar < 0xD800 || unichar >= 0xE000) {
 69		return unichar;
 70	}
 71	if (*length < 2) {
 72		*length = 0;
 73		return 0;
 74	}
 75	uint16_t highSurrogate = unichar;
 76	uint16_t lowSurrogate = **unicode;
 77	++*unicode;
 78	*length -= 2;
 79	if (highSurrogate >= 0xDC00) {
 80		return 0;
 81	}
 82	if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) {
 83		return 0;
 84	}
 85	highSurrogate -= 0xD800;
 86	lowSurrogate -= 0xDC00;
 87	return (highSurrogate << 10) + lowSurrogate + 0x10000;
 88}
 89
 90uint32_t utf8Char(const char** unicode, size_t* length) {
 91	if (*length == 0) {
 92		return 0;
 93	}
 94	char byte = **unicode;
 95	--*length;
 96	++*unicode;
 97	if (!(byte & 0x80)) {
 98		return byte;
 99	}
100	uint32_t unichar;
101	static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
102	size_t numBytes;
103	for (numBytes = 0; numBytes < 3; ++numBytes) {
104		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
105			break;
106		}
107	}
108	unichar = byte & ~tops[numBytes];
109	if (numBytes == 3) {
110		return 0;
111	}
112	++numBytes;
113	if (*length < numBytes) {
114		*length = 0;
115		return 0;
116	}
117	size_t i;
118	for (i = 0; i < numBytes; ++i) {
119		unichar <<= 6;
120		byte = **unicode;
121		--*length;
122		++*unicode;
123		if ((byte & 0xC0) != 0x80) {
124			return 0;
125		}
126		unichar |= byte & 0x3F;
127	}
128	return unichar;
129}
130
// Encodes one code point as UTF-8 into buffer, which must have room for up
// to 4 bytes; no terminator is written. Values above 0x10FFFF are replaced
// with U+FFFD. Returns the number of bytes stored (1 to 4).
size_t toUtf8(uint32_t unichar, char* buffer) {
	uint32_t bits = unichar > 0x10FFFF ? 0xFFFD : unichar;
	if (bits < 0x80) {
		buffer[0] = bits;
		return 1;
	}

	// Choose the sequence length and the marker carried by the lead byte.
	// After the clamp above, bits never exceeds 0x10FFFF, so four bytes
	// always suffice.
	size_t total;
	uint32_t marker;
	if (bits < 0x800) {
		total = 2;
		marker = 0xC0;
	} else if (bits < 0x10000) {
		total = 3;
		marker = 0xE0;
	} else {
		total = 4;
		marker = 0xF0;
	}

	// Continuation bytes carry six payload bits each, filled back to front
	size_t i;
	for (i = total - 1; i > 0; --i) {
		buffer[i] = (bits & 0x3F) | 0x80;
		bits >>= 6;
	}
	// Whatever payload is left belongs in the lead byte
	buffer[0] = bits | marker;
	return total;
}
161
// Compares a UTF-16 string with a UTF-8 string code point by code point.
// Both lengths are given in bytes. Returns -1 if the UTF-16 string orders
// first, 1 if the UTF-8 string orders first, and 0 if they are equal. When
// one string is a prefix of the other, the shorter one orders first.
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so that the final pair is checked
		// too, even when decoding it exhausted one or both inputs
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
182
// Converts a UTF-16 buffer of length BYTES into a newly allocated,
// NUL-terminated UTF-8 string. Returns NULL if memory runs out; otherwise
// the caller owns the result and releases it with free(). Input that
// utf16Char decodes as 0 (including malformed sequences) is emitted as a
// single zero byte, so the result may contain embedded NULs.
char* utf16to8(const uint16_t* utf16, size_t length) {
	char buffer[4];
	// Initial guess: one output byte per input code unit, plus the
	// terminator. The buffer grows on demand below.
	size_t capacity = length / 2 + 1;
	size_t utf8Length = 0;
	char* utf8 = malloc(capacity);
	if (!utf8) {
		return NULL;
	}

	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		size_t bytes = toUtf8(unichar, buffer);
		// Invariant: utf8Length < capacity, so the terminator always fits.
		// Grow before copying whenever this character would break that.
		if (bytes >= capacity - utf8Length) {
			if (capacity > (SIZE_MAX - sizeof(buffer)) / 2) {
				free(utf8);
				return NULL;
			}
			size_t newCapacity = capacity * 2 + sizeof(buffer);
			char* grown = realloc(utf8, newCapacity);
			if (!grown) {
				// realloc leaves the old block intact on failure
				free(utf8);
				return NULL;
			}
			utf8 = grown;
			capacity = newCapacity;
		}
		memcpy(utf8 + utf8Length, buffer, bytes);
		utf8Length += bytes;
	}
	utf8[utf8Length] = '\0';

	// Hand back any unused tail. If shrinking fails, the original block is
	// still valid and already terminated, so return it as-is.
	char* trimmed = realloc(utf8, utf8Length + 1);
	return trimmed ? trimmed : utf8;
}
229
// Maps one hexadecimal digit character (0-9, a-f or A-F) to its value
// 0-15. Returns -1 for any other character.
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
264
// Parses exactly eight hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits consumed, or a null pointer if any
// of them is not a hex digit; *out is left untouched in that case.
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining;
	for (remaining = 8; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		// Make room for the new digit in the low four bits
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
280
// Parses exactly six hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits consumed, or a null pointer if any
// of them is not a hex digit; *out is left untouched in that case.
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining;
	for (remaining = 6; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		// Make room for the new digit in the low four bits
		accum = (accum << 4) | nybble;
		++line;
	}
	*out = accum;
	return line;
}
296
// Parses exactly four hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits consumed, or a null pointer if any
// of them is not a hex digit; *out is set to 0 in that case.
const char* hex16(const char* line, uint16_t* out) {
	// Cleared first so that a failed parse still leaves a defined value
	*out = 0;
	uint16_t accum = 0;
	int remaining;
	for (remaining = 4; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
313
// Parses exactly three hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits consumed, or a null pointer if any
// of them is not a hex digit; *out is set to 0 in that case.
const char* hex12(const char* line, uint16_t* out) {
	// Cleared first so that a failed parse still leaves a defined value
	*out = 0;
	uint16_t accum = 0;
	int remaining;
	for (remaining = 3; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
330
// Parses exactly two hexadecimal digits starting at line into *out.
// Returns a pointer just past the digits consumed, or a null pointer if
// either of them is not a hex digit; *out is set to 0 in that case.
const char* hex8(const char* line, uint8_t* out) {
	// Cleared first so that a failed parse still leaves a defined value
	*out = 0;
	uint8_t accum = 0;
	int remaining;
	for (remaining = 2; remaining > 0; --remaining) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint8_t) ((accum << 4) | nybble);
		++line;
	}
	*out = accum;
	return line;
}
347
// Parses a single hexadecimal digit at line into *out. Returns line on
// success, or a null pointer if the character is not a hex digit; *out is
// set to 0 in that case.
// NOTE(review): unlike the multi-digit parsers in this file, the pointer
// returned on success is NOT advanced past the digit. Confirm against the
// callers whether that is intended before changing it.
const char* hex4(const char* line, uint8_t* out) {
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return 0;
	}
	*out = (uint8_t) nybble;
	return line;
}
361
// Strips trailing whitespace (as classified by isspace) from string in
// place by overwriting it with NUL bytes. A string that is empty or made up
// entirely of whitespace ends up empty.
void rtrim(char* string) {
	size_t len = strlen(string);
	// The bound is checked before the character is read, so the scan never
	// looks at the byte in front of the buffer. The cast keeps negative char
	// values away from isspace, which only accepts unsigned char values and
	// EOF.
	while (len > 0 && isspace((unsigned char) string[len - 1])) {
		--len;
		string[len] = '\0';
	}
}
371}