all repos — mgba @ f491196bc4fcc6c5c51c1fecff1db78b2070bb81

mGBA Game Boy Advance Emulator

src/util/string.c (view raw)

  1/* Copyright (c) 2013-2015 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include "util/string.h"
  7
  8#include <string.h>
  9
 10#ifndef HAVE_STRNDUP
 11char* strndup(const char* start, size_t len) {
 12	// This is suboptimal, but anything recent should have strndup
 13	char* out = malloc((len + 1) * sizeof(char));
 14	strncpy(out, start, len);
 15	out[len] = '\0';
 16	return out;
 17}
 18#endif
 19
 20#ifndef HAVE_STRDUP
 21char* strdup(const char* str) {
 22	size_t len = strlen(str);
 23	char* out = malloc(len + 1);
 24	strncpy(out, str, len);
 25	out[len] = '\0';
 26	return out;
 27}
 28#endif
 29
 30char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
 31	char* last = 0;
 32	const char* next = haystack;
 33	size_t needleLen = strlen(needle);
 34	for (; len >= needleLen; --len, ++next) {
 35		if (strncmp(needle, next, needleLen) == 0) {
 36			last = (char*) next;
 37		}
 38	}
 39	return last;
 40}
 41
 42bool endswith(const char* restrict s1, const char* restrict end) {
 43	size_t len = strlen(s1);
 44	size_t endLen = strlen(end);
 45	if (len < endLen) {
 46		return false;
 47	}
 48	return strcmp(&s1[len - endLen], end) == 0;
 49}
 50
 51uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
 52	if (*length < 2) {
 53		*length = 0;
 54		return 0;
 55	}
 56	uint32_t unichar = **unicode;
 57	++*unicode;
 58	*length -= 2;
 59	if (unichar < 0xD800 || unichar >= 0xE000) {
 60		return unichar;
 61	}
 62	if (*length < 2) {
 63		*length = 0;
 64		return 0;
 65	}
 66	uint16_t highSurrogate = unichar;
 67	uint16_t lowSurrogate = **unicode;
 68	++*unicode;
 69	*length -= 2;
 70	if (highSurrogate >= 0xDC00) {
 71		return 0;
 72	}
 73	if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) {
 74		return 0;
 75	}
 76	highSurrogate -= 0xD800;
 77	lowSurrogate -= 0xDC00;
 78	return (highSurrogate << 10) + lowSurrogate + 0x10000;
 79}
 80
 81uint32_t utf8Char(const char** unicode, size_t* length) {
 82	if (*length == 0) {
 83		return 0;
 84	}
 85	char byte = **unicode;
 86	--*length;
 87	++*unicode;
 88	if (!(byte & 0x80)) {
 89		return byte;
 90	}
 91	uint32_t unichar;
 92	static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
 93	size_t numBytes;
 94	for (numBytes = 0; numBytes < 3; ++numBytes) {
 95		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
 96			break;
 97		}
 98	}
 99	unichar = byte & ~tops[numBytes];
100	if (numBytes == 3) {
101		return 0;
102	}
103	++numBytes;
104	if (*length < numBytes) {
105		*length = 0;
106		return 0;
107	}
108	size_t i;
109	for (i = 0; i < numBytes; ++i) {
110		unichar <<= 6;
111		byte = **unicode;
112		--*length;
113		++*unicode;
114		if ((byte & 0xC0) != 0x80) {
115			return 0;
116		}
117		unichar |= byte & 0x3F;
118	}
119	return unichar;
120}
121
// Encodes the code point `unichar` as UTF-8 into `buffer`, which must have
// room for up to 4 bytes. No terminator is written.
//
// Values above 0x10FFFF are replaced with U+FFFD before encoding.
// Returns the number of bytes written (1-4).
size_t toUtf8(uint32_t unichar, char* buffer) {
	uint32_t codepoint = unichar > 0x10FFFF ? 0xFFFD : unichar;

	if (codepoint < 0x80) {
		buffer[0] = codepoint;
		return 1;
	}

	// Pick the sequence length and the marker bits for the lead byte
	size_t total;
	uint32_t leadMarker;
	if (codepoint < 0x800) {
		total = 2;
		leadMarker = 0xC0;
	} else if (codepoint < 0x10000) {
		total = 3;
		leadMarker = 0xE0;
	} else {
		total = 4;
		leadMarker = 0xF0;
	}

	// Fill continuation bytes from the end, peeling off six bits at a time;
	// whatever is left over belongs in the lead byte
	size_t index;
	for (index = total - 1; index > 0; --index) {
		buffer[index] = (codepoint & 0x3F) | 0x80;
		codepoint >>= 6;
	}
	buffer[0] = codepoint | leadMarker;
	return total;
}
152
// Compares a UTF-16 string against a UTF-8 string code point by code point.
//
// `utf16Length` and `utf8Length` are the sizes of the two inputs in bytes.
// Returns -1 if the UTF-16 string sorts first, 1 if the UTF-8 string sorts
// first, and 0 if they decode to the same sequence. When one input runs out
// before a difference is found, the shorter one sorts first.
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so the final pair is checked too,
		// even when decoding it exhausted one or both inputs
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
173
// Converts `length` BYTES of UTF-16 data into a newly allocated,
// NUL-terminated UTF-8 string.
//
// Returns NULL if memory cannot be allocated; otherwise the caller frees the
// result. Each value produced by utf16Char is encoded with toUtf8, so a unit
// that utf16Char rejects (returning 0) is emitted as a single zero byte.
char* utf16to8(const uint16_t* utf16, size_t length) {
	char buffer[4];
	// Start with one byte per UTF-16 unit plus the terminator and grow on
	// demand. Invariant: used + 1 <= capacity.
	size_t capacity = length / 2 + 1;
	size_t used = 0;
	char* utf8 = malloc(capacity);
	if (!utf8) {
		return NULL;
	}
	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		size_t bytes = toUtf8(unichar, buffer);
		if (used + bytes + 1 > capacity) {
			// Doubling plus one full character always leaves room for the
			// pending bytes and the terminator
			size_t newCapacity = capacity * 2 + sizeof(buffer);
			char* grown = realloc(utf8, newCapacity);
			if (!grown) {
				free(utf8);
				return NULL;
			}
			utf8 = grown;
			capacity = newCapacity;
		}
		memcpy(&utf8[used], buffer, bytes);
		used += bytes;
	}
	utf8[used] = '\0';

	// Give back the unused tail; if shrinking fails the larger buffer is
	// still valid and terminated, so hand that out instead
	char* trimmed = realloc(utf8, used + 1);
	return trimmed ? trimmed : utf8;
}
220
// Converts one hexadecimal digit character (0-9, a-f, A-F) to its value.
// Returns 0-15, or -1 if `digit` is not a hexadecimal digit.
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
255
// Parses exactly 8 hexadecimal digits from `line` into `*out`.
//
// Returns a pointer just past the digits on success. If any of the 8
// characters is not a hex digit, returns NULL and leaves `*out` untouched.
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	const char* cursor = line;
	int remaining = 8;
	while (remaining > 0) {
		int nybble = hexDigit(*cursor);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | nybble;
		++cursor;
		--remaining;
	}
	*out = accum;
	return cursor;
}
271
// Parses exactly 6 hexadecimal digits from `line` into `*out`.
//
// Returns a pointer just past the digits on success. If any of the 6
// characters is not a hex digit, returns NULL and leaves `*out` untouched.
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	const char* cursor = line;
	int remaining = 6;
	while (remaining > 0) {
		int nybble = hexDigit(*cursor);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | nybble;
		++cursor;
		--remaining;
	}
	*out = accum;
	return cursor;
}
287
// Parses exactly 4 hexadecimal digits from `line` into `*out`.
//
// `*out` is zeroed before parsing begins. Returns a pointer just past the
// digits on success; if any of the 4 characters is not a hex digit, returns
// NULL and `*out` stays 0.
const char* hex16(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	const char* cursor = line;
	int remaining = 4;
	while (remaining > 0) {
		int nybble = hexDigit(*cursor);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | nybble;
		++cursor;
		--remaining;
	}
	*out = accum;
	return cursor;
}
304
// Parses exactly 3 hexadecimal digits from `line` into `*out`.
//
// `*out` is zeroed before parsing begins. Returns a pointer just past the
// digits on success; if any of the 3 characters is not a hex digit, returns
// NULL and `*out` stays 0.
const char* hex12(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	const char* cursor = line;
	int remaining = 3;
	while (remaining > 0) {
		int nybble = hexDigit(*cursor);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | nybble;
		++cursor;
		--remaining;
	}
	*out = accum;
	return cursor;
}
321
// Parses exactly 2 hexadecimal digits from `line` into `*out`.
//
// `*out` is zeroed before parsing begins. Returns a pointer just past the
// digits on success; if either character is not a hex digit, returns NULL
// and `*out` stays 0.
const char* hex8(const char* line, uint8_t* out) {
	*out = 0;
	uint8_t accum = 0;
	const char* cursor = line;
	int remaining = 2;
	while (remaining > 0) {
		int nybble = hexDigit(*cursor);
		if (nybble < 0) {
			return NULL;
		}
		accum = (accum << 4) | nybble;
		++cursor;
		--remaining;
	}
	*out = accum;
	return cursor;
}
338
// Parses a single hexadecimal digit from `line` into `*out`.
//
// `*out` is zeroed before parsing begins. Returns a pointer just past the
// digit on success, matching the other hexN parsers; if the character is not
// a hex digit, returns NULL and `*out` stays 0.
const char* hex4(const char* line, uint8_t* out) {
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return NULL;
	}
	*out = nybble;
	// Step past the consumed digit so callers can chain parsers
	return line + 1;
}
352
// Strips trailing whitespace (as classified by isspace) from `string` in
// place by overwriting it with NUL terminators.
void rtrim(char* string) {
	size_t len = strlen(string);
	// The length check comes first so an all-whitespace string never causes
	// a read before the start of the buffer. The cast keeps negative char
	// values out of isspace, whose argument must fit in unsigned char.
	while (len > 0 && isspace((unsigned char) string[len - 1])) {
		--len;
		string[len] = '\0';
	}
}