all repos — mgba @ 0c015461027bd5c2ee09c764c4cbbc6a53c4f5e4

mGBA Game Boy Advance Emulator

src/util/string.c (view raw)

  1/* Copyright (c) 2013-2019 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include <mgba-util/string.h>
  7
  8#include <mgba-util/vector.h>
  9
 10#include <string.h>
 11
 12DEFINE_VECTOR(StringList, char*);
 13
 14#ifndef HAVE_STRNDUP
 15char* strndup(const char* start, size_t len) {
 16	// This is suboptimal, but anything recent should have strndup
 17	char* out = malloc((len + 1) * sizeof(char));
 18	strncpy(out, start, len);
 19	out[len] = '\0';
 20	return out;
 21}
 22#endif
 23
 24#ifndef HAVE_STRDUP
 25char* strdup(const char* str) {
 26	size_t len = strlen(str);
 27	char* out = malloc(len + 1);
 28	strncpy(out, str, len);
 29	out[len] = '\0';
 30	return out;
 31}
 32#endif
 33
 34#ifndef HAVE_STRLCPY
 35size_t strlcpy(char* restrict dst, const char* restrict src, size_t dstsize) {
 36	size_t i = 0;
 37	for (; src[i] && dstsize > 1; ++i) {
 38		dst[i] = src[i];
 39		--dstsize;
 40	}
 41	if (dstsize) {
 42		dst[i] = '\0';
 43	}
 44	while (src[i]) {
 45		++i;
 46	}
 47	return i;
 48}
 49#endif
 50
 51char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
 52	char* last = 0;
 53	const char* next = haystack;
 54	size_t needleLen = strlen(needle);
 55	for (; len >= needleLen; --len, ++next) {
 56		if (strncmp(needle, next, needleLen) == 0) {
 57			last = (char*) next;
 58		}
 59	}
 60	return last;
 61}
 62
 63bool endswith(const char* restrict s1, const char* restrict end) {
 64	size_t len = strlen(s1);
 65	size_t endLen = strlen(end);
 66	if (len < endLen) {
 67		return false;
 68	}
 69	return strcmp(&s1[len - endLen], end) == 0;
 70}
 71
 72bool startswith(const char* restrict s1, const char* restrict start) {
 73	size_t len = strlen(s1);
 74	size_t startLen = strlen(start);
 75	if (len < startLen) {
 76		return false;
 77	}
 78	return strncmp(s1, start, startLen) == 0;
 79}
 80
 81uint32_t utf16Char(const uint16_t** unicode, size_t* length) {
 82	if (*length < 2) {
 83		*length = 0;
 84		return 0;
 85	}
 86	uint32_t unichar = **unicode;
 87	++*unicode;
 88	*length -= 2;
 89	if (unichar < 0xD800 || unichar >= 0xE000) {
 90		return unichar;
 91	}
 92	if (*length < 2) {
 93		*length = 0;
 94		return 0;
 95	}
 96	uint16_t highSurrogate = unichar;
 97	uint16_t lowSurrogate = **unicode;
 98	++*unicode;
 99	*length -= 2;
100	if (highSurrogate >= 0xDC00) {
101		return 0;
102	}
103	if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) {
104		return 0;
105	}
106	highSurrogate -= 0xD800;
107	lowSurrogate -= 0xDC00;
108	return (highSurrogate << 10) + lowSurrogate + 0x10000;
109}
110
// Decodes one code point from a UTF-8 stream and advances the cursor.
// *unicode points at the next byte and *length is the number of bytes
// remaining; both are updated to reflect what was consumed.
// Returns 0 when no input remains, when the lead byte is not a valid 2-, 3- or
// 4-byte lead, when the sequence is truncated (also zeroing *length), or when
// a continuation byte is malformed (the offending byte is consumed).
uint32_t utf8Char(const char** unicode, size_t* length) {
	if (*length == 0) {
		return 0;
	}
	// Work on unsigned bytes: with a signed char the masking below would
	// sign-extend and leak set bits into the top of the decoded value
	uint8_t byte = (uint8_t) **unicode;
	--*length;
	++*unicode;
	if (!(byte & 0x80)) {
		return byte;
	}
	// A lead byte for an (n + 2)-byte sequence has the bit pattern tops[n]
	// under the mask tops[n + 1]
	static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
	size_t numBytes;
	for (numBytes = 0; numBytes < 3; ++numBytes) {
		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
			break;
		}
	}
	if (numBytes == 3) {
		// Stray continuation byte or an out-of-range lead byte
		return 0;
	}
	uint32_t unichar = byte & ~tops[numBytes];
	// numBytes now counts the continuation bytes that must follow
	++numBytes;
	if (*length < numBytes) {
		*length = 0;
		return 0;
	}
	size_t i;
	for (i = 0; i < numBytes; ++i) {
		unichar <<= 6;
		byte = (uint8_t) **unicode;
		--*length;
		++*unicode;
		if ((byte & 0xC0) != 0x80) {
			return 0;
		}
		unichar |= byte & 0x3F;
	}
	return unichar;
}
151
// Encodes a single code point as UTF-8 into buffer, which must have room for
// up to four bytes. No terminator is written. Values above 0x10FFFF are
// encoded as U+FFFD instead. Returns the number of bytes written.
size_t toUtf8(uint32_t unichar, char* buffer) {
	if (unichar > 0x10FFFF) {
		unichar = 0xFFFD;
	}
	if (unichar < 0x80) {
		buffer[0] = unichar;
		return 1;
	}

	size_t total;
	uint32_t leadMarker;
	if (unichar < 0x800) {
		total = 2;
		leadMarker = 0xC0;
	} else if (unichar < 0x10000) {
		total = 3;
		leadMarker = 0xE0;
	} else {
		total = 4;
		leadMarker = 0xF0;
	}

	// Fill the continuation bytes from the back, six bits at a time; whatever
	// remains afterwards belongs in the lead byte
	size_t slot;
	for (slot = total - 1; slot > 0; --slot) {
		buffer[slot] = (unichar & 0x3F) | 0x80;
		unichar >>= 6;
	}
	buffer[0] = unichar | leadMarker;
	return total;
}
182
// Compares a UTF-16 string against a UTF-8 string code point by code point.
// utf16Length and utf8Length are the sizes of the inputs in bytes.
// Returns -1 if the UTF-16 string orders first, 1 if the UTF-8 string orders
// first, and 0 if they are equal. When one string is a prefix of the other,
// the shorter one orders first.
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = utf16Char(&utf16, &utf16Length);
		uint32_t char2 = utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so that the final pair of code points
		// is checked even when either input has just been exhausted
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
203
// Converts a UTF-16 buffer of length bytes into a newly allocated,
// NUL-terminated UTF-8 string. Returns NULL if allocation fails; the caller
// owns the result and must free() it.
// Code units that utf16Char rejects decode to 0 and are therefore emitted as
// NUL bytes inside the output.
char* utf16to8(const uint16_t* utf16, size_t length) {
	char* utf8 = NULL;
	size_t capacity = 0;
	size_t utf8Length = 0;
	char buffer[4];
	while (length > 0) {
		uint32_t unichar = utf16Char(&utf16, &length);
		size_t bytes = toUtf8(unichar, buffer);

		// Always keep room for the encoded bytes plus the final terminator
		size_t needed = utf8Length + bytes + 1;
		if (needed > capacity) {
			// First guess: one output byte per remaining input byte
			size_t newCapacity = capacity ? capacity * 2 : length + bytes + 1;
			if (newCapacity < needed) {
				newCapacity = needed;
			}
			char* grown = realloc(utf8, newCapacity);
			if (!grown) {
				free(utf8);
				return NULL;
			}
			utf8 = grown;
			capacity = newCapacity;
		}
		memcpy(&utf8[utf8Length], buffer, bytes);
		utf8Length += bytes;
	}

	// Trim to the exact size; this also allocates the empty string when the
	// input produced no output at all
	char* newUTF8 = realloc(utf8, utf8Length + 1);
	if (!newUTF8) {
		free(utf8);
		return NULL;
	}
	newUTF8[utf8Length] = '\0';
	return newUTF8;
}
250
251extern const uint16_t gbkUnicodeTable[];
252
253char* gbkToUtf8(const char* gbk, size_t length) {
254	char* utf8 = NULL;
255	char* utf8Offset = NULL;
256	size_t offset;
257	uint8_t gbk1 = 0;
258	char buffer[4];
259	size_t utf8TotalBytes = 0;
260	size_t utf8Length = 0;
261	for (offset = 0; offset < length; ++offset) {
262		if (length == 0) {
263			break;
264		}
265		unsigned unichar = 0xFFFD;
266		if (!gbk1 && !(gbk[offset] & 0x80)) {
267			unichar = gbk[offset];
268		} else if (gbk1) {
269			uint8_t gbk2 = gbk[offset];
270			if (gbk2 >= 0x40 && gbk2 != 0xFF) {
271				// TODO: GB-18030 support?
272				unichar = gbkUnicodeTable[gbk1 * 0xBF + gbk2 - 0x40];
273			}
274			gbk1 = 0;
275		} else if (((uint8_t*) gbk)[offset] == 0xFF) {
276			unichar = 0xFFFD;
277		} else if (((uint8_t*) gbk)[offset] == 0x80) {
278			unichar = 0x20AC; // Euro
279		} else {
280			gbk1 = ((uint8_t*) gbk)[offset] - 0x81;
281			continue;
282		}
283
284		size_t bytes = toUtf8(unichar, buffer);
285		utf8Length += bytes;
286		if (!utf8) {
287			utf8 = malloc(length);
288			if (!utf8) {
289				return NULL;
290			}
291			utf8TotalBytes = length;
292			memcpy(utf8, buffer, bytes);
293			utf8Offset = utf8 + bytes;
294		} else if (utf8Length < utf8TotalBytes) {
295			memcpy(utf8Offset, buffer, bytes);
296			utf8Offset += bytes;
297		} else if (utf8Length >= utf8TotalBytes) {
298			ptrdiff_t o = utf8Offset - utf8;
299			char* newUTF8 = realloc(utf8, utf8TotalBytes * 2);
300			utf8Offset = o + newUTF8;
301			if (!newUTF8) {
302				free(utf8);
303				return 0;
304			}
305			utf8 = newUTF8;
306			memcpy(utf8Offset, buffer, bytes);
307			utf8Offset += bytes;
308		}
309	}
310
311	char* newUTF8 = realloc(utf8, utf8Length + 1);
312	if (!newUTF8) {
313		free(utf8);
314		return 0;
315	}
316	newUTF8[utf8Length] = '\0';
317	return newUTF8;
318}
319
// Converts one hexadecimal digit character (either case) to its value 0-15.
// Returns -1 for any other character.
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
354
// Parses exactly eight hexadecimal digits from line into *out.
// Returns a pointer just past the digits on success. On a non-hex character
// it returns NULL and leaves *out untouched.
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining;
	for (remaining = 8; remaining > 0; --remaining, ++line) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
	}
	*out = accum;
	return line;
}
370
// Parses exactly six hexadecimal digits from line into *out.
// Returns a pointer just past the digits on success. On a non-hex character
// it returns NULL and leaves *out untouched.
const char* hex24(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int remaining;
	for (remaining = 6; remaining > 0; --remaining, ++line) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
	}
	*out = accum;
	return line;
}
386
// Parses exactly four hexadecimal digits from line into *out.
// Returns a pointer just past the digits on success. On a non-hex character
// it returns NULL and *out is left as 0.
const char* hex16(const char* line, uint16_t* out) {
	// Clear the result up front so a failed parse reports zero
	*out = 0;
	uint16_t accum = 0;
	int remaining;
	for (remaining = 4; remaining > 0; --remaining, ++line) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
	}
	*out = accum;
	return line;
}
403
// Parses exactly three hexadecimal digits from line into *out.
// Returns a pointer just past the digits on success. On a non-hex character
// it returns NULL and *out is left as 0.
const char* hex12(const char* line, uint16_t* out) {
	// Clear the result up front so a failed parse reports zero
	*out = 0;
	uint16_t accum = 0;
	int remaining;
	for (remaining = 3; remaining > 0; --remaining, ++line) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
	}
	*out = accum;
	return line;
}
420
// Parses exactly two hexadecimal digits from line into *out.
// Returns a pointer just past the digits on success. On a non-hex character
// it returns NULL and *out is left as 0.
const char* hex8(const char* line, uint8_t* out) {
	// Clear the result up front so a failed parse reports zero
	*out = 0;
	uint8_t accum = 0;
	int remaining;
	for (remaining = 2; remaining > 0; --remaining, ++line) {
		int nybble = hexDigit(*line);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
	}
	*out = accum;
	return line;
}
437
// Parses a single hexadecimal digit from line into *out.
// Returns NULL (with *out set to 0) if the character is not a hex digit.
// NOTE(review): on success this returns line itself rather than a pointer past
// the consumed digit, unlike the other hexN parsers in this file. Confirm
// against callers before changing it.
const char* hex4(const char* line, uint8_t* out) {
	*out = 0;
	int nybble = hexDigit(*line);
	if (nybble < 0) {
		return 0;
	}
	*out = (uint8_t) nybble;
	return line;
}
450
// Strips trailing whitespace (as classified by isspace) from string in place
// by overwriting it with terminators. A string that is empty or consists only
// of whitespace ends up empty.
void rtrim(char* string) {
	size_t len = strlen(string);
	// Check the bound before looking at the character so nothing before the
	// start of the buffer is ever read; isspace needs an unsigned char value
	while (len > 0 && isspace((unsigned char) string[len - 1])) {
		--len;
		string[len] = '\0';
	}
}
461
// Parses a single- or double-quoted string from unparsed (unparsedLen bytes)
// into parsed (parsedLen bytes), which is zero-filled first.
// Recognized escapes are \n, \r, \\, \' and \". Parsing ends successfully at
// the matching closing quote or at a raw newline/carriage return.
// Returns the number of bytes written, or -1 if the input does not start with
// a quote, contains an unknown escape, ends before a terminator is seen, or
// fills all parsedLen bytes of the output.
ssize_t parseQuotedString(const char* unparsed, ssize_t unparsedLen, char* parsed, ssize_t parsedLen) {
	memset(parsed, 0, parsedLen);
	if (unparsedLen <= 0 || parsedLen <= 0) {
		return -1;
	}

	const char quote = unparsed[0];
	if (quote != '"' && quote != '\'') {
		return -1;
	}

	bool afterBackslash = false;
	ssize_t written = 0;
	ssize_t pos;
	for (pos = 1; pos < unparsedLen && written < parsedLen; ++pos) {
		const char c = unparsed[pos];
		if (afterBackslash) {
			char decoded;
			if (c == 'n') {
				decoded = '\n';
			} else if (c == 'r') {
				decoded = '\r';
			} else if (c == '\\' || c == '\'' || c == '"') {
				decoded = c;
			} else {
				return -1;
			}
			parsed[written] = decoded;
			++written;
			afterBackslash = false;
			continue;
		}
		// The closing quote and a bare line break both end the string
		if (c == quote || c == '\n' || c == '\r') {
			return written;
		}
		if (c == '\\') {
			afterBackslash = true;
			continue;
		}
		parsed[written] = c;
		++written;
	}
	// Ran out of input or output space before finding a terminator
	return -1;
}