all repos — mgba @ c4aedfa69aff7c8d26e5199b006196f47fe517c3

mGBA Game Boy Advance Emulator

src/util/string.c (view raw)

  1/* Copyright (c) 2013-2015 Jeffrey Pfau
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6#include "util/string.h"
  7
  8#include <string.h>
  9
 10#ifndef HAVE_STRNDUP
 11char* strndup(const char* start, size_t len) {
 12	// This is suboptimal, but anything recent should have strndup
 13	char* out = malloc((len + 1) * sizeof(char));
 14	strncpy(out, start, len);
 15	out[len] = '\0';
 16	return out;
 17}
 18#endif
 19
 20char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) {
 21	char* last = 0;
 22	const char* next = haystack;
 23	size_t needleLen = strlen(needle);
 24	for (; len >= needleLen; --len, ++next) {
 25		if (strncmp(needle, next, needleLen) == 0) {
 26			last = (char*) next;
 27		}
 28	}
 29	return last;
 30}
 31
 32static uint32_t _utf16Char(const uint16_t** unicode, size_t* length) {
 33	if (*length < 2) {
 34		*length = 0;
 35		return 0;
 36	}
 37	uint32_t unichar = **unicode;
 38	++*unicode;
 39	*length -= 2;
 40	if (unichar < 0xD800 || unichar >= 0xE000) {
 41		return unichar;
 42	}
 43	if (*length < 2) {
 44		*length = 0;
 45		return 0;
 46	}
 47	uint16_t highSurrogate = unichar;
 48	uint16_t lowSurrogate = **unicode;
 49	++*unicode;
 50	*length -= 2;
 51	if (highSurrogate >= 0xDC00) {
 52		return 0;
 53	}
 54	if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) {
 55		return 0;
 56	}
 57	highSurrogate -= 0xD800;
 58	lowSurrogate -= 0xDC00;
 59	return (highSurrogate << 10) + lowSurrogate + 0x10000;
 60}
 61
 62static uint32_t _utf8Char(const char** unicode, size_t* length) {
 63	if (*length == 0) {
 64		return 0;
 65	}
 66	char byte = **unicode;
 67	--*length;
 68	++*unicode;
 69	if (!(byte & 0x80)) {
 70		return byte;
 71	}
 72	uint32_t unichar;
 73	static int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 };
 74	size_t numBytes;
 75	for (numBytes = 0; numBytes < 3; ++numBytes) {
 76		if ((byte & tops[numBytes + 1]) == tops[numBytes]) {
 77			break;
 78		}
 79	}
 80	unichar = byte & ~tops[numBytes];
 81	if (numBytes == 3) {
 82		return 0;
 83	}
 84	++numBytes;
 85	if (*length < numBytes) {
 86		*length = 0;
 87		return 0;
 88	}
 89	size_t i;
 90	for (i = 0; i < numBytes; ++i) {
 91		unichar <<= 6;
 92		byte = **unicode;
 93		--*length;
 94		++*unicode;
 95		if ((byte & 0xC0) != 0x80) {
 96			return 0;
 97		}
 98		unichar |= byte & 0x3F;
 99	}
100	return unichar;
101}
102
// Encodes one code point as UTF-8 into buffer and returns the byte count.
//
// buffer must have room for 4 bytes; no NUL terminator is written. Values
// above 0x10FFFF are replaced with U+FFFD before encoding.
static size_t _toUtf8(uint32_t unichar, char* buffer) {
	uint32_t codepoint = unichar > 0x10FFFF ? 0xFFFD : unichar;
	if (codepoint < 0x80) {
		buffer[0] = codepoint;
		return 1;
	}

	// Pick the sequence length and the tag bits of its lead byte
	size_t total;
	uint32_t leadTag;
	if (codepoint < 0x800) {
		total = 2;
		leadTag = 0xC0;
	} else if (codepoint < 0x10000) {
		total = 3;
		leadTag = 0xE0;
	} else {
		total = 4;
		leadTag = 0xF0;
	}

	// Fill continuation bytes from the last one backwards, six bits each;
	// whatever remains afterwards belongs in the lead byte
	size_t pos;
	for (pos = total - 1; pos > 0; --pos) {
		buffer[pos] = (codepoint & 0x3F) | 0x80;
		codepoint >>= 6;
	}
	buffer[0] = codepoint | leadTag;
	return total;
}
133
// Compares a UTF-16 string against a UTF-8 string by code point.
//
// utf16Length and utf8Length are byte counts. Code points are decoded in
// lockstep with _utf16Char and _utf8Char; the first differing pair decides
// the result. If one input runs out while all decoded pairs matched, the
// input with bytes remaining is the greater one.
// Returns -1 if utf16 orders first, 1 if utf8 orders first, 0 if equal.
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
	while (utf16Length > 0 && utf8Length > 0) {
		uint32_t char1 = _utf16Char(&utf16, &utf16Length);
		uint32_t char2 = _utf8Char(&utf8, &utf8Length);
		// Compare right after decoding so the final pair is checked too, even
		// when decoding it exhausted one or both inputs
		if (char1 < char2) {
			return -1;
		}
		if (char1 > char2) {
			return 1;
		}
	}
	if (utf16Length == 0 && utf8Length > 0) {
		return -1;
	}
	if (utf16Length > 0 && utf8Length == 0) {
		return 1;
	}
	return 0;
}
154
// Converts a UTF-16 buffer to a newly allocated, NUL-terminated UTF-8 string.
//
// length is the size of utf16 in bytes. Each value produced by _utf16Char is
// encoded with _toUtf8, so a unit sequence that _utf16Char rejects shows up
// as an embedded NUL byte in the output. The caller owns the result and must
// free() it. Returns a null pointer if the buffer cannot be allocated.
char* utf16to8(const uint16_t* utf16, size_t length) {
	// Size for the worst case up front: each 16-bit unit (or odd trailing
	// byte) expands to at most 3 UTF-8 bytes, and a surrogate pair turns two
	// units into 4 bytes. One extra byte holds the terminator.
	size_t maxUnits = length / 2 + 1;
	if (maxUnits > (SIZE_MAX - 1) / 3) {
		return 0;
	}
	char* utf8 = malloc(maxUnits * 3 + 1);
	if (!utf8) {
		return 0;
	}

	size_t utf8Length = 0;
	while (length > 0) {
		uint32_t unichar = _utf16Char(&utf16, &length);
		utf8Length += _toUtf8(unichar, &utf8[utf8Length]);
	}
	utf8[utf8Length] = '\0';

	// Give back the unused tail. If shrinking fails the original buffer is
	// still valid, so hand that out instead.
	char* trimmed = realloc(utf8, utf8Length + 1);
	if (!trimmed) {
		return utf8;
	}
	return trimmed;
}
200
// Converts one hexadecimal digit character to its value.
//
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Returns the value 0-15, or -1 if
// digit is not a hexadecimal digit.
int hexDigit(char digit) {
	if (digit >= '0' && digit <= '9') {
		return digit - '0';
	}
	if (digit >= 'a' && digit <= 'f') {
		return digit - 'a' + 10;
	}
	if (digit >= 'A' && digit <= 'F') {
		return digit - 'A' + 10;
	}
	return -1;
}
235
// Parses exactly 8 hexadecimal digits from line into a 32-bit value.
//
// On success stores the value in *out and returns a pointer just past the
// last digit. If any of the 8 characters is not a hex digit, returns a null
// pointer and leaves *out unmodified.
const char* hex32(const char* line, uint32_t* out) {
	uint32_t accum = 0;
	int consumed = 0;
	while (consumed < 8) {
		int nybble = hexDigit(line[consumed]);
		if (nybble < 0) {
			return 0;
		}
		accum = (accum << 4) | nybble;
		++consumed;
	}
	*out = accum;
	return line + consumed;
}
251
// Parses exactly 4 hexadecimal digits from line into a 16-bit value.
//
// *out is cleared first. On success the value is stored in *out and a pointer
// just past the last digit is returned. If any of the 4 characters is not a
// hex digit, returns a null pointer and *out stays 0.
const char* hex16(const char* line, uint16_t* out) {
	*out = 0;
	uint16_t accum = 0;
	int consumed = 0;
	while (consumed < 4) {
		int nybble = hexDigit(line[consumed]);
		if (nybble < 0) {
			return 0;
		}
		accum = (uint16_t) ((accum << 4) | nybble);
		++consumed;
	}
	*out = accum;
	return line + consumed;
}