/* Copyright (c) 2013-2019 Jeffrey Pfau * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include #include DEFINE_VECTOR(StringList, char*); #ifndef HAVE_STRNDUP char* strndup(const char* start, size_t len) { // This is suboptimal, but anything recent should have strndup char* out = malloc((len + 1) * sizeof(char)); strncpy(out, start, len); out[len] = '\0'; return out; } #endif #ifndef HAVE_STRDUP char* strdup(const char* str) { size_t len = strlen(str); char* out = malloc(len + 1); strncpy(out, str, len); out[len] = '\0'; return out; } #endif #ifndef HAVE_STRLCPY size_t strlcpy(char* restrict dst, const char* restrict src, size_t dstsize) { size_t i = 0; for (; src[i] && dstsize > 1; ++i) { dst[i] = src[i]; --dstsize; } if (dstsize) { dst[i] = '\0'; } while (src[i]) { ++i; } return i; } #endif char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) { char* last = 0; const char* next = haystack; size_t needleLen = strlen(needle); for (; len >= needleLen; --len, ++next) { if (strncmp(needle, next, needleLen) == 0) { last = (char*) next; } } return last; } bool endswith(const char* restrict s1, const char* restrict end) { size_t len = strlen(s1); size_t endLen = strlen(end); if (len < endLen) { return false; } return strcmp(&s1[len - endLen], end) == 0; } bool startswith(const char* restrict s1, const char* restrict start) { size_t len = strlen(s1); size_t startLen = strlen(start); if (len < startLen) { return false; } return strncmp(s1, start, startLen) == 0; } uint32_t utf16Char(const uint16_t** unicode, size_t* length) { if (*length < 2) { *length = 0; return 0; } uint32_t unichar = **unicode; ++*unicode; *length -= 2; if (unichar < 0xD800 || unichar >= 0xE000) { return unichar; } if (*length < 2) { *length = 0; return 0; } uint16_t highSurrogate = unichar; uint16_t lowSurrogate = **unicode; ++*unicode; *length -= 2; if (highSurrogate >= 0xDC00) { return 0; } if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) { return 0; } highSurrogate -= 0xD800; lowSurrogate -= 0xDC00; return (highSurrogate << 10) + lowSurrogate + 0x10000; } uint32_t utf8Char(const char** unicode, size_t* length) { if (*length == 0) { return 0; } char byte = **unicode; --*length; ++*unicode; if (!(byte & 0x80)) { return byte; } uint32_t unichar; static const int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 }; size_t numBytes; for (numBytes = 0; numBytes < 3; ++numBytes) { if ((byte & tops[numBytes + 1]) == tops[numBytes]) { break; } } unichar = byte & ~tops[numBytes]; if (numBytes == 3) { return 0; } ++numBytes; if (*length < numBytes) { *length = 0; return 0; } size_t i; for (i = 0; i < numBytes; ++i) { unichar <<= 6; byte = **unicode; --*length; ++*unicode; if ((byte & 0xC0) != 0x80) { return 0; } unichar |= byte & 0x3F; } return unichar; } size_t toUtf8(uint32_t unichar, char* buffer) { if (unichar > 0x10FFFF) { unichar = 0xFFFD; } if (unichar < 0x80) { buffer[0] = unichar; return 1; } if (unichar < 0x800) { buffer[0] = (unichar >> 6) | 0xC0; buffer[1] = (unichar & 0x3F) | 0x80; return 2; } if (unichar < 0x10000) { buffer[0] = (unichar >> 12) | 0xE0; buffer[1] = ((unichar >> 6) & 0x3F) | 0x80; buffer[2] = (unichar & 0x3F) | 0x80; return 3; } if (unichar < 0x200000) { buffer[0] = (unichar >> 18) | 0xF0; buffer[1] = ((unichar >> 12) & 0x3F) | 0x80; buffer[2] = ((unichar >> 6) & 0x3F) | 0x80; buffer[3] = (unichar & 0x3F) | 0x80; return 4; } // This shouldn't be possible return 0; } int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) { uint32_t char1 = 0, char2 = 0; while (utf16Length > 0 && utf8Length > 0) { if (char1 < char2) { return -1; } if (char1 > char2) { return 1; } char1 = utf16Char(&utf16, &utf16Length); char2 = utf8Char(&utf8, &utf8Length); } if (utf16Length == 0 && utf8Length > 0) { return -1; } if (utf16Length > 0 && utf8Length == 0) { return 1; } return 0; } char* utf16to8(const uint16_t* utf16, size_t length) { char* utf8 = 0; char* offset = 0; char buffer[4]; size_t utf8TotalBytes = 0; size_t utf8Length = 0; while (true) { if (length == 0) { break; } uint32_t unichar = utf16Char(&utf16, &length); size_t bytes = toUtf8(unichar, buffer); utf8Length += bytes; if (utf8Length < utf8TotalBytes) { memcpy(offset, buffer, bytes); offset += bytes; } else if (!utf8) { utf8 = malloc(length); if (!utf8) { return 0; } utf8TotalBytes = length; memcpy(utf8, buffer, bytes); offset = utf8 + bytes; } else if (utf8Length >= utf8TotalBytes) { ptrdiff_t o = offset - utf8; char* newUTF8 = realloc(utf8, utf8TotalBytes * 2); offset = o + newUTF8; if (!newUTF8) { free(utf8); return 0; } utf8 = newUTF8; memcpy(offset, buffer, bytes); offset += bytes; } } char* newUTF8 = realloc(utf8, utf8Length + 1); if (!newUTF8) { free(utf8); return 0; } newUTF8[utf8Length] = '\0'; return newUTF8; } extern const uint16_t gbkUnicodeTable[]; char* gbkToUtf8(const char* gbk, size_t length) { char* utf8 = NULL; char* utf8Offset = NULL; size_t offset; uint8_t gbk1 = 0; char buffer[4]; size_t utf8TotalBytes = 0; size_t utf8Length = 0; for (offset = 0; offset < length; ++offset) { if (length == 0) { break; } unsigned unichar = 0xFFFD; if (!gbk1 && !(gbk[offset] & 0x80)) { unichar = gbk[offset]; } else if (gbk1) { uint8_t gbk2 = gbk[offset]; if (gbk2 >= 0x40 && gbk2 != 0xFF) { // TODO: GB-18030 support? unichar = gbkUnicodeTable[gbk1 * 0xBF + gbk2 - 0x40]; } gbk1 = 0; } else if (((uint8_t*) gbk)[offset] == 0xFF) { unichar = 0xFFFD; } else if (((uint8_t*) gbk)[offset] == 0x80) { unichar = 0x20AC; // Euro } else { gbk1 = ((uint8_t*) gbk)[offset] - 0x81; continue; } size_t bytes = toUtf8(unichar, buffer); utf8Length += bytes; if (!utf8) { utf8 = malloc(length); if (!utf8) { return NULL; } utf8TotalBytes = length; memcpy(utf8, buffer, bytes); utf8Offset = utf8 + bytes; } else if (utf8Length < utf8TotalBytes) { memcpy(utf8Offset, buffer, bytes); utf8Offset += bytes; } else if (utf8Length >= utf8TotalBytes) { ptrdiff_t o = utf8Offset - utf8; char* newUTF8 = realloc(utf8, utf8TotalBytes * 2); utf8Offset = o + newUTF8; if (!newUTF8) { free(utf8); return 0; } utf8 = newUTF8; memcpy(utf8Offset, buffer, bytes); utf8Offset += bytes; } } char* newUTF8 = realloc(utf8, utf8Length + 1); if (!newUTF8) { free(utf8); return 0; } newUTF8[utf8Length] = '\0'; return newUTF8; } int hexDigit(char digit) { switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return digit - '0'; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return digit - 'a' + 10; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': return digit - 'A' + 10; default: return -1; } } const char* hex32(const char* line, uint32_t* out) { uint32_t value = 0; int i; for (i = 0; i < 8; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; } const char* hex24(const char* line, uint32_t* out) { uint32_t value = 0; int i; for (i = 0; i < 6; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; } const char* hex16(const char* line, uint16_t* out) { uint16_t value = 0; *out = 0; int i; for (i = 0; i < 4; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; } const char* hex12(const char* line, uint16_t* out) { uint16_t value = 0; *out = 0; int i; for (i = 0; i < 3; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; } const char* hex8(const char* line, uint8_t* out) { uint8_t value = 0; *out = 0; int i; for (i = 0; i < 2; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; } const char* hex4(const char* line, uint8_t* out) { uint8_t value = 0; *out = 0; char digit = *line; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; *out = value; return line; } void rtrim(char* string) { if (!*string) { return; } char* end = string + strlen(string) - 1; while (isspace((int) *end) && end >= string) { *end = '\0'; --end; } } ssize_t parseQuotedString(const char* unparsed, ssize_t unparsedLen, char* parsed, ssize_t parsedLen) { memset(parsed, 0, parsedLen); bool escaped = false; char start = '\0'; ssize_t len = 0; ssize_t i; for (i = 0; i < unparsedLen && len < parsedLen; ++i) { if (i == 0) { switch (unparsed[0]) { case '"': case '\'': start = unparsed[0]; break; default: return -1; } continue; } if (escaped) { switch (unparsed[i]) { case 'n': parsed[len] = '\n'; break; case 'r': parsed[len] = '\r'; break; case '\\': parsed[len] = '\\'; break; case '\'': parsed[len] = '\''; break; case '"': parsed[len] = '"'; break; default: return -1; } escaped = false; ++len; continue; } if (unparsed[i] == start) { return len; } switch (unparsed[i]) { case '\\': escaped = true; break; case '\n': case '\r': return len; default: parsed[len] = unparsed[i]; ++len; break; } } return -1; } bool wildcard(const char* search, const char* string) { while (true) { if (search[0] == '*') { while (search[0] == '*') { ++search; } if (!search[0]) { return true; } while (string[0]) { if (string[0] == search[0] && wildcard(search, string)) { return true; } ++string; } return false; } else if (!search[0]) { return !string[0]; } else if (!string[0]) { return false; } else if (string[0] != search[0]) { return false; } else { ++search; ++string; } } return false; }