/// Returns non-zero when `uc` is any UTF-16 surrogate code unit,
/// i.e. lies in the inclusive range [0xD800, 0xDFFF]; zero otherwise.
int is_surrogate(char16_t uc)
{
    const bool in_surrogate_range = (uc >= 0xD800) && (uc <= 0xDFFF);
    return in_surrogate_range;
}
/// Returns non-zero when `uc` is a high (leading) surrogate, 0xD800..0xDBFF.
int is_high_surrogate(char16_t uc)
{
    // All high surrogates share the same top six bits (110110b), so it is
    // enough to compare the value with the low ten payload bits shifted out.
    return (uc >> 10) == (0xD800 >> 10);
}
/// Returns non-zero when `uc` is a low (trailing) surrogate, 0xDC00..0xDFFF.
int is_low_surrogate(char16_t uc)
{
    // All low surrogates share the same top six bits (110111b), so it is
    // enough to compare the value with the low ten payload bits shifted out.
    return (uc >> 10) == (0xDC00 >> 10);
}
/// Combines a UTF-16 surrogate pair into the code point it encodes.
///
/// @param high  Leading surrogate (expected 0xD800..0xDBFF).
/// @param low   Trailing surrogate (expected 0xDC00..0xDFFF).
/// @return 0x10000 plus the 20 payload bits carried by the pair, converted
///         to wchar_t. The inputs are not validated here.
wchar_t surrogate_to_utf32(char16_t high, char16_t low)
{
    // Each surrogate carries ten payload bits; the high one supplies the
    // upper ten. Multiplication (rather than a shift) keeps the arithmetic
    // well defined even if the difference is negative for invalid input.
    const int upper_bits = (high - 0xD800) * 0x400;
    const int lower_bits = low - 0xDC00;
    return 0x10000 + upper_bits + lower_bits;
}
/// Decodes a sequence of UTF-16 code units, writing one wchar_t per decoded
/// code point.
///
/// @param input       First UTF-16 code unit to decode.
/// @param input_size  Number of code units (not bytes) readable at `input`.
/// @param output      Destination buffer. At most `input_size` elements are
///                    written; no terminator is appended, so the caller must
///                    pre-zero or otherwise delimit the buffer.
///
/// Non-surrogate units are copied through unchanged and well-formed
/// surrogate pairs are combined with surrogate_to_utf32(). An unpaired
/// surrogate (a low surrogate with no preceding high one, or a high surrogate
/// not followed by a low one) is replaced by U+FFFD REPLACEMENT CHARACTER
/// instead of being silently dropped, so malformed input stays visible.
void convert_utf16_to_utf32(const char16_t *input,
                            size_t input_size,
                            wchar_t *output)
{
    const wchar_t replacement_char = 0xFFFD;
    const char16_t *const end = input + input_size;

    while (input < end) {
        const char16_t uc = *input++;

        if (!is_surrogate(uc)) {
            *output++ = uc;
        }
        else if (is_high_surrogate(uc) && input < end && is_low_surrogate(*input)) {
            // Valid pair: consume the trailing unit as well.
            *output++ = surrogate_to_utf32(uc, *input++);
        }
        else {
            // Unpaired surrogate. Only `uc` is consumed; the next unit (if
            // any) is decoded on its own in the following iteration.
            *output++ = replacement_char;
        }
    }
}
/// Encodes a single code point as UTF-16.
///
/// @param src  Code point to encode.
/// @param des  Optional destination. When non-null it must have room for two
///             code units; pass nullptr to query the encoded length only.
/// @return Number of UTF-16 code units the code point occupies (1 or 2), or
///         0 when `src` is 0 or lies outside the Unicode range
///         (above U+10FFFF, or negative where wchar_t is signed).
///
/// Values in the surrogate range 0xD800..0xDFFF are not rejected; they are
/// written through as a single code unit.
size_t utf32_to_utf16(wchar_t src, char16_t* des)
{
    // Work on an unsigned copy: where wchar_t is a signed type, a negative
    // value becomes a huge one here and is rejected as out of range rather
    // than being mistaken for a BMP character.
    const char32_t code_point = static_cast<char32_t>(src);

    if (code_point == 0)
        return 0;

    if (code_point <= 0xFFFF)
    {
        if (des)
            des[0] = static_cast<char16_t>(code_point);
        return 1;
    }

    // U+10FFFF is the last valid code point (planes 15 and 16 included).
    if (code_point <= 0x10FFFF)
    {
        if (des)
        {
            const char32_t payload = code_point - 0x10000;
            des[0] = static_cast<char16_t>(0xD800 + (payload >> 10));    // high
            des[1] = static_cast<char16_t>(0xDC00 + (payload & 0x03FF)); // low
        }
        return 2;
    }

    return 0;
}
/// Reads a length-prefixed wide string from the stream.
///
/// The layout consumed is a 32-bit byte count (via ReadInt32()) followed by
/// that many bytes of character data. A non-positive count yields an empty
/// string and reads no payload.
///
/// Without __BUILD_IN_MSYS2 the payload is taken verbatim as wchar_t units
/// (presumably a platform where wchar_t is 16 bits wide - confirm). With
/// __BUILD_IN_MSYS2 the payload is treated as UTF-16 and decoded with
/// convert_utf16_to_utf32() before being stored.
///
/// @return Reference to a function-local static String, so the result is
///         overwritten by the next call and the function is not reentrant.
const String& BinaryReader::ReadUnicode()
{
    static String str;

    const int count = ReadInt32();
    if (count <= 0)
    {
        str = L"";
        return str;
    }

    // Scratch storage for the common small case. Aligned because the bytes
    // are reinterpreted as wchar_t / char16_t below.
    alignas(wchar_t) alignas(char16_t) static char fixedBuffer[512];

    // Frees whatever heap scratch was allocated on every exit path, including
    // an exception thrown while assigning to str.
    struct Scratch
    {
        char* bytes = nullptr;
        wchar_t* wide = nullptr;
        ~Scratch() { delete[] bytes; delete[] wide; }
    } scratch;

    // The byte count comes from the stream and may exceed the fixed buffer;
    // reading it there unconditionally would overrun the buffer.
    char* buffer = fixedBuffer;
    if (count > static_cast<int>(sizeof(fixedBuffer)))
    {
        // Value-initialised so a short read leaves zeros, not indeterminate bytes.
        scratch.bytes = new char[count]();
        buffer = scratch.bytes;
    }

    // NOTE(review): the result of the read is not checked here - confirm
    // whether callers validate the stream state afterwards.
    stream.read(buffer, count);

#ifndef __BUILD_IN_MSYS2
    str.assign(reinterpret_cast<const wchar_t*>(buffer), count / sizeof(wchar_t));
#else
    const int unitCount = count / 2; // payload length in UTF-16 code units

    // One extra zeroed element acts as the terminator: the decoder writes at
    // most unitCount elements and appends none itself.
    scratch.wide = new wchar_t[unitCount + 1]();
    convert_utf16_to_utf32(reinterpret_cast<const char16_t*>(buffer), unitCount, scratch.wide);

    // Assigned as a zero-terminated string, so presumably anything after an
    // embedded NUL in the payload is not kept - verify against String.
    str = scratch.wide;
#endif
    return str;
}