gcc utf16 to utf32

// Returns 1 when uc is any UTF-16 surrogate code unit (0xD800..0xDFFF), else 0.
int is_surrogate(char16_t uc)
{
	return uc >= 0xd800 && uc <= 0xdfff;
}
// Returns 1 when uc is a high (leading) surrogate, 0xD800..0xDBFF, else 0.
int is_high_surrogate(char16_t uc)
{
	// All high surrogates share the same top six bits (0xD800 >> 10 == 0x36).
	const unsigned int top_bits = static_cast<unsigned int>(uc) >> 10;
	return top_bits == 0x36u;
}
// Returns 1 when uc is a low (trailing) surrogate, 0xDC00..0xDFFF, else 0.
int is_low_surrogate(char16_t uc)
{
	// Unsigned wrap-around turns the two-sided range check into one comparison.
	const unsigned int offset = static_cast<unsigned int>(uc) - 0xdc00u;
	return offset < 0x400u;
}
// Combines a high/low surrogate pair into the code point it encodes.
// The arguments are not validated; callers check them beforehand.
wchar_t surrogate_to_utf32(char16_t high, char16_t low) {
	// Folds the three constants of the textbook formula
	//   0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
	// into a single bias (0x35fdc00) subtracted at the end.
	const int surrogate_bias = (0xd800 << 10) + 0xdc00 - 0x10000;
	const int shifted_high = static_cast<int>(high) << 10;
	const int combined = shifted_high + static_cast<int>(low);
	return combined - surrogate_bias;
}
/**
 * Decodes UTF-16 code units into one wchar_t per code point.
 *
 * Ill-formed input (a low surrogate with no preceding high surrogate, or a
 * high surrogate not followed by a low one) is replaced by U+FFFD, one per
 * offending code unit, instead of being silently dropped.
 *
 * @param input       First UTF-16 code unit to decode.
 * @param input_size  Number of char16_t units readable from input.
 * @param output      Destination buffer. Each loop iteration consumes at least
 *                    one input unit and writes exactly one element, so at most
 *                    input_size elements are written. No terminator is added.
 */
void convert_utf16_to_utf32(const char16_t *input,
	size_t input_size,
	wchar_t *output)
{
	const wchar_t replacement_character = 0xFFFD;
	const char16_t * const end = input + input_size;
	while (input < end) {
		const char16_t uc = *input++;
		if (!is_surrogate(uc)) {
			// Basic Multilingual Plane: the code unit is the code point.
			*output++ = uc;
		}
		else if (is_high_surrogate(uc) && input < end && is_low_surrogate(*input)) {
			// Well-formed pair: consume the low surrogate as well.
			*output++ = surrogate_to_utf32(uc, *input++);
		}
		else {
			// Unpaired surrogate: mark the damage rather than deleting it.
			// The following unit is not consumed, so it is decoded on its own.
			*output++ = replacement_character;
		}
	}
}

//utf32 to utf16
/**
 * Encodes a single code point as UTF-16.
 *
 * @param src  Code point to encode.
 * @param des  Receives the code unit(s); may be null to query the length only.
 *             When non-null it must have room for two char16_t.
 * @return Number of char16_t units in the encoding (1 or 2), or 0 when src is
 *         NUL or cannot be encoded (a surrogate code point, or a value
 *         above U+10FFFF).
 */
size_t utf32_to_utf16(wchar_t src, char16_t* des)
{
	// wchar_t is signed on some platforms; converting to an unsigned type
	// makes negative inputs fail the range checks instead of passing them.
	const char32_t code_point = static_cast<char32_t>(src);

	if (code_point == 0) return 0;

	if (code_point <= 0xFFFF)
	{
		// U+D800..U+DFFF are reserved for surrogates; emitting one alone
		// would produce ill-formed UTF-16.
		if (code_point >= 0xD800 && code_point <= 0xDFFF) return 0;

		if (des) (*des) = static_cast<char16_t>(code_point);
		return 1;
	}

	// U+10FFFF is the last Unicode code point; planes 15 and 16 are valid.
	if (code_point <= 0x10FFFF)
	{
		if (des)
		{
			// (cp >> 10) - 0x40 is the same as (cp - 0x10000) >> 10.
			des[0] = static_cast<char16_t>(0xD800 + (code_point >> 10) - 0x40);  // high
			des[1] = static_cast<char16_t>(0xDC00 + (code_point & 0x03FF));      // low
		}
		return 2;
	}

	return 0;
}
/**
 * Reads a length-prefixed wide string from the stream.
 *
 * The prefix returned by ReadInt32() is a byte count, and that many bytes
 * follow it. Without __BUILD_IN_MSYS2 the bytes are taken as wchar_t units
 * as-is. With it they are taken as UTF-16 code units and decoded through
 * convert_utf16_to_utf32(); in that case the result ends at the first NUL.
 *
 * @return Reference to a function-local static String that the next call
 *         overwrites; empty when the prefix is zero or negative.
 */
const String& BinaryReader::ReadUnicode()
{
	static String str;
	const int count = ReadInt32();

	if (count <= 0)
	{
		str = L"";
		return str;
	}

	// Size the buffer from the prefix: the previous fixed 512-byte buffer was
	// overrun by any longer string. Zero-filled so a short read yields NULs
	// rather than indeterminate bytes.
	// NOTE(review): an owning container would also release this on exceptions;
	// raw new[] is kept because the file's include list is not visible here.
	char *buffer = new char[count]();
	stream.read(buffer, count);

#ifndef __BUILD_IN_MSYS2
	str.assign(reinterpret_cast<const wchar_t *>(buffer), count / sizeof(wchar_t));
#else
	// Two bytes per UTF-16 code unit; a trailing odd byte is ignored.
	const int unitCount = count / 2;

	// One extra zero-initialised element guarantees termination, because the
	// decoder writes at most unitCount elements and adds no terminator.
	wchar_t *decoded = new wchar_t[unitCount + 1]();
	convert_utf16_to_utf32(reinterpret_cast<const char16_t *>(buffer), unitCount, decoded);
	str = decoded;
	delete[] decoded;
#endif

	delete[] buffer;
	return str;
}

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

19 + 18 =