// UTFConvert.cpp #include "StdAfx.h" #include "UTFConvert.h" #include "Types.h" static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) { size_t destPos = 0, srcPos = 0; for (;;) { Byte c; int numAdds; if (srcPos == srcLen) { *destLen = destPos; return True; } c = (Byte)src[srcPos++]; if (c < 0x80) { if (dest) dest[destPos] = (wchar_t)c; destPos++; continue; } if (c < 0xC0) break; for (numAdds = 1; numAdds < 5; numAdds++) if (c < kUtf8Limits[numAdds]) break; UInt32 value = (c - kUtf8Limits[numAdds - 1]); do { Byte c2; if (srcPos == srcLen) break; c2 = (Byte)src[srcPos++]; if (c2 < 0x80 || c2 >= 0xC0) break; value <<= 6; value |= (c2 - 0x80); } while (--numAdds != 0); if (value < 0x10000) { if (dest) dest[destPos] = (wchar_t)value; destPos++; } else { value -= 0x10000; if (value >= 0x100000) break; if (dest) { dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); } destPos += 2; } } *destLen = destPos; return False; } static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen) { size_t destPos = 0, srcPos = 0; for (;;) { unsigned numAdds; UInt32 value; if (srcPos == srcLen) { *destLen = destPos; return True; } value = src[srcPos++]; if (value < 0x80) { if (dest) dest[destPos] = (char)value; destPos++; continue; } if (value >= 0xD800 && value < 0xE000) { UInt32 c2; if (value >= 0xDC00 || srcPos == srcLen) break; c2 = src[srcPos++]; if (c2 < 0xDC00 || c2 >= 0xE000) break; value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000; } for (numAdds = 1; numAdds < 5; numAdds++) if (value < (((UInt32)1) << (numAdds * 5 + 6))) break; if (dest) dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); destPos++; do { numAdds--; if (dest) dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); destPos++; } while (numAdds != 0); } *destLen = destPos; return False; } bool ConvertUTF8ToUnicode(const AString &src, UString &dest) { dest.Empty(); size_t destLen = 0; Utf8_To_Utf16(NULL, &destLen, src, src.Length()); wchar_t *p = dest.GetBuffer((int)destLen); Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length()); p[destLen] = 0; dest.ReleaseBuffer(); return res ? true : false; } bool ConvertUnicodeToUTF8(const UString &src, AString &dest) { dest.Empty(); size_t destLen = 0; Utf16_To_Utf8(NULL, &destLen, src, src.Length()); char *p = dest.GetBuffer((int)destLen); Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length()); p[destLen] = 0; dest.ReleaseBuffer(); return res ? true : false; }