|
@@ -20,123 +20,123 @@ SUcs::~SUcs()
|
|
|
//UTF32转UTF8,纯算法实现,只转换一个UNICODE字符
|
|
//UTF32转UTF8,纯算法实现,只转换一个UNICODE字符
|
|
|
int SUcs::UnicodeToUTF8(unsigned int iUnicode, char* sOut)
|
|
int SUcs::UnicodeToUTF8(unsigned int iUnicode, char* sOut)
|
|
|
{
|
|
{
|
|
|
- // 0000 0000-0000 007F | 0xxxxxxx
|
|
|
|
|
- // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
|
|
|
|
|
- // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
- // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
- unsigned char* sTemp = (unsigned char*)sOut;
|
|
|
|
|
- if (iUnicode > 0xFFFF)
|
|
|
|
|
- {
|
|
|
|
|
- if (sTemp)
|
|
|
|
|
- {
|
|
|
|
|
- sTemp[0] = (((iUnicode >> 18) & 0x07) | 0xF0);
|
|
|
|
|
- sTemp[1] = (((iUnicode >> 12) & 0x3f) | 0x80);
|
|
|
|
|
- sTemp[2] = (((iUnicode >> 6) & 0x3f) | 0x80);
|
|
|
|
|
- sTemp[3] = ((iUnicode & 0x3f) | 0x80);
|
|
|
|
|
- }
|
|
|
|
|
- return 4;
|
|
|
|
|
- }
|
|
|
|
|
- if (iUnicode > 0x7FF)
|
|
|
|
|
- {
|
|
|
|
|
- if (sTemp)
|
|
|
|
|
- {
|
|
|
|
|
- sTemp[0] = (((iUnicode >> 12) & 0x0F) | 0xE0);
|
|
|
|
|
- sTemp[1] = (((iUnicode >> 6) & 0x3F) | 0x80);
|
|
|
|
|
- sTemp[2] = ((iUnicode & 0x3f) | 0x80);
|
|
|
|
|
- }
|
|
|
|
|
- return 3;
|
|
|
|
|
- }
|
|
|
|
|
- if (iUnicode > 0x7F)
|
|
|
|
|
- {
|
|
|
|
|
- if (sTemp)
|
|
|
|
|
- {
|
|
|
|
|
- sTemp[0] = (((iUnicode >> 6) & 0x1F) | 0xC0);
|
|
|
|
|
- sTemp[1] = ((iUnicode & 0x3f) | 0x80);
|
|
|
|
|
- }
|
|
|
|
|
- return 2;
|
|
|
|
|
- }
|
|
|
|
|
- if (sTemp)
|
|
|
|
|
- {
|
|
|
|
|
- //低位
|
|
|
|
|
- *sTemp++ = (iUnicode & 0xff);
|
|
|
|
|
- }
|
|
|
|
|
- return 1;
|
|
|
|
|
|
|
+ // 0000 0000-0000 007F | 0xxxxxxx
|
|
|
|
|
+ // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
|
|
|
|
|
+ // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
+ // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
+ unsigned char* sTemp = (unsigned char*)sOut;
|
|
|
|
|
+ if (iUnicode > 0xFFFF)
|
|
|
|
|
+ {
|
|
|
|
|
+ if (sTemp)
|
|
|
|
|
+ {
|
|
|
|
|
+ sTemp[0] = (((iUnicode >> 18) & 0x07) | 0xF0);
|
|
|
|
|
+ sTemp[1] = (((iUnicode >> 12) & 0x3f) | 0x80);
|
|
|
|
|
+ sTemp[2] = (((iUnicode >> 6) & 0x3f) | 0x80);
|
|
|
|
|
+ sTemp[3] = ((iUnicode & 0x3f) | 0x80);
|
|
|
|
|
+ }
|
|
|
|
|
+ return 4;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (iUnicode > 0x7FF)
|
|
|
|
|
+ {
|
|
|
|
|
+ if (sTemp)
|
|
|
|
|
+ {
|
|
|
|
|
+ sTemp[0] = (((iUnicode >> 12) & 0x0F) | 0xE0);
|
|
|
|
|
+ sTemp[1] = (((iUnicode >> 6) & 0x3F) | 0x80);
|
|
|
|
|
+ sTemp[2] = ((iUnicode & 0x3f) | 0x80);
|
|
|
|
|
+ }
|
|
|
|
|
+ return 3;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (iUnicode > 0x7F)
|
|
|
|
|
+ {
|
|
|
|
|
+ if (sTemp)
|
|
|
|
|
+ {
|
|
|
|
|
+ sTemp[0] = (((iUnicode >> 6) & 0x1F) | 0xC0);
|
|
|
|
|
+ sTemp[1] = ((iUnicode & 0x3f) | 0x80);
|
|
|
|
|
+ }
|
|
|
|
|
+ return 2;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (sTemp)
|
|
|
|
|
+ {
|
|
|
|
|
+ //低位
|
|
|
|
|
+ *sTemp++ = (iUnicode & 0xff);
|
|
|
|
|
+ }
|
|
|
|
|
+ return 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//UTF32转UTF16,纯算法实现,只转换一个UNICODE字符,返回UTF16字符数
|
|
//UTF32转UTF16,纯算法实现,只转换一个UNICODE字符,返回UTF16字符数
|
|
|
int SUcs::UnicodeToUTF16(unsigned int iUnicode, unsigned short* sOut)
|
|
int SUcs::UnicodeToUTF16(unsigned int iUnicode, unsigned short* sOut)
|
|
|
{
|
|
{
|
|
|
- if (iUnicode > 0x10000)
|
|
|
|
|
- {
|
|
|
|
|
- // 0x10000其UTF-16编码就是0xD800 0xDC00(即0x10000经UTF-16编码后的码元序列为0xD800 0xDC00),
|
|
|
|
|
- // ====代理码元1==== ====代理码元2====
|
|
|
|
|
- // 1101 10pp ppxx xxxx 1101 11xx xxxx xxxx
|
|
|
|
|
- // 16个平面(即第2平面~第17平面)平面号需要-1
|
|
|
|
|
- if (sOut)
|
|
|
|
|
- {
|
|
|
|
|
- int iSpace = (iUnicode >> 16) - 1;
|
|
|
|
|
- sOut[0] = 0xD800 | (iSpace << 6) | ((iUnicode >> 10) & 0x3f);
|
|
|
|
|
- sOut[1] = 0xDC00 | (iUnicode & 0x3ff);
|
|
|
|
|
- }
|
|
|
|
|
- return 2;
|
|
|
|
|
- }
|
|
|
|
|
- if (sOut)
|
|
|
|
|
- {
|
|
|
|
|
- *sOut = iUnicode;
|
|
|
|
|
- }
|
|
|
|
|
- return 1;
|
|
|
|
|
|
|
+ if (iUnicode > 0x10000)
|
|
|
|
|
+ {
|
|
|
|
|
+ // 0x10000其UTF-16编码就是0xD800 0xDC00(即0x10000经UTF-16编码后的码元序列为0xD800 0xDC00),
|
|
|
|
|
+ // ====代理码元1==== ====代理码元2====
|
|
|
|
|
+ // 1101 10pp ppxx xxxx 1101 11xx xxxx xxxx
|
|
|
|
|
+ // 16个平面(即第2平面~第17平面)平面号需要-1
|
|
|
|
|
+ if (sOut)
|
|
|
|
|
+ {
|
|
|
|
|
+ int iSpace = (iUnicode >> 16) - 1;
|
|
|
|
|
+ sOut[0] = 0xD800 | (iSpace << 6) | ((iUnicode >> 10) & 0x3f);
|
|
|
|
|
+ sOut[1] = 0xDC00 | (iUnicode & 0x3ff);
|
|
|
|
|
+ }
|
|
|
|
|
+ return 2;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (sOut)
|
|
|
|
|
+ {
|
|
|
|
|
+ *sOut = iUnicode;
|
|
|
|
|
+ }
|
|
|
|
|
+ return 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
//UTF8转UTF16,纯算法实现,只转换一个UNICODE字符,返回UTF8字符数
|
|
//UTF8转UTF16,纯算法实现,只转换一个UNICODE字符,返回UTF8字符数
|
|
|
int SUcs::UTF8ToUnicode(const char* sUTF8, unsigned int *iUnicode)
|
|
int SUcs::UTF8ToUnicode(const char* sUTF8, unsigned int *iUnicode)
|
|
|
{
|
|
{
|
|
|
- // 0000 0000-0000 007F | 0xxxxxxx
|
|
|
|
|
- // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
|
|
|
|
|
- // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
- // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
- unsigned char* sTemp = (unsigned char*)sUTF8;
|
|
|
|
|
- if ((*sTemp & 0xf0) == 0xf0) //1111 四字节
|
|
|
|
|
- {
|
|
|
|
|
- *iUnicode = ((sTemp[0] & 0x07) << 18) |
|
|
|
|
|
- ((sTemp[1] & 0x3f) << 12) |
|
|
|
|
|
- ((sTemp[2] & 0x3f) << 6 ) |
|
|
|
|
|
- (sTemp[3] & 0x3f);
|
|
|
|
|
- return 4;
|
|
|
|
|
- }
|
|
|
|
|
- if ((*sTemp & 0xf0) == 0xE0) //1110 三字节
|
|
|
|
|
- {
|
|
|
|
|
- *iUnicode = ((sTemp[0] & 0x0f) << 12) |
|
|
|
|
|
- ((sTemp[1] & 0x3f) << 6 ) |
|
|
|
|
|
- (sTemp[2] & 0x3f);
|
|
|
|
|
- return 3;
|
|
|
|
|
- }
|
|
|
|
|
- if ((*sTemp & 0xf0) == 0xC0) //1100 二字节
|
|
|
|
|
- {
|
|
|
|
|
- *iUnicode = ((sTemp[0] & 0x1f) << 6 ) | (sTemp[1] & 0x3f);
|
|
|
|
|
- return 2;
|
|
|
|
|
- }
|
|
|
|
|
- *iUnicode = sTemp[0];
|
|
|
|
|
- return 1;
|
|
|
|
|
|
|
+ // 0000 0000-0000 007F | 0xxxxxxx
|
|
|
|
|
+ // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
|
|
|
|
|
+ // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
+ // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
+ unsigned char* sTemp = (unsigned char*)sUTF8;
|
|
|
|
|
+ if ((*sTemp & 0xf0) == 0xf0) //1111 四字节
|
|
|
|
|
+ {
|
|
|
|
|
+ *iUnicode = ((sTemp[0] & 0x07) << 18) |
|
|
|
|
|
+ ((sTemp[1] & 0x3f) << 12) |
|
|
|
|
|
+ ((sTemp[2] & 0x3f) << 6 ) |
|
|
|
|
|
+ (sTemp[3] & 0x3f);
|
|
|
|
|
+ return 4;
|
|
|
|
|
+ }
|
|
|
|
|
+ if ((*sTemp & 0xf0) == 0xE0) //1110 三字节
|
|
|
|
|
+ {
|
|
|
|
|
+ *iUnicode = ((sTemp[0] & 0x0f) << 12) |
|
|
|
|
|
+ ((sTemp[1] & 0x3f) << 6 ) |
|
|
|
|
|
+ (sTemp[2] & 0x3f);
|
|
|
|
|
+ return 3;
|
|
|
|
|
+ }
|
|
|
|
|
+ if ((*sTemp & 0xf0) == 0xC0) //1100 二字节
|
|
|
|
|
+ {
|
|
|
|
|
+ *iUnicode = ((sTemp[0] & 0x1f) << 6 ) | (sTemp[1] & 0x3f);
|
|
|
|
|
+ return 2;
|
|
|
|
|
+ }
|
|
|
|
|
+ *iUnicode = sTemp[0];
|
|
|
|
|
+ return 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//UTF16转UTF32,纯算法实现,只转换一个UNICODE字符,返回UTF16字符数
|
|
//UTF16转UTF32,纯算法实现,只转换一个UNICODE字符,返回UTF16字符数
|
|
|
int SUcs::UTF16ToUnicode(const unsigned short* sUTF16, unsigned int *iUnicode)
|
|
int SUcs::UTF16ToUnicode(const unsigned short* sUTF16, unsigned int *iUnicode)
|
|
|
{
|
|
{
|
|
|
- // 0x10000其UTF-16编码就是0xD800 0xDC00(即0x10000经UTF-16编码后的码元序列为0xD800 0xDC00),
|
|
|
|
|
- // ====代理码元1==== ====代理码元2====
|
|
|
|
|
- // 1101 10pp ppxx xxxx 1101 11xx xxxx xxxx
|
|
|
|
|
- // 16个平面(即第2平面~第17平面)平面号需要-1
|
|
|
|
|
|
|
+ // 0x10000其UTF-16编码就是0xD800 0xDC00(即0x10000经UTF-16编码后的码元序列为0xD800 0xDC00),
|
|
|
|
|
+ // ====代理码元1==== ====代理码元2====
|
|
|
|
|
+ // 1101 10pp ppxx xxxx 1101 11xx xxxx xxxx
|
|
|
|
|
+ // 16个平面(即第2平面~第17平面)平面号需要-1
|
|
|
if ((*sUTF16 & 0xFF00) == 0xD800) //1111 四字节
|
|
if ((*sUTF16 & 0xFF00) == 0xD800) //1111 四字节
|
|
|
- {
|
|
|
|
|
- int iSpace = ((sUTF16[0] >> 6) & 0x0f) + 1;
|
|
|
|
|
- *iUnicode = (iSpace << 16)
|
|
|
|
|
- | ((sUTF16[0] & 0x3f) << 10)
|
|
|
|
|
- | (sUTF16[1] & 0x3ff) ;
|
|
|
|
|
- return 2;
|
|
|
|
|
- }
|
|
|
|
|
- *iUnicode = sUTF16[0];
|
|
|
|
|
- return 1;
|
|
|
|
|
|
|
+ {
|
|
|
|
|
+ int iSpace = ((sUTF16[0] >> 6) & 0x0f) + 1;
|
|
|
|
|
+ *iUnicode = (iSpace << 16)
|
|
|
|
|
+ | ((sUTF16[0] & 0x3f) << 10)
|
|
|
|
|
+ | (sUTF16[1] & 0x3ff) ;
|
|
|
|
|
+ return 2;
|
|
|
|
|
+ }
|
|
|
|
|
+ *iUnicode = sUTF16[0];
|
|
|
|
|
+ return 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//UTF16转UTF8,纯算法实现,转换整个字符串,返回UTF8字符数
|
|
//UTF16转UTF8,纯算法实现,转换整个字符串,返回UTF8字符数
|
|
@@ -172,61 +172,61 @@ int SUcs::UTF8ToUTF16(const char* sUTF8, unsigned short* sUTF16)
|
|
|
//GB18030转UTF8,转换整个字符串,返回UTF8字符数
|
|
//GB18030转UTF8,转换整个字符串,返回UTF8字符数
|
|
|
int SUcs::GB18030ToUTF8(const char* sGB18030, char* sUnicode)
|
|
int SUcs::GB18030ToUTF8(const char* sGB18030, char* sUnicode)
|
|
|
{
|
|
{
|
|
|
- int iLen = strlen(sGB18030);
|
|
|
|
|
- wchar_t* wch=new wchar_t[iLen + 1];
|
|
|
|
|
|
|
+ int iLen = strlen(sGB18030);
|
|
|
|
|
+ wchar_t* wch=new wchar_t[iLen + 1];
|
|
|
int iRet = MultiByteToWideChar(CP_ACP, 0, sGB18030, iLen, wch, iLen);
|
|
int iRet = MultiByteToWideChar(CP_ACP, 0, sGB18030, iLen, wch, iLen);
|
|
|
- if (iRet < 0)
|
|
|
|
|
- {
|
|
|
|
|
- delete wch;
|
|
|
|
|
- return -1;
|
|
|
|
|
- }
|
|
|
|
|
- wch[iRet] = 0;
|
|
|
|
|
- iLen = UTF16ToUTF8((const unsigned short*)wch, sUnicode);
|
|
|
|
|
- delete wch;
|
|
|
|
|
- return iRet;
|
|
|
|
|
|
|
+ if (iRet < 0)
|
|
|
|
|
+ {
|
|
|
|
|
+ delete wch;
|
|
|
|
|
+ return -1;
|
|
|
|
|
+ }
|
|
|
|
|
+ wch[iRet] = 0;
|
|
|
|
|
+ iLen = UTF16ToUTF8((const unsigned short*)wch, sUnicode);
|
|
|
|
|
+ delete wch;
|
|
|
|
|
+ return iRet;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//GB18030转UTF16,转换整个字符串,返回UTF16字符数
|
|
//GB18030转UTF16,转换整个字符串,返回UTF16字符数
|
|
|
int SUcs::GB18030ToUTF16(const char* sGB18030, unsigned short* sUnicode)
|
|
int SUcs::GB18030ToUTF16(const char* sGB18030, unsigned short* sUnicode)
|
|
|
{
|
|
{
|
|
|
- int iLen = strlen(sGB18030);
|
|
|
|
|
- wchar_t* wch=new wchar_t[iLen + 1];
|
|
|
|
|
|
|
+ int iLen = strlen(sGB18030);
|
|
|
|
|
+ wchar_t* wch=new wchar_t[iLen + 1];
|
|
|
int iRet = MultiByteToWideChar(CP_ACP, 0, sGB18030, iLen, (LPWSTR)sUnicode, iLen);
|
|
int iRet = MultiByteToWideChar(CP_ACP, 0, sGB18030, iLen, (LPWSTR)sUnicode, iLen);
|
|
|
- if (iRet < 0) return -1;
|
|
|
|
|
- sUnicode[iRet] = 0;
|
|
|
|
|
- return iRet;
|
|
|
|
|
|
|
+ if (iRet < 0) return -1;
|
|
|
|
|
+ sUnicode[iRet] = 0;
|
|
|
|
|
+ return iRet;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//UTF8转GB18030,转换整个字符串,返回UTF8字符数
|
|
//UTF8转GB18030,转换整个字符串,返回UTF8字符数
|
|
|
int SUcs::UTF8ToGB18030(const char* sUnicode, char* sGB18030)
|
|
int SUcs::UTF8ToGB18030(const char* sUnicode, char* sGB18030)
|
|
|
{
|
|
{
|
|
|
- int iLen = strlen(sUnicode);
|
|
|
|
|
- wchar_t* wch = new wchar_t[iLen + 1];
|
|
|
|
|
- int iwl = UTF8ToUTF16(sUnicode, (unsigned short*)wch);
|
|
|
|
|
- BOOL bFail = FALSE;
|
|
|
|
|
|
|
+ int iLen = strlen(sUnicode);
|
|
|
|
|
+ wchar_t* wch = new wchar_t[iLen + 1];
|
|
|
|
|
+ int iwl = UTF8ToUTF16(sUnicode, (unsigned short*)wch);
|
|
|
|
|
+ BOOL bFail = FALSE;
|
|
|
int iRet = WideCharToMultiByte(CP_ACP, 0, wch, iwl, sGB18030, iLen * 2, "?", &bFail);
|
|
int iRet = WideCharToMultiByte(CP_ACP, 0, wch, iwl, sGB18030, iLen * 2, "?", &bFail);
|
|
|
- if (iRet < 0) return -1;
|
|
|
|
|
- sGB18030[iRet] = 0;
|
|
|
|
|
- return iRet;
|
|
|
|
|
|
|
+ if (iRet < 0) return -1;
|
|
|
|
|
+ sGB18030[iRet] = 0;
|
|
|
|
|
+ return iRet;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//UTF16转GB18030,调用单字符函数,转换整个字符串,返回UTF8字符数
|
|
//UTF16转GB18030,调用单字符函数,转换整个字符串,返回UTF8字符数
|
|
|
int SUcs::UTF16ToGB18030(const unsigned short* sUnicode, char* sGB18030)
|
|
int SUcs::UTF16ToGB18030(const unsigned short* sUnicode, char* sGB18030)
|
|
|
{
|
|
{
|
|
|
- int iwl = wcslen((LPWSTR)sUnicode);
|
|
|
|
|
- BOOL bFail = FALSE;
|
|
|
|
|
|
|
+ int iwl = wcslen((LPWSTR)sUnicode);
|
|
|
|
|
+ BOOL bFail = FALSE;
|
|
|
int iRet = WideCharToMultiByte(CP_ACP, 0, (LPWSTR)sUnicode, iwl, sGB18030, iwl * 3, "?", &bFail);
|
|
int iRet = WideCharToMultiByte(CP_ACP, 0, (LPWSTR)sUnicode, iwl, sGB18030, iwl * 3, "?", &bFail);
|
|
|
- if (iRet < 0) return -1;
|
|
|
|
|
- sGB18030[iRet] = 0;
|
|
|
|
|
- return iRet;
|
|
|
|
|
|
|
+ if (iRet < 0) return -1;
|
|
|
|
|
+ sGB18030[iRet] = 0;
|
|
|
|
|
+ return iRet;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
#else
|
|
|
|
|
|
|
|
//GB18030转UTF8,转换整个字符串,返回UTF8字符数
|
|
//GB18030转UTF8,转换整个字符串,返回UTF8字符数
|
|
|
-int SUcs::GB18030ToUTF8(const char* sGB18030, char* sUnicode)
|
|
|
|
|
|
|
+int SUcs::GB18030ToUTF8(const char* sGB18030, int iGBLen, char* sUnicode)
|
|
|
{
|
|
{
|
|
|
- size_t iInLen = strlen(sGB18030);
|
|
|
|
|
|
|
+ size_t iInLen = iGBLen ? iGBLen : strlen(sGB18030);
|
|
|
iconv_t cd;
|
|
iconv_t cd;
|
|
|
cd = iconv_open("UTF-8", "GB18030");
|
|
cd = iconv_open("UTF-8", "GB18030");
|
|
|
if((iconv_t)-1 == cd) return -1;
|
|
if((iconv_t)-1 == cd) return -1;
|
|
@@ -235,72 +235,44 @@ int SUcs::GB18030ToUTF8(const char* sGB18030, char* sUnicode)
|
|
|
char* pOut = sUnicode;
|
|
char* pOut = sUnicode;
|
|
|
int iRet = iconv(cd, &pIn, &iInLen, &pOut, &iOutLen);
|
|
int iRet = iconv(cd, &pIn, &iInLen, &pOut, &iOutLen);
|
|
|
iconv_close(cd);
|
|
iconv_close(cd);
|
|
|
- if((size_t)-1 == iRet) return -1;
|
|
|
|
|
|
|
+ sUnicode[iOutLen] = 0;
|
|
|
|
|
+ if(-1 == iRet) return -1;
|
|
|
return strlen(sUnicode);
|
|
return strlen(sUnicode);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//GB18030转UTF16,转换整个字符串,返回UTF16字符数
|
|
//GB18030转UTF16,转换整个字符串,返回UTF16字符数
|
|
|
-int SUcs::GB18030ToUTF16(const char* sGB18030, unsigned short* sUnicode)
|
|
|
|
|
|
|
+int SUcs::GB18030ToUTF16(const char* sGB18030, int iGBLen, unsigned short* sUnicode)
|
|
|
{
|
|
{
|
|
|
- size_t iInLen = strlen(sGB18030);
|
|
|
|
|
|
|
+ size_t iInLen = iGBLen ? iGBLen : strlen(sGB18030);
|
|
|
iconv_t cd;
|
|
iconv_t cd;
|
|
|
cd = iconv_open("UTF-16", "GB18030");
|
|
cd = iconv_open("UTF-16", "GB18030");
|
|
|
if((iconv_t)-1 == cd) return -1;
|
|
if((iconv_t)-1 == cd) return -1;
|
|
|
size_t iOutLen = iInLen + 2;
|
|
size_t iOutLen = iInLen + 2;
|
|
|
char* pIn = (char*)sGB18030;
|
|
char* pIn = (char*)sGB18030;
|
|
|
- unsigned short* pu16 = new unsigned short[iOutLen];
|
|
|
|
|
- char* pOut = (char*)pu16;
|
|
|
|
|
|
|
+ char* pOut = (char*)sUnicode;
|
|
|
int iRet = iconv(cd, &pIn, &iInLen, &pOut, &iOutLen);
|
|
int iRet = iconv(cd, &pIn, &iInLen, &pOut, &iOutLen);
|
|
|
iconv_close(cd);
|
|
iconv_close(cd);
|
|
|
- if((size_t)-1 == iRet)
|
|
|
|
|
- {
|
|
|
|
|
- delete pu16;
|
|
|
|
|
- return -1;
|
|
|
|
|
- }
|
|
|
|
|
- for (iOutLen = 0; iOutLen < strlen(sGB18030); iOutLen++)
|
|
|
|
|
- {
|
|
|
|
|
- sUnicode[iOutLen] = pu16[iOutLen + 1];
|
|
|
|
|
- if (pu16[iOutLen + 1] == 0) break;
|
|
|
|
|
- }
|
|
|
|
|
- delete pu16;
|
|
|
|
|
|
|
+ if(iRet != 0) return -1;
|
|
|
return iOutLen;
|
|
return iOutLen;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//UTF8转GB18030,转换整个字符串,返回UTF8字符数
|
|
//UTF8转GB18030,转换整个字符串,返回UTF8字符数
|
|
|
-int SUcs::UTF8ToGB18030(const char* sUnicode, char* sGB18030)
|
|
|
|
|
|
|
+int SUcs::UTF8ToGB18030(const char* sUnicode, int iUnLen, char* sGB18030)
|
|
|
{
|
|
{
|
|
|
- size_t iInLen = strlen(sUnicode);
|
|
|
|
|
|
|
+ size_t iInLen = iUnLen ? iUnLen : strlen(sUnicode);
|
|
|
iconv_t cd;
|
|
iconv_t cd;
|
|
|
cd = iconv_open("GB18030", "UTF-8");
|
|
cd = iconv_open("GB18030", "UTF-8");
|
|
|
if((iconv_t)-1 == cd) return -1;
|
|
if((iconv_t)-1 == cd) return -1;
|
|
|
- size_t iOutLen = iInLen * 2;
|
|
|
|
|
|
|
+ size_t iOutMax = iInLen * 2;
|
|
|
|
|
+ size_t iOutLen = iOutMax;
|
|
|
char* pIn = (char*)sUnicode;
|
|
char* pIn = (char*)sUnicode;
|
|
|
char* pOut = sGB18030;
|
|
char* pOut = sGB18030;
|
|
|
int iRet = iconv(cd, &pIn, &iInLen, &pOut, &iOutLen);
|
|
int iRet = iconv(cd, &pIn, &iInLen, &pOut, &iOutLen);
|
|
|
|
|
+ sGB18030[iOutMax - iOutLen] = 0;
|
|
|
iconv_close(cd);
|
|
iconv_close(cd);
|
|
|
- if((size_t)-1 == iRet) return -1;
|
|
|
|
|
|
|
+ if(iRet != 0) return -1;
|
|
|
return strlen(sGB18030);
|
|
return strlen(sGB18030);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-//UTF16转GB18030,调用单字符函数,转换整个字符串,返回UTF8字符数
|
|
|
|
|
-int SUcs::UTF16ToGB18030(const unsigned short* sUnicode, char* sGB18030)
|
|
|
|
|
-{
|
|
|
|
|
- size_t iUL = 0;
|
|
|
|
|
- unsigned short* pUTemp = (unsigned short*)sUnicode;
|
|
|
|
|
- while (*pUTemp){ pUTemp++; iUL+=2;} //get length
|
|
|
|
|
- size_t iOutLen = iUL + 2;
|
|
|
|
|
- char* pIn = (char*)sUnicode;
|
|
|
|
|
- char* pOut = (char*)sGB18030;
|
|
|
|
|
-
|
|
|
|
|
- iconv_t cd;
|
|
|
|
|
- cd = iconv_open("GB18030", "UTF-16");
|
|
|
|
|
- if((iconv_t)-1 == cd) return -1;
|
|
|
|
|
- int iRet = iconv(cd, &pIn, &iUL, &pOut, &iOutLen);
|
|
|
|
|
- iconv_close(cd);
|
|
|
|
|
- if((size_t)-1 == iRet) return -1;
|
|
|
|
|
- return strlen(sGB18030);
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|