我们一般使用Windows函数MultiByteToWideChar将多字节字符串转换为宽字符字符串,函数原型如下:
int MultiByteToWideChar(__in UINT CodePage, // code page associated with the multibyte string; usually CP_ACP
__in DWORD dwFlags, // additional conversion control flags; usually 0
__in LPCSTR lpMultiByteStr, // the multibyte string to convert
__in int cbMultiByte, // size of the multibyte string in bytes; -1 makes the function determine the length itself
__out LPWSTR lpWideCharStr, // buffer that receives the converted Unicode string
__in int cchWideChar // maximum size of the receiving buffer, in characters
);
一般按下面步骤将一个多字节字符串转换为Unicode形式:
1)调用MultiByteToWideChar,为lpWideCharStr传入NULL,为cchWideChar传入0,为cbMultiByte传入-1;
2)分配一块足以容纳转换后的Unicode字符串的内存,它的大小是1)中函数返回值乘以sizeof(wchar_t);
3)再次调用MultiByteToWideChar,这次将缓冲区地址作为lpWideCharStr参数的值传入,将1)中函数的返回值(即字符数,无需再乘以sizeof(wchar_t))作为cchWideChar参数的值传入;
4)使用转换后的字符串;
5)释放Unicode字符串占用的内存块。
下面是我们封装的多字节字符串转Unicode字符串的函数:
/*******************************************************
*function: convert a multibyte (CP_ACP) string to a wide-character string
*param:  pwStr--[out] Receives a buffer allocated with new[] that holds the
*                     converted characters; the caller must release it with
*                     delete[]. Set to NULL whenever the function returns 0.
*        pStr --[in]  Points to the multibyte string to be converted.
*        len  --[in]  Size in bytes of the string pointed to by pStr, or -1
*                     if the string is null terminated.
*        IsEnd--[in]  Specifies whether a terminating L'\0' is appended to
*                     the converted characters. When len is -1 the buffer
*                     is always terminated, regardless of this flag.
*return: the number of wide characters converted, not counting the
*        terminator; 0 if the input is empty or the conversion fails
*******************************************************/
int ToWideString( WCHAR* &pwStr, const char* pStr, int len, BOOL IsEnd)
{
    ASSERT_POINTER(pStr, char);
    ASSERT(len >= 0 || len == -1);

    // Never leave the out-parameter uninitialised on the early-return paths.
    pwStr = NULL;

    // Sizing call: returns the required size in WCHARs, or 0 on failure.
    // When len == -1 the count includes the terminating null character.
    const int nRequired = MultiByteToWideChar(CP_ACP, 0, pStr, len, NULL, 0);
    if (nRequired <= 0)
    {
        return 0;
    }

    const int nWideLen = (len == -1) ? nRequired - 1 : nRequired;
    if (nWideLen == 0)
    {
        return 0;
    }

    // The buffer must hold everything the conversion call writes (nRequired),
    // plus one extra slot when the caller asked for a terminator that the API
    // will not write itself (explicit len).
    const int nAlloc = (IsEnd && len != -1) ? nRequired + 1 : nRequired;

    // new[] takes an element count, so there is no multiplication by
    // sizeof(WCHAR) here; ZeroMemory takes a byte count, so there is one below.
    WCHAR* pBuffer = new WCHAR[nAlloc];
    ZeroMemory(pBuffer, nAlloc * sizeof(WCHAR));

    // Passing fewer than nRequired characters here would make the call fail
    // with ERROR_INSUFFICIENT_BUFFER.
    if (MultiByteToWideChar(CP_ACP, 0, pStr, len, pBuffer, nRequired) == 0)
    {
        delete[] pBuffer;
        return 0;
    }

    pwStr = pBuffer;
    return nWideLen;
}
相应的,我们使用WideCharToMultiByte函数将宽字符字符串转换为多字节字符串,函数原型如下:
int WideCharToMultiByte(
__in UINT CodePage, // code page to associate with the newly converted string
__in DWORD dwFlags, // additional conversion control flags; usually 0
__in LPCWSTR lpWideCharStr, // address of the wide-character string to convert
__in int cchWideChar, // length of the string above, in characters; -1 makes the function determine the length itself
__out LPSTR lpMultiByteStr, // buffer that receives the converted multibyte string
__in int cbMultiByte, // maximum size of the buffer above, in bytes; passing 0 makes the function return the required buffer size, and that return value is already the byte count needed, so it must not be multiplied by sizeof
__in LPCSTR lpDefaultChar, // character used when a wide character cannot be converted; if NULL, the function uses a system default character
__out LPBOOL lpUsedDefaultChar // set to TRUE if at least one wide character could not be converted to its multibyte form; NULL is usually passed
);
下面是我们封装的将宽字符字符串转换为多字节字符串的函数:
/*******************************************************
*function: convert a wide-character string to a multibyte (CP_ACP) string
*param:  pStr --[out] Receives a buffer allocated with new[] that holds the
*                     converted bytes; the caller must release it with
*                     delete[]. Set to NULL whenever the function returns 0.
*        pwStr--[in]  Points to the wide-character string to be converted.
*        len  --[in]  Size in characters of the string pointed to by pwStr,
*                     or -1 if the string is null terminated.
*        IsEnd--[in]  Specifies whether a terminating '\0' is appended to
*                     the converted bytes. When len is -1 the buffer is
*                     always terminated, regardless of this flag.
*return: the number of bytes converted, not counting the terminator;
*        0 if the input is empty or the conversion fails
*******************************************************/
int ToMultiBytes( char* &pStr, const WCHAR* pwStr, int len, BOOL IsEnd)
{
    ASSERT_POINTER(pwStr, WCHAR) ;
    ASSERT( len >= 0 || len == -1 ) ;

    // Never leave the out-parameter uninitialised on the early-return paths.
    pStr = NULL;

    // Sizing call: returns the required size in bytes, or 0 on failure.
    // When len == -1 the count includes the terminating null character.
    const int nRequired = WideCharToMultiByte(CP_ACP, 0, pwStr, len, NULL, 0, NULL, NULL);
    if (nRequired <= 0)
    {
        return 0;
    }

    const int nChars = (len == -1) ? nRequired - 1 : nRequired;
    if (nChars == 0)
    {
        return 0;
    }

    // The buffer must hold everything the conversion call writes (nRequired),
    // plus one extra byte when the caller asked for a terminator that the API
    // will not write itself (explicit len).
    const int nAlloc = (IsEnd && len != -1) ? nRequired + 1 : nRequired;

    char* pBuffer = new char[nAlloc];
    ZeroMemory(pBuffer, nAlloc);

    // Passing fewer than nRequired bytes here would make the call fail with
    // ERROR_INSUFFICIENT_BUFFER.
    if (WideCharToMultiByte(CP_ACP, 0, pwStr, len, pBuffer, nRequired, NULL, NULL) == 0)
    {
        delete[] pBuffer;
        return 0;
    }

    pStr = pBuffer;
    return nChars;
}
上面两个封装的函数使用示例如下:
1)
char *pStr = "ASCE1885";WCHAR *pwStr;
int nWideLen = ToWideString(pwStr, pStr, -1, TRUE);
2)
WCHAR *pwStr = _T("ASCE1885");char *pStr;
int nWideLen = ToMultiBytes(pStr, pwStr, -1, TRUE);