unicode字元和多位元組字元的相互轉換介面
阿新 • • 發佈:2019-01-07
作者:朱金燦
發現開原始碼的可利用資源真多,從sqlite3的原始碼中摳出了幾個字元轉換介面,稍微改造下了發現還挺好用的。下面是實現程式碼:
/* ** Convert a UTF-8 string to microsoft unicode (UTF-16?). ** ** Space to hold the returned string is obtained from malloc. */ static WCHAR *utf8ToUnicode(const char *zFilename){ int nChar; WCHAR *zWideFilename; nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0); zWideFilename = static_cast<WCHAR *>(malloc(nChar*sizeof(zWideFilename[0]))); if( zWideFilename==0 ){ return 0; } nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nChar); if( nChar==0 ){ free(zWideFilename); zWideFilename = 0; } return zWideFilename; } /* ** Convert microsoft unicode to UTF-8. Space to hold the returned string is ** obtained from malloc(). */ static char *unicodeToUtf8(const WCHAR *zWideFilename){ int nByte; char *zFilename; nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0); zFilename = static_cast<char*>(malloc( nByte )); if( zFilename==0 ){ return 0; } nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte, 0, 0); if( nByte == 0 ) { free(zFilename); zFilename = 0; } return zFilename; } /* ** Convert an ansi string to microsoft unicode, based on the ** current codepage settings for file apis. ** ** Space to hold the returned string is obtained ** from malloc. */ static WCHAR *mbcsToUnicode(const char *zFilename){ int nByte; WCHAR *zMbcsFilename; int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL,0)*sizeof(WCHAR); zMbcsFilename = static_cast<WCHAR*>(malloc( nByte*sizeof(zMbcsFilename[0]))); if( zMbcsFilename==0 ){ return 0; } nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nByte); if( nByte==0 ) { free(zMbcsFilename); zMbcsFilename = 0; } return zMbcsFilename; } /* ** Convert microsoft unicode to multibyte character string, based on the ** user's Ansi codepage. ** ** Space to hold the returned string is obtained from ** malloc(). */ static char* unicodeToMbcs(const WCHAR* zWideFilename){ int nByte; char *zFilename; int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0); zFilename = static_cast<char*>(malloc(nByte )); if( zFilename==0 ){ return 0; } nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, nByte, 0, 0); if( nByte == 0 ){ free(zFilename); zFilename = 0; } return zFilename; } /* ** Convert multibyte character string to UTF-8. Space to hold the ** returned string is obtained from malloc(). */ static char* mbcsToUtf8(const char *zFilename){ char *zFilenameUtf8; WCHAR *zTmpWide; zTmpWide = mbcsToUnicode(zFilename); if( zTmpWide==0 ){ return 0; } zFilenameUtf8 = unicodeToUtf8(zTmpWide); free(zTmpWide); return zFilenameUtf8; } /* ** Convert UTF-8 to multibyte character string. Space to hold the ** returned string is obtained from malloc(). */ static char* utf8ToMbcs(const char *zFilename){ char *zFilenameMbcs; WCHAR* zTmpWide; zTmpWide = utf8ToUnicode(zFilename); if( zTmpWide==0 ){ return 0; } zFilenameMbcs = unicodeToMbcs(zTmpWide); free(zTmpWide); return zFilenameMbcs; } std::string MbcsToUtf8( const char* pszMbcs ) { std::string str; WCHAR *pwchar=0; CHAR *pchar=0; int len=0; int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; len=MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL,0); pwchar=new WCHAR[len]; if(pwchar!=0) { len = MultiByteToWideChar(codepage, 0, pszMbcs, -1, pwchar, len); if( len!=0 ) { len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, 0, 0, 0, 0); pchar=new CHAR[len]; if(pchar!=0) { len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, pchar, len,0, 0); if(len!=0) { str = pchar; } delete pchar; } delete pwchar; } } return str; }
要測試這些介面,為此我寫了一個測試工程,是讀取一個xml檔案把裡面的字元進行轉換的,測試工程的程式碼下載地址如下: