c中實現utf8和gbk的互轉
阿新 • • 發佈:2019-02-08
#include <iconv.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>

/**
 * Convert a byte buffer from one character set to another with iconv.
 *
 * @param from_charset  iconv name of the source encoding
 * @param to_charset    iconv name of the destination encoding
 * @param inbuf         bytes to convert
 * @param inlen         number of bytes in inbuf
 * @param outbuf        destination buffer
 * @param outlen        capacity of outbuf in bytes, including the terminator
 * @return 0 on success, -1 on any failure (bad arguments, unsupported
 *         conversion, invalid input sequence, or output buffer too small)
 *
 * outbuf is zero-filled first and one byte is always kept in reserve, so it
 * is NUL-terminated whenever the arguments pass validation, even on failure.
 */
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    if (from_charset == NULL || to_charset == NULL || outbuf == NULL || outlen == 0) {
        return -1;
    }
    if (inbuf == NULL && inlen != 0) {
        return -1;
    }

    /* iconv_open reports failure as (iconv_t)-1, not as a null descriptor. */
    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1) {
        return -1;
    }

    memset(outbuf, 0, outlen);

    char *in = inbuf;
    char *out = outbuf;
    size_t in_left = inlen;
    size_t out_left = outlen - 1; /* keep one byte for the terminator */
    int rc = 0;

    /* iconv returns size_t, so the error value is (size_t)-1. */
    if (iconv(cd, &in, &in_left, &out, &out_left) == (size_t)-1) {
        rc = -1;
    } else if (iconv(cd, NULL, NULL, &out, &out_left) == (size_t)-1) {
        /* Flushing any pending shift sequence failed. */
        rc = -1;
    }

    /* Close the descriptor on every path so it never leaks. */
    if (iconv_close(cd) != 0) {
        rc = -1;
    }

    *out = '\0';
    return rc;
}

/**
 * Convert UTF-8 text to GBK.
 *
 * GBK is requested instead of GB2312 because GB2312 covers simplified
 * characters only and cannot encode the sample text used in main().
 *
 * @return 0 on success, -1 on failure (see code_convert)
 */
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    return code_convert("UTF-8", "GBK", inbuf, inlen, outbuf, outlen);
}

/**
 * Convert GBK text to UTF-8.
 *
 * @return 0 on success, -1 on failure (see code_convert)
 */
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    return code_convert("GBK", "UTF-8", inbuf, inlen, outbuf, outlen);
}

/* Write len bytes of data to path, creating the file if needed; 0 on success. */
static int write_file(const char *path, const char *data, size_t len)
{
    int fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        perror(path);
        return -1;
    }

    int rc = 0;
    ssize_t written = write(fd, data, len);
    if (written < 0 || (size_t)written != len) {
        perror(path);
        rc = -1;
    }
    if (close(fd) != 0) {
        perror(path);
        rc = -1;
    }
    return rc;
}

/*
 * Demo: encode a UTF-8 literal as GBK into test.txt, then decode it back to
 * UTF-8 into test.txt2. Exits with 0 on success and 1 on any failure.
 */
int main(void)
{
    char *s = "中國";
    char buf[10];
    char buf2[10];

    if (u2g(s, strlen(s), buf, sizeof(buf)) != 0) {
        fprintf(stderr, "utf-8 -> gbk conversion failed\n");
        return 1;
    }
    if (write_file("test.txt", buf, strlen(buf)) != 0) {
        return 1;
    }

    if (g2u(buf, strlen(buf), buf2, sizeof(buf2)) != 0) {
        fprintf(stderr, "gbk -> utf-8 conversion failed\n");
        return 1;
    }
    if (write_file("test.txt2", buf2, strlen(buf2)) != 0) {
        return 1;
    }

    return 0;
}
上面是使用iconv函式。
方式二: 使用如下兩個函式
mbstowcs將多位元組編碼轉換為寬位元組編碼
wcstombs將寬位元組編碼轉換為多位元組編碼
注意, 需要系統編碼的支援, 可以通過locale -a 檢視系統支援的。若不支援zh_CN.gbk, 需要安裝,例如,在ubuntu上的安裝步驟如下:
編輯
$ sudo vi /var/lib/locales/supported.d/zh-hans
將檔案內容更新成:
zh_CN.UTF-8 UTF-8
zh_SG.UTF-8 UTF-8
zh_CN.GBK GBK
zh_CN.GB18030 GB18030
// 更新
$ sudo locale-gen
// 檢視
$ locale -a
C
POSIX
zh_CN.gb18030
zh_CN.gbk
zh_CN.utf8
zh_SG.utf8
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <locale.h>

/**
 * Convert a UTF-8 string to GBK by going through wide characters.
 *
 * The conversion temporarily switches the process locale to "zh_CN.utf8"
 * and then "zh_CN.gbk"; both locales must be installed. The locale that was
 * active on entry is restored before returning. Because setlocale affects
 * the whole process, this function must not run concurrently with other
 * locale-dependent code.
 *
 * @param gbkStr        destination buffer for the NUL-terminated GBK string
 * @param srcStr        NUL-terminated UTF-8 string to convert
 * @param maxGbkStrlen  capacity of gbkStr in bytes, including the terminator
 * @return number of bytes written (excluding the terminator, always > 0) on
 *         success; -1 on failure, including an empty source string
 */
int utf82gbk(char *gbkStr, const char *srcStr, int maxGbkStrlen)
{
    int result = -1;
    wchar_t *wideStr = NULL;
    char *savedLocale = NULL;
    const char *currentLocale = NULL;
    size_t wideLen = 0;
    size_t gbkLen = 0;

    if (NULL == srcStr || NULL == gbkStr) {
        printf("Bad Parameter\n");
        return -1;
    }

    /*
     * Remember the active locale. The string returned by setlocale may be
     * overwritten by later calls, so keep a private copy.
     */
    currentLocale = setlocale(LC_ALL, NULL);
    if (currentLocale != NULL) {
        size_t n = strlen(currentLocale) + 1;
        savedLocale = malloc(n);
        if (savedLocale == NULL) {
            printf("Out of memory\n");
            return -1;
        }
        memcpy(savedLocale, currentLocale, n);
    }

    /* Step 1: decode UTF-8 into wide characters under a UTF-8 locale. */
    if (NULL == setlocale(LC_ALL, "zh_CN.utf8")) {
        printf("Bad Parameter\n");
        goto done;
    }

    wideLen = mbstowcs(NULL, srcStr, 0); /* length query only */
    if (wideLen == (size_t)-1 || wideLen == 0) {
        printf("Can not Transfer!!!\n");
        goto done;
    }

    wideStr = calloc(wideLen + 1, sizeof *wideStr);
    if (wideStr == NULL) {
        printf("Out of memory\n");
        goto done;
    }
    /* The limit is counted in wide characters, not source bytes. */
    if (mbstowcs(wideStr, srcStr, wideLen + 1) == (size_t)-1) {
        printf("Can not Transfer!!!\n");
        goto done;
    }

    /* Step 2: encode the wide characters as GBK under a GBK locale. */
    if (NULL == setlocale(LC_ALL, "zh_CN.gbk")) {
        printf("Bad Parameter\n");
        goto done;
    }

    gbkLen = wcstombs(NULL, wideStr, 0); /* length query only */
    if (gbkLen == (size_t)-1 || gbkLen == 0) {
        printf("Can not Transfer!!!\n");
        goto done;
    }
    if (maxGbkStrlen <= 0 || gbkLen >= (size_t)maxGbkStrlen) {
        /* The result plus its terminator would not fit. */
        printf("Dst Str memory not enough\n");
        goto done;
    }

    if (wcstombs(gbkStr, wideStr, gbkLen) == (size_t)-1) {
        printf("Can not Transfer!!!\n");
        goto done;
    }
    gbkStr[gbkLen] = 0;
    result = (int)gbkLen; /* fits: gbkLen < maxGbkStrlen */

done:
    /* Single exit: release the buffer and restore the locale on all paths. */
    free(wideStr);
    if (savedLocale != NULL) {
        setlocale(LC_ALL, savedLocale);
        free(savedLocale);
    }
    return result;
}

/*
 * Demo: convert a UTF-8 literal to GBK and write it to test.txt.
 * Exits with 0 on success and 1 on any failure.
 */
int main(void)
{
    char *s = "中國";
    char buf[10];

    int len = utf82gbk(buf, s, sizeof(buf));
    if (len < 0) {
        return 1;
    }

    int fd = open("test.txt", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        perror("test.txt");
        return 1;
    }

    int rc = 0;
    ssize_t written = write(fd, buf, (size_t)len);
    if (written < 0 || written != (ssize_t)len) {
        perror("test.txt");
        rc = 1;
    }
    if (close(fd) != 0) {
        perror("test.txt");
        rc = 1;
    }
    return rc;
}