1. 程式人生 > >c中實現utf8和gbk的互轉

c中實現utf8和gbk的互轉

#include <iconv.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>

/**
 * Convert a byte buffer from one character set to another with iconv.
 *
 * @param from_charset  iconv name of the encoding of inbuf
 * @param to_charset    iconv name of the encoding to produce
 * @param inbuf         bytes to convert (need not be NUL-terminated)
 * @param inlen         number of bytes in inbuf
 * @param outbuf        destination buffer; always zero-filled first, and
 *                      NUL-terminated on success
 * @param outlen        size of outbuf in bytes; one byte is reserved for the
 *                      terminating NUL, so at most outlen - 1 bytes of
 *                      converted text fit
 * @return 0 on success; -1 on bad arguments, unsupported charset pair,
 *         invalid input, or insufficient output space
 */
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen,
		char *outbuf, size_t outlen) {
	iconv_t cd;
	char *in = inbuf;
	char *out = outbuf;
	size_t in_left = inlen;
	size_t out_left;
	int rc = -1;

	if (from_charset == NULL || to_charset == NULL || inbuf == NULL
			|| outbuf == NULL || outlen == 0) {
		return -1;
	}

	/* Zero the output up front so it is a valid empty string on any failure. */
	memset(outbuf, 0, outlen);
	out_left = outlen - 1; /* keep room for the terminator */

	cd = iconv_open(to_charset, from_charset);
	if (cd == (iconv_t) -1) { /* iconv_open reports failure as (iconv_t)-1, not 0 */
		return -1;
	}

	/* Convert, then flush any pending shift sequence of a stateful encoding. */
	if (iconv(cd, &in, &in_left, &out, &out_left) != (size_t) -1
			&& iconv(cd, NULL, NULL, &out, &out_left) != (size_t) -1) {
		*out = '\0';
		rc = 0;
	}

	/* Release the descriptor on both the success and the failure path. */
	(void) iconv_close(cd);
	return rc;
}

/**
 * Convert UTF-8 bytes to GBK.
 *
 * GBK is requested rather than GB2312: GB2312 covers simplified characters
 * only, so text containing e.g. traditional characters cannot be encoded,
 * while GBK encodes every GB2312 character with the same bytes.
 *
 * @param inbuf   UTF-8 input bytes
 * @param inlen   number of bytes in inbuf
 * @param outbuf  destination buffer for the GBK bytes
 * @param outlen  size of outbuf in bytes
 * @return the result of code_convert (0 on success, -1 on failure)
 */
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
	return code_convert("utf-8", "gbk", inbuf, inlen, outbuf, outlen);
}

/**
 * Convert GBK bytes to UTF-8.
 *
 * The input is decoded as GBK rather than GB2312 so that characters outside
 * the GB2312 repertoire (e.g. traditional characters) are accepted; GB2312
 * text is still decoded correctly because GBK is an extension of it.
 *
 * @param inbuf   GBK input bytes
 * @param inlen   number of bytes in inbuf
 * @param outbuf  destination buffer for the UTF-8 bytes
 * @param outlen  size of outbuf in bytes
 * @return the result of code_convert (0 on success, -1 on failure)
 */
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
	return code_convert("gbk", "utf-8", inbuf, inlen, outbuf, outlen);
}

/* Write len bytes of data to path, creating or truncating the file.
 * Returns 0 on success, -1 on failure (after printing a diagnostic). */
static int save_bytes(const char *path, const char *data, size_t len) {
	/* O_TRUNC so a longer file from an earlier run leaves no stale tail. */
	int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
	int rc = 0;
	ssize_t written;

	if (fd == -1) {
		perror(path);
		return -1;
	}

	written = write(fd, data, len);
	if (written < 0) {
		perror(path);
		rc = -1;
	} else if ((size_t) written != len) {
		fprintf(stderr, "%s: short write\n", path);
		rc = -1;
	}

	if (close(fd) != 0) {
		perror(path);
		rc = -1;
	}
	return rc;
}

/**
 * Demo: convert a UTF-8 literal to GBK and write it to "test.txt", then
 * convert the GBK bytes back to UTF-8 and write them to "test.txt2".
 *
 * @return EXIT_SUCCESS when both conversions and both writes succeed,
 *         EXIT_FAILURE otherwise
 */
int main(void) {
	char *s = "中國";
	char buf[10];
	char buf2[10];

	/* Only touch buf after a successful conversion; otherwise its contents
	 * are not a valid string. */
	if (u2g(s, strlen(s), buf, sizeof(buf)) != 0) {
		fprintf(stderr, "utf-8 -> gbk conversion failed\n");
		return EXIT_FAILURE;
	}
	if (save_bytes("test.txt", buf, strlen(buf)) != 0) {
		return EXIT_FAILURE;
	}

	if (g2u(buf, strlen(buf), buf2, sizeof(buf2)) != 0) {
		fprintf(stderr, "gbk -> utf-8 conversion failed\n");
		return EXIT_FAILURE;
	}
	if (save_bytes("test.txt2", buf2, strlen(buf2)) != 0) {
		return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}

上面是使用iconv函式。

方式二: 使用如下兩個函式

mbstowcs將多位元組編碼轉換為寬位元組編碼

wcstombs將寬位元組編碼轉換為多位元組編碼

注意:此方式需要系統 locale 的支援,可以通過 locale -a 檢視系統已安裝的 locale。若不支援 zh_CN.gbk,需要先安裝。例如,在 Ubuntu 上的安裝步驟如下:


編輯

$sudo vi /var/lib/locales/supported.d/zh-hans
更新成
zh_CN.UTF-8 UTF-8
zh_SG.UTF-8 UTF-8
zh_CN.GBK GBK
zh_CN.GB18030 GB18030
// 更新
$ sudo locale-gen

// 檢視
$ locale -a
C
POSIX
zh_CN.gb18030
zh_CN.gbk
zh_CN.utf8
zh_SG.utf8

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <locale.h>

/**
 * Convert a NUL-terminated UTF-8 string to GBK by going through wchar_t
 * (mbstowcs under a UTF-8 locale, then wcstombs under a GBK locale).
 *
 * Side effect: the process locale (LC_ALL) is switched to "zh_CN.utf8" and
 * then to "zh_CN.gbk" and is not restored. Both locales must be installed.
 *
 * @param gbkStr        destination buffer; receives the NUL-terminated GBK string
 * @param srcStr        UTF-8 string to convert
 * @param maxGbkStrlen  size of gbkStr in bytes, including room for the NUL
 * @return length in bytes of the GBK string (> 0) on success; -1 on failure
 *         (bad argument, locale unavailable, empty or invalid input,
 *         allocation failure, or destination too small)
 */
int utf82gbk(char *gbkStr, const char *srcStr, int maxGbkStrlen) {
	wchar_t *wide = NULL;
	size_t wideLen;
	size_t gbkLen;
	int result = -1;

	if (NULL == srcStr || NULL == gbkStr || maxGbkStrlen <= 0) {
		printf("Bad Parameter\n");
		return -1;
	}

	/* Step 1: decode UTF-8 into wide characters; the locale selects the
	 * multibyte encoding mbstowcs expects. */
	if (NULL == setlocale(LC_ALL, "zh_CN.utf8")) {
		printf("Bad Parameter\n");
		return -1;
	}

	wideLen = mbstowcs(NULL, srcStr, 0); /* length query only */
	if (wideLen == (size_t) -1 || wideLen == 0) {
		printf("Can not Transfer!!!\n");
		return -1;
	}

	wide = calloc(wideLen + 1, sizeof *wide);
	if (NULL == wide) {
		printf("Out of memory\n");
		return -1;
	}
	/* The third argument is the capacity in wide characters, not bytes. */
	if (mbstowcs(wide, srcStr, wideLen + 1) == (size_t) -1) {
		printf("Can not Transfer!!!\n");
		goto done;
	}

	/* Step 2: encode the wide characters as GBK. */
	if (NULL == setlocale(LC_ALL, "zh_CN.gbk")) {
		printf("Bad Parameter\n");
		goto done;
	}

	gbkLen = wcstombs(NULL, wide, 0); /* length query only */
	if (gbkLen == (size_t) -1 || gbkLen == 0) {
		printf("Can not Transfer!!!\n");
		goto done;
	}
	if (gbkLen >= (size_t) maxGbkStrlen) { /* need gbkLen bytes plus the NUL */
		printf("Dst Str memory not enough\n");
		goto done;
	}

	if (wcstombs(gbkStr, wide, gbkLen + 1) == (size_t) -1) {
		printf("Can not Transfer!!!\n");
		goto done;
	}
	gbkStr[gbkLen] = 0;
	result = (int) gbkLen; /* fits: gbkLen < maxGbkStrlen */

done:
	/* Single exit so the wide buffer is released on every path. */
	free(wide);
	return result;
}

/**
 * Demo: convert a UTF-8 literal to GBK with utf82gbk and write the result
 * to "test.txt".
 *
 * @return EXIT_SUCCESS when the conversion and the write succeed,
 *         EXIT_FAILURE otherwise
 */
int main(void) {
	char *s = "中國";
	char buf[10];
	int status = EXIT_SUCCESS;
	int fd;
	ssize_t written;

	/* utf82gbk returns the GBK byte count, or -1 after printing a message;
	 * buf is only meaningful on success. */
	int len = utf82gbk(buf, s, sizeof(buf));
	if (len < 0) {
		return EXIT_FAILURE;
	}

	/* O_TRUNC so a longer file from an earlier run leaves no stale tail. */
	fd = open("test.txt", O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
	if (fd == -1) {
		perror("test.txt");
		return EXIT_FAILURE;
	}

	written = write(fd, buf, (size_t) len);
	if (written < 0) {
		perror("test.txt");
		status = EXIT_FAILURE;
	} else if (written != (ssize_t) len) {
		fprintf(stderr, "test.txt: short write\n");
		status = EXIT_FAILURE;
	}

	if (close(fd) != 0) {
		perror("test.txt");
		status = EXIT_FAILURE;
	}

	return status;
}