程式人生 > 使用libzip讀取修改zip檔案comment的方法(支援新增擴充套件字元)

使用libzip讀取修改zip檔案comment的方法(支援新增擴充套件字元)

2014-10-01 wcdj

摘要:本文使用libzip讀取和修改zip檔案comment的方法,並支援新增擴充套件字元。前提是需要修改libzip的原始碼支援可以新增擴充套件字元, 否則_zip_guess_encoding會判斷出錯(修改程式碼 zip_set_archive_comment.c:65)。

0 方法

(1) 首先要安裝zlib和libzip編譯環境:

初始化編譯libzip
./configure --prefix="/Users/gerryyang/LAMP/libzip/install/libzip-0.11.2" --with-zlib="/Users/gerryyang/LAMP/zlib/install/zlib-1.2.8"

(2) 修改libzip的原始碼:

去除對comment編碼格式的判斷,zip_set_archive_comment.c:65

/*
 * Replace the archive-level comment of ZA with the LEN bytes at COMMENT.
 *
 * LEN == 0 requests "no comment". The new value is only recorded as a pending
 * change when it differs from the archive's original comment.
 *
 * Returns 0 on success. Returns -1 when ZIP_IS_RDONLY(za) holds, when LEN > 0
 * but COMMENT is NULL, or when the string object cannot be created.
 *
 * This copy is patched for the article: the encoding check that used to
 * reject comments guessed as CP437 is compiled out (see the "#if 0" region),
 * so arbitrary bytes are accepted.
 */
ZIP_EXTERN int
zip_set_archive_comment(struct zip *za, const char *comment, zip_uint16_t len)
{
	struct zip_string *new_comment = NULL;
	int same_as_original;

	if (ZIP_IS_RDONLY(za)) {
		_zip_error_set(&za->error, ZIP_ER_RDONLY, 0);
		return -1;
	}

	if (comment == NULL && len > 0) {
		_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
		return -1;
	}

	if (len > 0) {
		new_comment = _zip_string_new((const zip_uint8_t *)comment, len, ZIP_FL_ENC_GUESS, &za->error);
		if (new_comment == NULL) {
			printf("_zip_string_new err\n");
			return -1;
		}

#if 0
		/* Disabled on purpose: upstream refuses comments guessed as CP437. */
		if (_zip_guess_encoding(new_comment, ZIP_ENCODING_UNKNOWN) == ZIP_ENCODING_CP437) {
			printf("_zip_guess_encoding err\n");
			_zip_string_free(new_comment);
			_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
			return -1;
		}
#endif
	}

	/* Drop whatever change was pending before deciding on the new state. */
	_zip_string_free(za->comment_changes);
	za->comment_changes = NULL;

	if (za->comment_orig != NULL)
		same_as_original = _zip_string_equal(za->comment_orig, new_comment);
	else
		same_as_original = (new_comment == NULL);

	if (same_as_original) {
		/* Nothing to write back: discard the freshly built string. */
		_zip_string_free(new_comment);
		za->comment_changed = 0;
		return 0;
	}

	/* The archive structure takes over the new string. */
	za->comment_changes = new_comment;
	za->comment_changed = 1;
	return 0;
}

zip_utf-8.c:119

/*
 * Determine the encoding of STR, store it in str->encoding and return it.
 *
 * A NULL string is reported as ZIP_ENCODING_ASCII. If str->encoding is already
 * set (anything but ZIP_ENCODING_UNKNOWN) that value is reused; otherwise the
 * raw bytes are scanned:
 *   - only bytes 32..127, CR, LF and TAB      -> ZIP_ENCODING_ASCII
 *   - other bytes that all form complete multi-byte sequences matching the
 *     UTF_8_* masks                           -> ZIP_ENCODING_UTF8_GUESSED
 *   - anything else                           -> ZIP_ENCODING_CP437
 *
 * When EXPECTED_ENCODING is not ZIP_ENCODING_UNKNOWN, a guessed UTF-8 result
 * is promoted to ZIP_ENCODING_UTF8_KNOWN if UTF-8 was expected, and
 * ZIP_ENCODING_ERROR is returned if the detected encoding is neither ASCII
 * nor the expected one.
 */
enum zip_encoding_type
_zip_guess_encoding(struct zip_string *str, enum zip_encoding_type expected_encoding)
{
	enum zip_encoding_type enc;
	const zip_uint8_t *name;
	zip_uint32_t i, j, ulen;

	if (str == NULL)
		return ZIP_ENCODING_ASCII;

	name = str->raw;

	/* Reuse a previously determined encoding instead of rescanning. */
	if (str->encoding != ZIP_ENCODING_UNKNOWN)
		enc = str->encoding;
	else {
		enc = ZIP_ENCODING_ASCII;
		for (i=0; i<str->length; i++) {
			/* Bytes 32..127 plus CR/LF/TAB keep the current classification. */
			if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t')
				continue;

			/* Any other byte must start a multi-byte sequence; ulen is the
			 * number of continuation bytes expected after this lead byte. */
			enc = ZIP_ENCODING_UTF8_GUESSED;
			if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH)
				ulen = 1;
			else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH)
				ulen = 2;
			else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH)
				ulen = 3;
			else {
				/* Not a recognised lead byte: fall back to CP437. */
				enc = ZIP_ENCODING_CP437;
				break;
			}

			/* The sequence would run past the end of the string. */
			if (i + ulen >= str->length) {
				enc = ZIP_ENCODING_CP437;
				break;
			}

			/* Every following byte must match the continuation pattern. */
			for (j=1; j<=ulen; j++) {
				if ((name[i+j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) {
					enc = ZIP_ENCODING_CP437;
					/* goto, not break: leaves both loops at once. */
					goto done;
				}
			}
			/* Skip the continuation bytes that were just validated. */
			i += ulen;
		}
	}

done:
	/* Cache the result on the string. */
	str->encoding = enc;

	if (expected_encoding != ZIP_ENCODING_UNKNOWN) {
		/* The caller asserts UTF-8, so upgrade a mere guess to "known". */
		if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED)
			str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN;

		/* ASCII is accepted whatever was expected; any other mismatch is an error. */
		if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII)
			return ZIP_ENCODING_ERROR;
	}

	return enc;
}

1 測試程式碼


參考程式碼:
https://github.com/gerryyang/mac-utils/tree/master/tools/libzip/src
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <errno.h>
#include "zip.h"
using namespace std;

/*
 * Write the upper-case hexadecimal form of the LEN bytes at SRC into DST,
 * two characters per input byte, followed by a terminating '\0'.
 *
 * DST must have room for 2 * LEN + 1 bytes; the function does not check this.
 * A LEN of zero or less produces an empty string.
 *
 * Always returns 0.
 */
int encode_hex_string(const unsigned char *src, int len, unsigned char *dst)
{
	static const unsigned char digits[] = "0123456789ABCDEF";
	unsigned char *out = dst;
	int pos = 0;

	while (pos < len)
	{
		const unsigned char byte = src[pos++];

		*out++ = digits[(byte >> 4) & 0x0f];	/* high nibble first */
		*out++ = digits[byte & 0x0f];
	}
	*out = '\0';

	return 0;
}

int main(int argc, char* argv[])
{

	if (argc < 3)
	{
		printf("usage: %s zipfile zipcomment\n", argv[0]);
		exit(1);
	}

	string zipfile = argv[1];
	string zipcomment = argv[2];
	int error;

	struct zip * zipfd = zip_open(zipfile.c_str(), ZIP_CHECKCONS, &error);
	if (zipfd == NULL)
	{
		switch (error)
		{
		case ZIP_ER_NOENT:
			printf("The file specified by path does not exist and ZIP_CREATE is not set [%d]\n", error);
			break;
		case ZIP_ER_EXISTS:
			printf("The file specified by path exists and ZIP_EXCL is set [%d]\n", error);
			break;
		case ZIP_ER_INVAL:
			printf("The path argument is NULL [%d]\n", error);
			break;
		case ZIP_ER_NOZIP:
			printf("The file specified by path is not a zip archive [%d]\n", error);
			break;
		case ZIP_ER_OPEN:
			printf("The file specified by path could not be opened [%d]\n", error);
			break;
		case ZIP_ER_READ:
			printf("A read error occurred; see errno for details [%d]\n", error);
			break;
		case ZIP_ER_SEEK:
			printf("The file specified by path does not allow seeks [%d]\n", error);
			break;
		default:
			printf("unknown err [%d]\n", error);
			break;
		}
		exit(1);
	}

	// get the comment for the entire zip archive
	int commentlen = 0;
	const char * comment = zip_get_archive_comment(zipfd, &commentlen, ZIP_FL_ENC_RAW);
	if (comment == NULL)
	{
		printf("zip_get_archive_comment get null or err[%d:%s]\n", errno, strerror(errno));
	}
	else 
	{
		printf("zip_get_archive_comment[%d:%s]\n", commentlen, comment);
		char copy[1024] = {0};
		memcpy(copy, comment, commentlen);
		unsigned char hex[1024] = {0};
		encode_hex_string((unsigned char *)copy, commentlen, hex);
		printf("zip_get_archive_comment hex[%d:%s]\n", commentlen, hex);
	}

	// Midas Header
	// idx:0 bytes:2 0X96FA
	// idx:2 bytes:2 comment len = strlen(channelId) + 0D0A
	// idx:4 bytes:N channelId=xxx
	// idx:4+N bytes:2 end:0X0D0A
	char dstcomment[1024] = {0};
	zip_uint16_t dstlen = 0;
	memset(dstcomment + dstlen, 0XFA, 1);
	dstlen += 1;
	memset(dstcomment + dstlen, 0X96, 1);
	dstlen += 1;
	memset(dstcomment + dstlen, (zipcomment.length() + 2) % 0XFF, 1);// 0D0A
	dstlen += 1;
	memset(dstcomment + dstlen, (zipcomment.length() + 2) / 0XFF, 1);
	dstlen += 1;
	memcpy(dstcomment + dstlen, zipcomment.data(), zipcomment.length());
	dstlen += zipcomment.length();
	memset(dstcomment + dstlen, 0X0D, 1);
	dstlen += 1;
	memset(dstcomment + dstlen, 0X0A, 1);
	dstlen += 1;

	unsigned char hex[1024] = {0};
	encode_hex_string((unsigned char *)dstcomment, dstlen, hex);
	printf("zip_set_archive_comment hex[%d:%s]\n", dstlen, hex);


	// sets the comment for the entire zip archive
	// If comment is NULL and len is 0, the archive comment will be removed
	// comment must be encoded in ASCII or UTF-8
	int iret = zip_set_archive_comment(zipfd, dstcomment, dstlen);// err !!!
	if (iret != 0)
	{
		printf("zip_set_archive_comment err[%d:%s]\n", iret, strerror(errno));
		switch (iret)
		{
		case ZIP_ER_INVAL:
			printf("zip_set_archive_comment: len is less than 0 or longer than the maximum comment length in a zip file (65535), or comment is not a valid UTF-8 encoded string\n");
			break;
		case ZIP_ER_MEMORY:
			printf("zip_set_archive_comment: Required memory could not be allocated\n");
			break;
		default:
			printf("zip_set_archive_comment: unknown err\n");
			break;
		}
	}

	// close, If any files within were changed, those changes are written to disk first
	iret = zip_close(zipfd);
	if (iret != 0)
	{
		printf("zip_close err[%d:%s]\n", errno, strerror(errno));
	}

	return 0;
}

2 總結

通過使用libzip可以方便的對zip的comment內容進行修改,但是限制必須使用可見的字符集,通過對libzip原始碼的簡單修改,可以做到新增擴充套件的字符集。除了通過程式碼的方式,也可以直接使用命令列工具zip和unzip (-z   add zipfile comment) 修改和讀取zip的comment內容。

3 參考