1. 程式人生 > >msgpack[C++]使用筆記 和 msgpack/cPickle效能對比

msgpack[C++]使用筆記 和 msgpack/cPickle效能對比

python版本msgpack安裝:

wget http://pypi.python.org/packages/source/m/msgpack-python/msgpack-python-0.1.9.tar.gz

python2.x setup.py install --prefix=/usr/local/similarlib/

python版本的msgpack灰常好用,速度上比python內建的pickle和cpickle都要快一些,C++版本的使用比較麻煩,下面是本人學習時的一個demo,解析python-msgpack dump的一個複雜字典

#include <msgpack.hpp>

#include <fstream>
#include <iostream>
using namespace std;

// Read a whole msgpack-encoded file into a caller-supplied buffer, unpack it
// and convert the top-level object into `t`.
//
// binary_file  path of the file to read (opened in binary mode)
// t            destination object; filled through msgpack's convert()
// buff         caller-owned scratch buffer that receives the raw file bytes
// max          capacity of `buff` in bytes
//
// Throws std::ios_base::failure when the file cannot be opened, its size
// cannot be determined, it does not fit into `buff`, or it cannot be read
// completely. Errors raised by msgpack::unpack / convert are not handled here
// and propagate to the caller.
template <class T>
void msgunpack(const char* binary_file, T& t, char* buff, uint32_t max){
	// ios::ate positions the stream at the end, so tellg() yields the file size.
	std::ifstream tf_file(binary_file, std::ios::in | std::ios::binary | std::ios::ate);
	if (!tf_file)
		throw std::ios_base::failure("msgunpack: cannot open input file");

	const std::streamoff file_size = tf_file.tellg();
	if (file_size < 0)
		throw std::ios_base::failure("msgunpack: cannot determine input file size");
	// Refuse to write past the end of the caller's buffer.
	if (file_size > static_cast<std::streamoff>(max))
		throw std::ios_base::failure("msgunpack: input file does not fit in the supplied buffer");
	const uint32_t size = static_cast<uint32_t>(file_size);

	tf_file.seekg(0, std::ios::beg);
	tf_file.read(buff, static_cast<std::streamsize>(size));
	// read() sets failbit when fewer than `size` bytes were delivered.
	if (!tf_file)
		throw std::ios_base::failure("msgunpack: failed to read input file");
	tf_file.close();

	msgpack::unpacked msg;
	msgpack::unpack(&msg, buff, size);
	msg.get().convert(&t);
}


// Nested map types that receive the dictionary dumped by python-msgpack.
// NOTE(review): the key/value meanings below are inferred from the type names
// and the "tf_dict_for_nodes" input path -- confirm against the producer script.
typedef map<uint32_t, uint32_t> WordsMap;   // presumably word id -> term frequency
typedef map<uint32_t, WordsMap> FieldsMap;  // presumably field id -> words of that field
typedef map<uint64_t, FieldsMap> DocsMap;   // presumably document id -> fields of that document

// Demo entry point: loads one msgpack dump into a DocsMap and prints how many
// top-level entries it contains.
//
// Returns 0 on success and 1 when loading fails. The command-line arguments
// are not used.
int main(int argc, char** argv)
{
	const uint32_t MAX_BUFF = 1024*1024*100; //100MB
	char* BUFF = new char[MAX_BUFF];
	int status = 0;

	try {
		DocsMap docsMap;
		msgunpack("/data/wikidoc/tf_dict_for_nodes/1-1000", docsMap, BUFF, MAX_BUFF);
		std::cout << docsMap.size() << std::endl;
	} catch (const std::exception& e) {
		// Report the failure instead of terminating through an uncaught
		// exception; control then still reaches the delete[] below.
		std::cerr << "failed to load dump: " << e.what() << std::endl;
		status = 1;
	}

	delete[] BUFF;
	return status;
}


下面是本人自己封裝的一個msgpack介面標頭檔案mymsgpack.h

mymsgpack.h:

#ifndef MY_MSGPACK_H
#define MY_MSGPACK_H
#include <stdint.h>

#include <climits>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <new>
#include <stdexcept>
#include <vector>

#include <msgpack.hpp>
using namespace std;

// Read a whole msgpack-encoded file, unpack it and convert the top-level
// object into `t`.
//
// binary_file  path of the file to read (opened in binary mode)
// t            destination object; filled through msgpack's convert()
//
// Throws std::ios_base::failure when the file cannot be opened, its size
// cannot be determined or represented, or it cannot be read completely.
// Errors raised by msgpack::unpack / convert are not handled here and
// propagate to the caller; the temporary buffer is released in every case.
template <class T>
void load_from_file(const char* binary_file, T& t) {
        // ios::ate positions the stream at the end, so tellg() yields the file size.
        std::ifstream binaryFstream(binary_file, std::ios::in | std::ios::binary | std::ios::ate);
        if (!binaryFstream)
                throw std::ios_base::failure("load_from_file: cannot open input file");

        const std::streamoff end_pos = binaryFstream.tellg();
        if (end_pos < 0)
                throw std::ios_base::failure("load_from_file: cannot determine input file size");
        const std::size_t size = static_cast<std::size_t>(end_pos);
        // Detect truncation where size_t is narrower than streamoff.
        if (static_cast<std::streamoff>(size) != end_pos)
                throw std::ios_base::failure("load_from_file: input file is too large to load");

        // The vector owns the bytes, so nothing leaks if unpacking throws.
        std::vector<char> buff(size);
        char* data = buff.empty() ? NULL : &buff[0];

        binaryFstream.seekg(0, std::ios::beg);
        if (!buff.empty())
                binaryFstream.read(data, static_cast<std::streamsize>(size));
        // read() sets failbit when fewer than `size` bytes were delivered.
        if (!binaryFstream)
                throw std::ios_base::failure("load_from_file: failed to read input file");
        binaryFstream.close();

        msgpack::unpacked msg;
        msgpack::unpack(&msg, data, size);
        msg.get().convert(&t);
}

template <class T>
void load_from_str(const char* binary_str, int len, T& t) {
        msgpack::unpacked msg;
        msgpack::unpack(&msg, binary_str, len);
        msg.get().convert(&t);
}

// Serialize `t` with msgpack and write the bytes to `dump_file`, replacing
// any previous content of that file.
//
// t          object to serialize
// dump_file  path of the output file (opened in binary mode, truncated)
//
// Throws std::ios_base::failure when the file cannot be opened or the data
// cannot be written completely. Errors raised by msgpack::pack propagate to
// the caller.
template <class T>
void dump_to_file(T& t, const char* dump_file) {
	// Pack before opening: if packing throws, the existing file is left untouched.
	msgpack::sbuffer sbuf;
	msgpack::pack(sbuf, t);

	std::ofstream dumpFstream(dump_file, std::ios::out | std::ios::binary | std::ios::trunc);
	if (!dumpFstream)
		throw std::ios_base::failure("dump_to_file: cannot open output file");

	dumpFstream.write(sbuf.data(), static_cast<std::streamsize>(sbuf.size()));
	// close() flushes; checking the stream afterwards also catches flush errors.
	dumpFstream.close();
	if (!dumpFstream)
		throw std::ios_base::failure("dump_to_file: failed to write output file");
}

// Serialize `t` with msgpack into a newly malloc()-ed byte buffer.
//
// t         object to serialize
// dump_str  receives the buffer; the caller owns it and must release it
//           with free()
// len       receives the number of bytes stored in *dump_str
//
// On failure *dump_str is NULL and len is 0. Throws std::length_error when
// the packed size does not fit in an int and std::bad_alloc when the buffer
// cannot be allocated. Errors raised by msgpack::pack propagate to the caller.
template <class T>
void dump_to_str(T& t, char** dump_str, int& len) {
	// Give the outputs a defined state before anything can throw.
	*dump_str = NULL;
	len = 0;

	msgpack::sbuffer sbuf;
	msgpack::pack(sbuf, t);

	const std::size_t packed_size = sbuf.size();
	if (packed_size > static_cast<std::size_t>(INT_MAX))
		throw std::length_error("dump_to_str: packed data does not fit in an int length");

	// Request at least one byte: malloc(0) is allowed to return NULL.
	char* copy = static_cast<char*>(std::malloc(packed_size != 0 ? packed_size : 1));
	if (copy == NULL)
		throw std::bad_alloc();
	if (packed_size != 0)
		std::memcpy(copy, sbuf.data(), packed_size);

	*dump_str = copy;
	len = static_cast<int>(packed_size);
}

#endif

msgpack編譯通過,連結不上的問題 undefined reference to `__sync_sub_and_fetch_4'

在x86_64機器上正常,在32bit機器上出現上述問題

[[email protected] msgpack-0.5.7]$ cat /etc/issue
CentOS release 5.4 (Final)
Kernel \r on an \m

[[email protected] msgpack-0.5.7]$ file /sbin/init
/sbin/init: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), for GNU/Linux 2.6.9, dynamically linked (uses shared libs), for GNU/Linux 2.6.9, stripped

./configure不報錯,但是檢視config.log顯示有錯誤,程式連結msgpack的庫時也報錯

原因:gcc在32位元x86上預設以i386為目標體系,而i386沒有對應的原子操作指令,__sync_sub_and_fetch_4無法展開成內建指令,於是變成找不到定義的外部符號;需要用-march手動指明較新的CPU體系

[[email protected] msgpack-0.5.7]$ CFLAGS="-march=pentium -mtune=pentium" ./configure --prefix=/home/xudongsong/msgpack_static --enable-static=yes --enable-shared=no

make, make install

[[email protected] jobs]$ g++ job_calc_weight.cpp -o job_calc_weight -I/home/xudongsong/msgpack_static/include/ -L/home/xudongsong/msgpack_static/lib/ -lmsgpack

通過!

下面是msgpack和cPickle進行效能pk的demo程式(不比較pickle,是因為它比cPickle更慢,《Python Cookbook》裡面有說明):

mport sys,time,msgpack,pickle,cPickle,random

test_list = []
i = 0
while i<100000:
	test_list = random.randrange(1,100000)
	i += 1

print "common len(serialize) = %s"%len(cPickle.dumps(test_list,0))
print "compress len(serialize) = %s"%len(cPickle.dumps(test_list,1))

#------------------------------------------------------------------------
results = {}
time_start = time.time()
for i in range(1,1000000):
        results[i] = cPickle.dumps(test_list,1)
time_mid_1 = time.time()
print "cPickle dumps eats %s s"%str(time_mid_1-time_start)

for i in range(1,1000000):
	cPickle.loads(results[i])
time_mid_2 = time.time()
print "cPickle loads eats %s s"%str(time_mid_2-time_mid_1)

#------------------------------------------------------------------------
results = {}
time_start = time.time()
for i in range(1,1000000):
	results[i] = msgpack.dumps(test_list)
time_mid_1 = time.time()
print "msgpack pack eats %s s"%str(time_mid_1-time_start)

for i in range(1,1000000):
	msgpack.loads(results[i])
time_mid_2 = time.time()
print "msgpack unpack eats %s s"%str(time_mid_2-time_mid_1)