1. 程式人生 > >一致性Hash簡單介紹和使用

一致性Hash簡單介紹和使用

mes red second count main ace 背景 -- file

背景:
一致性Hash用於分布式緩存系統,將Key值映射到具體機器IP上,並且添加或刪除1臺機器時數據移動量較小,對現網影響較小

實現:
1 Hash環:將節點的Hash值映射到一個Hash環中,每一個Key順時針找到的第一個節點,就是這個Key被路由到的機器
2 "虛擬節點":將節點虛擬成多個"虛擬節點"分布在Hash環上,使得分布更均勻,擴縮容影響較小

代碼實例:

/*
 * @ 一致性Hash模擬測試
 * @ 結論:模擬4臺機器擴容1臺,遍歷Key[0,999983)
		- 一致性Hash需移動181161個Key,約占18%(1/5左右,符合預期效果)
		- 取模Hash需移動799984個Key,約占80%
 * @ 2014.05.30 
 */

#include <stdint.h>
#include <iostream>
#include <string.h>
#include <sstream>
#include <map>
#include <vector>
using namespace std;

// Modulus shared by the whole demo: APHash() reduces its result with it and
// FindIp() reduces incoming keys with it, so ring positions lie in [0, HASH_MOD).
#define HASH_MOD (999983) 

/**
 * Formats a value as text by inserting it into a string stream.
 *
 * @tparam T  any type that supports `operator<<` on a standard output stream
 * @param t   value to format
 * @return    the characters produced by streaming `t`
 */
template <typename T>
std::string ToStr(const T &t)
{
	std::ostringstream out;
	out << t;
	return out.str();
}

/**
 * AP-style string hash reduced into the ring range [0, HASH_MOD).
 *
 * Characters are consumed up to the first NUL byte. Characters at even
 * positions and at odd positions are mixed in with two different formulas.
 *
 * @param sKey  text to hash. It is only read, so it is taken by const
 *              reference; const strings and temporaries are accepted too.
 * @return      the accumulated hash modulo HASH_MOD
 *
 * NOTE(review): each character is mixed in as a plain `char`, so bytes >= 0x80
 * are sign-extended on platforms where `char` is signed. Keys containing such
 * bytes may therefore hash differently across platforms -- confirm whether
 * non-ASCII keys matter before relying on this in a mixed fleet.
 */
uint32_t APHash(const std::string &sKey)
{
	unsigned int hash = 0;
	int i = 0;
	for (const char *key = sKey.c_str(); *key != '\0'; ++key, ++i)
	{
		if ((i & 1) == 0) {
			hash ^= ((hash << 7) ^ (*key) ^ (hash >> 3));
		} else {
			hash ^= (~((hash << 11) ^ (*key) ^ (hash >> 5)));
		}
	}
	return hash % HASH_MOD;
}

/**
 * Consistent-hash ring that routes integer keys to machine IPs.
 *
 * Every IP is placed on the ring at several "virtual node" positions obtained
 * by hashing the IP together with a counter. A key is served by the first
 * virtual node whose position is strictly greater than `key % HASH_MOD`;
 * when there is none, the lookup wraps around to the smallest position.
 */
class CMyConHash
{
public:
	/**
	 * Adds one machine (IP) to the ring.
	 *
	 * VIRTUAL_NODE_CNT positions are generated per IP; the original author
	 * chose 128 because, combined with APHash, it gave a fairly even spread
	 * in experiments. If a position is already taken by another IP, this IP
	 * overwrites it.
	 */
	void AddIp(const std::string &sIp)
	{
		std::vector<uint32_t> &vecVirKey = mapIp2VirKey[sIp];
		for (int i = 0; i < VIRTUAL_NODE_CNT; ++i)
		{
			// Kept as a named local so it can be passed to APHash as an lvalue.
			std::string sCode = sIp + ToStr(i) + "#Hash";
			const uint32_t uVirKey = APHash(sCode);
			mapVirKey2Ip[uVirKey] = sIp;
			vecVirKey.push_back(uVirKey);
		}
	}

	/**
	 * Removes one machine (IP) and the ring positions it still owns.
	 *
	 * Prints an error and returns when the IP is unknown.
	 */
	void DelIp(const std::string &sIp)
	{
		IpMap::iterator itIp = mapIp2VirKey.find(sIp);
		if (itIp == mapIp2VirKey.end()) {
			std::cout << "DelIp Err: mapIp2VirKey Don`t Has Ip=" << sIp << std::endl;
			return;
		}
		const std::vector<uint32_t> &vecVirKey = itIp->second;
		for (std::vector<uint32_t>::size_type i = 0; i < vecVirKey.size(); ++i)
		{
			// Look the position up with find(): operator[] would insert an
			// empty-IP ghost node whenever the position has already been
			// erased (collision with another IP, or the same IP added twice).
			RingMap::iterator itNode = mapVirKey2Ip.find(vecVirKey[i]);
			// Two IPs may hash to the same position and overwrite each other,
			// so only erase the node when it still belongs to this IP.
			if (itNode != mapVirKey2Ip.end() && itNode->second == sIp) {
				mapVirKey2Ip.erase(itNode);
			}
		}
		mapIp2VirKey.erase(itIp);
	}

	/**
	 * Routing: finds the machine (IP) responsible for a key.
	 *
	 * @param uKey  key to route; it is reduced modulo HASH_MOD first
	 * @param sIp   receives the owning IP on success, untouched on failure
	 * @return      0 on success, -1 (after printing an error) when the ring is empty
	 */
	int FindIp(uint32_t uKey, std::string &sIp) const
	{
		if (mapVirKey2Ip.empty()) {
			std::cout << "FindIp Err: mapVirKey2Ip.size() == 0" << std::endl;
			return -1;
		}
		// std::map is ordered by key, so upper_bound yields the first virtual
		// node strictly greater than the key without scanning the whole ring.
		RingMap::const_iterator iter = mapVirKey2Ip.upper_bound(uKey % HASH_MOD);
		if (iter == mapVirKey2Ip.end()) {
			// No virtual node is larger than the key: wrap around to the smallest one.
			iter = mapVirKey2Ip.begin();
		}
		sIp = iter->second;
		return 0;
	}

	/**
	 * Prints how many ring positions (keys in [0, HASH_MOD)) each IP serves.
	 *
	 * The balance depends on the hash function and on the number of virtual
	 * nodes per IP. Each node is credited with the arc between the previous
	 * node and itself; the smallest node is also credited with the
	 * wrap-around arc, so the printed counts add up to HASH_MOD. An empty
	 * ring prints "Ip Size=0" only.
	 */
	void EchoIpState() const
	{
		std::map<std::string, uint32_t> mapIpCnt;
		if (!mapVirKey2Ip.empty()) {
			RingMap::const_iterator iter = mapVirKey2Ip.begin();
			// Wrap-around arc: keys at or above the largest node plus keys
			// below the smallest node are all routed to the smallest node.
			uint32_t uPreKey = mapVirKey2Ip.rbegin()->first;
			mapIpCnt[iter->second] += HASH_MOD - uPreKey + iter->first;
			uPreKey = iter->first;
			for (++iter; iter != mapVirKey2Ip.end(); ++iter)
			{
				mapIpCnt[iter->second] += iter->first - uPreKey;
				uPreKey = iter->first;
			}
		}

		std::cout << "Ip Size=" << mapIpCnt.size() << std::endl;
		std::map<std::string, uint32_t>::const_iterator iter1;
		for (iter1 = mapIpCnt.begin(); iter1 != mapIpCnt.end(); ++iter1)
		{
			std::cout << "Ip=" << iter1->first << ",Cnt=" << iter1->second << std::endl;
		}
	}
private:
	// Number of virtual nodes generated for every IP.
	enum { VIRTUAL_NODE_CNT = 128 };

	typedef std::map< uint32_t, std::string > RingMap;
	typedef std::map< std::string, std::vector<uint32_t> > IpMap;

	RingMap mapVirKey2Ip;   // ring position -> owning IP, ordered by position
	IpMap mapIp2VirKey;     // IP -> every position generated for it by AddIp
};

/**
 * Plain modulo hashing, used as the baseline to compare against the
 * consistent-hash ring: a key is served by the IP at index
 * `key % (number of IPs)` in insertion order.
 */
class CMyModHash
{
public:
	/** Appends one machine (IP) to the list. */
	void AddIp(const std::string &sIp)
	{
		vecIpList.push_back(sIp);
	}

	/**
	 * Finds the machine (IP) responsible for a key.
	 *
	 * @param uKey  key to route
	 * @param sIp   receives the owning IP on success, untouched on failure
	 * @return      0 on success, -1 (after printing an error) when no IP has
	 *              been added; this avoids a modulo by zero
	 */
	int FindIp(uint32_t uKey, std::string &sIp) const
	{
		if (vecIpList.empty()) {
			std::cout << "FindIp Err: vecIpList.size() == 0" << std::endl;
			return -1;
		}
		sIp = vecIpList[uKey % vecIpList.size()];
		return 0;
	}

	/** Prints the number of machines currently registered. */
	void EchoIpState() const
	{
		std::cout << "Ip Cnt=" << vecIpList.size() << std::endl;
	}
private:
	std::vector<std::string> vecIpList;   // registered IPs in insertion order
};

/**
 * Simulation driver: starts with 4 machines, records which machine serves
 * every key in [0, HASH_MOD), adds a 5th machine, and reports how many keys
 * are now routed to a different machine.
 */
int main()
{
	CMyConHash oMyHash;
	// CMyModHash oMyHash;

	// Start with 4 machines.
	oMyHash.AddIp("202.168.14.241");
	oMyHash.AddIp("202.168.14.242");
	oMyHash.AddIp("202.168.14.243");
	oMyHash.AddIp("202.168.14.244");
	oMyHash.EchoIpState();

	// Remember the machine each key is routed to. The table holds HASH_MOD
	// strings, so it lives on the heap (std::vector) rather than in a local
	// array, which would exceed typical stack limits.
	std::string sIp;
	std::vector<std::string> arrKeyIp(HASH_MOD);
	for (uint32_t key = 0; key < HASH_MOD; ++key)
	{
		oMyHash.FindIp(key, sIp);
		arrKeyIp[key] = sIp;
	}

	// Add one more machine.
	oMyHash.AddIp("202.168.14.245");
	oMyHash.EchoIpState();

	// Count the keys whose data would have to move to another machine.
	uint32_t uCnt = 0;
	for (uint32_t key = 0; key < HASH_MOD; ++key)
	{
		oMyHash.FindIp(key, sIp);
		if (arrKeyIp[key] != sIp) {
			++uCnt;
		}
	}
	std::cout << "Key Sum=" << HASH_MOD << " , Need To Move:" << uCnt << std::endl;

	return 0;
}


一致性Hash簡單介紹和使用