一致性Hash簡單介紹和使用
阿新 • • 發佈:2017-07-18
mes red second count main ace 背景 -- file
背景:
一致性Hash用於分布式緩存系統,將Key值映射到具體機器的IP上,而且添加或刪除1臺機器時的數據移動量較小,對現網影響較小。
實現:
1 Hash環:將節點的Hash值映射到一個Hash環中。每一個Key沿順時針方向找到的第一個節點,就是這個Key被路由到的機器。
2 "虛擬節點":將每個節點虛擬成多個"虛擬節點"分布在Hash環上,使得分布更均勻,擴縮容時的影響也較小。
一致性Hash用於分布式緩存系統,將Key值映射到具體機器的IP上,而且添加或刪除1臺機器時的數據移動量較小,對現網影響較小。
實現:
1 Hash環:將節點的Hash值映射到一個Hash環中。每一個Key沿順時針方向找到的第一個節點,就是這個Key被路由到的機器。
2 "虛擬節點":將每個節點虛擬成多個"虛擬節點"分布在Hash環上,使得分布更均勻,擴縮容時的影響也較小。
代碼實例:
/*
 * Consistent-hash simulation test.
 *
 * Result reported by the original author: simulating 4 machines scaled out
 * by 1 machine and walking every key in [0, 999983):
 *   - consistent hashing needs to move 181161 keys, about 18%
 *     (roughly 1/5, which matches the expected effect);
 *   - modulo hashing needs to move 799984 keys, about 80%.
 *
 * 2014.05.30
 */
#include <stdint.h>
#include <iostream>
#include <string.h>
#include <sstream>
#include <map>
#include <string>
#include <vector>

using namespace std;

/* Size of the hash ring; every virtual node and every key is reduced modulo this. */
#define HASH_MOD (999983)

/* Formats any streamable value as a string via operator<<. */
template <class T>
string ToStr(const T &t)
{
    stringstream stream;
    stream << t;
    return stream.str();
}

/*
 * AP hash of sKey, reduced to the range [0, HASH_MOD).
 *
 * Characters are consumed up to the first NUL of sKey.c_str(); even and odd
 * positions are mixed with different shift patterns.
 */
uint32_t APHash(const string &sKey)
{
    const char *key = sKey.c_str();
    unsigned int uHash = 0;
    for (int i = 0; *key; i++)
    {
        if ((i & 1) == 0)
        {
            uHash ^= ((uHash << 7) ^ (*key++) ^ (uHash >> 3));
        }
        else
        {
            uHash ^= (~((uHash << 11) ^ (*key++) ^ (uHash >> 5)));
        }
    }
    return uHash % HASH_MOD;
}

/*
 * Consistent-hash router: machines (IPs) are placed on a ring of size
 * HASH_MOD as virtual nodes, and each key is routed to the first virtual
 * node whose position is strictly greater than the key (wrapping around).
 */
class CMyConHash
{
public:
    /*
     * Adds one machine (IP) to the ring.
     *
     * Each IP gets 128 virtual nodes; the original author chose that number
     * because, combined with APHash, the measured distribution was fairly even.
     * If two IPs hash to the same virtual node, the later AddIp overwrites the
     * earlier owner of that position.
     */
    void AddIp(const string &sIp)
    {
        for (int i = 0; i < 128; i++)
        {
            string sCode = sIp + ToStr(i) + "#Hash";
            uint32_t uVirKey = APHash(sCode);
            mapVirKey2Ip[uVirKey] = sIp;
            mapIp2VirKey[sIp].push_back(uVirKey);
        }
    }

    /*
     * Removes one machine (IP) and the virtual nodes it still owns.
     * Prints an error and does nothing when the IP was never added.
     */
    void DelIp(const string &sIp)
    {
        map< string, vector<uint32_t> >::iterator itIp = mapIp2VirKey.find(sIp);
        if (itIp == mapIp2VirKey.end())
        {
            cout << "DelIp Err: mapIp2VirKey Don`t Has Ip=" << sIp << endl;
            return;
        }

        const vector<uint32_t> &vecVirKey = itIp->second;
        for (size_t i = 0; i < vecVirKey.size(); i++)
        {
            // Look the node up with find(): operator[] would insert an empty
            // entry when the position has already been erased, leaving a
            // phantom node with an empty IP on the ring.
            map<uint32_t, string>::iterator itVir = mapVirKey2Ip.find(vecVirKey[i]);

            // Ownership must be checked: another IP may have hashed to the same
            // position and overwritten it.
            if (itVir != mapVirKey2Ip.end() && itVir->second == sIp)
            {
                mapVirKey2Ip.erase(itVir);
            }
        }
        mapIp2VirKey.erase(itIp);
    }

    /*
     * Routing: finds the machine (IP) responsible for uKey.
     *
     * uKey is reduced modulo HASH_MOD first. On success sIp receives the IP
     * and 0 is returned; when the ring is empty an error is printed, sIp is
     * left untouched and -1 is returned.
     */
    int FindIp(uint32_t uKey, string &sIp) const
    {
        if (mapVirKey2Ip.empty())
        {
            cout << "FindIp Err: mapVirKey2Ip.size() == 0" << endl;
            return -1;
        }

        // std::map keeps its keys ordered, so the first virtual node strictly
        // greater than the key is a logarithmic lookup.
        map<uint32_t, string>::const_iterator iter =
            mapVirKey2Ip.upper_bound(uKey % HASH_MOD);
        if (iter == mapVirKey2Ip.end())
        {
            // No virtual node is larger than the key: wrap around the ring
            // and use the smallest virtual node.
            iter = mapVirKey2Ip.begin();
        }
        sIp = iter->second;
        return 0;
    }

    /*
     * Prints how many ring positions each IP is responsible for. The result
     * depends on 1) the hash function and 2) the number of virtual nodes.
     *
     * A virtual node owns the positions from its predecessor (inclusive) up to
     * itself (exclusive), matching the strict "greater than" test in FindIp.
     * The smallest virtual node additionally owns the wrap-around segment, so
     * the counts always add up to HASH_MOD for a non-empty ring.
     *
     * Sample recorded by the original author for 4 machines. It was taken
     * without the wrap-around segment, so it sums to 991012, not HASH_MOD:
     *   Ip=202.168.14.241,Cnt=251649
     *   Ip=202.168.14.242,Cnt=257902
     *   Ip=202.168.14.243,Cnt=245945
     *   Ip=202.168.14.244,Cnt=235516
     */
    void EchoIpState() const
    {
        map<string, uint32_t> mapIpCnt;

        if (!mapVirKey2Ip.empty())
        {
            map<uint32_t, string>::const_iterator iter = mapVirKey2Ip.begin();
            const uint32_t uFirstKey = iter->first;
            const uint32_t uLastKey = mapVirKey2Ip.rbegin()->first;

            // Wrap-around segment: positions [uLastKey, HASH_MOD) plus
            // [0, uFirstKey) are all routed to the smallest virtual node.
            mapIpCnt[iter->second] += (HASH_MOD - uLastKey) + uFirstKey;

            uint32_t uPreKey = uFirstKey;
            for (++iter; iter != mapVirKey2Ip.end(); ++iter)
            {
                mapIpCnt[iter->second] += iter->first - uPreKey;
                uPreKey = iter->first;
            }
        }

        cout << "Ip Size=" << mapIpCnt.size() << endl;
        map<string, uint32_t>::const_iterator iterCnt;
        for (iterCnt = mapIpCnt.begin(); iterCnt != mapIpCnt.end(); ++iterCnt)
        {
            cout << "Ip=" << iterCnt->first << ",Cnt=" << iterCnt->second << endl;
        }
    }

private:
    map< uint32_t, string > mapVirKey2Ip;          // ring position -> owning IP
    map< string, vector<uint32_t> > mapIp2VirKey;  // IP -> ring positions it was given
};

/*
 * Plain modulo router used as the comparison baseline: key -> list[key % size].
 */
class CMyModHash
{
public:
    /* Appends one machine (IP) to the list. */
    void AddIp(const string &sIp)
    {
        vecIpList.push_back(sIp);
    }

    /*
     * Finds the machine (IP) for uKey. Returns 0 on success; when no machine
     * has been added, prints an error, leaves sIp untouched and returns -1
     * (the modulo would otherwise divide by zero).
     */
    int FindIp(uint32_t uKey, string &sIp) const
    {
        if (vecIpList.empty())
        {
            cout << "FindIp Err: vecIpList.size() == 0" << endl;
            return -1;
        }
        sIp = vecIpList[uKey % vecIpList.size()];
        return 0;
    }

    /* Prints the number of registered machines. */
    void EchoIpState() const
    {
        cout << "Ip Cnt=" << vecIpList.size() << endl;
    }

private:
    vector<string> vecIpList;
};

/*
 * Simulation driver: routes every key with 4 machines, adds a 5th machine,
 * routes every key again and reports how many keys changed machine.
 */
int main()
{
    CMyConHash oMyHash;
    // CMyModHash oMyHash;

    // Simulate an initial cluster of 4 machines.
    oMyHash.AddIp("202.168.14.241");
    oMyHash.AddIp("202.168.14.242");
    oMyHash.AddIp("202.168.14.243");
    oMyHash.AddIp("202.168.14.244");
    oMyHash.EchoIpState();

    // Remember which machine every key is routed to. HASH_MOD strings are
    // kept in a vector: as a local array they can exceed the stack size.
    string sIp;
    vector<string> arrKeyIp(HASH_MOD);
    for (uint32_t key = 0; key < HASH_MOD; key++)
    {
        oMyHash.FindIp(key, sIp);
        arrKeyIp[key] = sIp;
    }

    // Simulate adding 1 machine.
    oMyHash.AddIp("202.168.14.245");
    oMyHash.EchoIpState();

    // Count the keys whose data would have to move to another machine.
    uint32_t uCnt = 0;
    for (uint32_t key = 0; key < HASH_MOD; key++)
    {
        oMyHash.FindIp(key, sIp);
        if (arrKeyIp[key] != sIp)
        {
            uCnt++;
        }
    }
    cout << "Key Sum=" << HASH_MOD << " , Need To Move:" << uCnt << endl;

    return 0;
}
一致性Hash簡單介紹和使用