1. 程式人生 > >雜湊表的靜態,動態,以及key/value形式

雜湊表的靜態,動態,以及key/value形式

  雜湊是一種演算法,將指定的資料按一定規律對映到一段空間內,又可以按照這種規律對它的值進行相應的操作,這一段空間可以稱作雜湊表,它的的查詢速度要快於線性的資料結構,同時也快於表格佇列等,所以它具有獨特的優勢,一般將雜湊演算法用於快速查詢和加密演算法。

   對於最簡單的雜湊表,裡面設定一個key,它決定將這個值存於雜湊表的什麼位置,同時把每個設定一個狀態,如果有插入資料就將其設定為EXITS,其他操作同理,現在可以實現最簡單的雜湊表。

namespace First

{

enum State

{

EMPTY,

DELETE,

EXITS

};

template <typename T>

class HashTable

{

public:

HashTable(size_t capacity = 10)//構造

:_capacity(capacity)

, _tables(new T[_capacity])

, _states(new State[_capacity])

, _size(0)

{

for (int i = 0; i < _capacity; i++)//最初始得狀態置成空的

{

_states[i] = EMPTY;

}

}

~HashTable()//析構

{

delete[] _tables;

delete[] _states;

}

HashTable(const HashTable<T>& h)//拷貝構造

:_capacity(h._capacity)

, _tables(new T[h._capacity])

, _states(new State[h._capacity])

, _size(h._size)

{

for (int i = 0; i < h._capacity; i++)

{

_tables[i] = h._tables[i];

_states[i] = h._states[i];

}

}

HashTable& operator=(HashTable<T> h)//賦值運算子過載

{

if (this != &h)

{

swap(_tables, h._tables);

swap(_states, h._states);

swap(_capacity, h._capacity);

swap(_size, h._size);

}

return *this;

}

bool Insert(const T& key)//插入

{

if (_size == _capacity)

{

cout << "HashTable full" << endl;

return false;

}

int index = HashFunc(key);

int start = index;

while (_states[index] == EXITS)//往後線形探測

{

if (_tables[index] == key)//有相等的

{

return false;

}

index++;

if (index == _capacity)//最後一個

{

index = 0;

}

if (index == start)//找了一圈沒找到

{

return false;

}

}

_tables[index] = key;

_states[index] = EXITS;

_size++;

}

bool Find(const T& key)//查詢

{

int index = HashFunc(key);

int start = index;

while (_states[index] != EMPTY)

{

if (_tables[index] == key)

{

if (_states[index] != DELETE)

{

cout << "find succees" << endl;

return true;

}

else

{

cout << "find fail" << endl;

return false;

}

}

index++;

if (index == _capacity)

{

index = 0;

}

if (start == index)

{

cout << "find fail" << endl;

return false;

}

}

cout << "find fail" << endl;

return false;

}

bool Remove(const T& key)///刪除

{

int index = HashFunc(key);

int start = index;

while (_states[index] != EMPTY)

{

if (_tables[index] == key)

{

if (_states[index] != DELETE)

{

cout << "delete key" << endl;

_states[index] = DELETE;

return true;

}

else

{

cout << "delete fail" << endl;

return false;

}

}

index++;

if (index == _capacity)

{

index = 0;

}

if (start == index)

{

return false;

}

}

cout << "delete fail" << endl;

return true;

}

void Print()//列印雜湊表

{

for (int i = 0; i < _capacity; i++)

{

cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';

}

cout << endl;

}

protected:

int HashFunc(const T& key)

{

return key%_capacity;

}

private:

size_t _capacity;

T* _tables;

State* _states;

size_t _size;

};

}

/**************************************/

從上面的程式碼可以看出,這個雜湊表並不適用於實際,因為首先它是一個靜態的,如果存入的key值過多就會造成越界訪問,同時用的是線性探測方法,這樣降低了cpu的訪問命中率,現在可以實現一種動態的而且隨意設定負載因子的功能。

namespace Second//因為有負載因子的限制,可以提高cpu訪問命中率

{

enum State

{

EMPTY,

DELETE,

EXITS

};

template <typename T>

class HashTable

{

public:

HashTable(size_t capacity = 30)//構造

:_capacity(capacity)

, _tables(new T[_capacity])

, _states(new State[_capacity])

, _size(0)

{

for (int i = 0; i < _capacity; i++)//最初始得狀態置成空的

{

_states[i] = EMPTY;

}

}

~HashTable()//析構

{

delete[] _tables;

delete[] _states;

}

HashTable(const HashTable<T>& h)//拷貝構造

:_capacity(h._capacity)

, _tables(new T[h._capacity])

, _states(new State[h._capacity])

, _size(h._size)

{

for (int i = 0; i<h._capacity; i++)

{

_tables[i] = h._tables[i];

_states[i] = h._states[i];

}

}

HashTable& operator=(HashTable<T> h)//賦值運算子過載

{

if (this != &h)

{

swap(_tables, h._tables);

swap(_states, h._states);

swap(_capacity, h._capacity);

swap(_size, h._size);

}

return *this;

}

//bool Insert(const T& key)//插入(線性探測)

//{

//_CheckCapacity();

//int index = _HashFunc(key);

//int start = index;

//while (_states[index]==EXITS)

//{

//if (_tables[index] == key)

//{

//return false;

//}

//index++;

//if (index == _capacity)

//{

//index = 0;

//}

//if (index == start)

//{

//return false;

//}

//

//}

//_tables[index] = key;

//_states[index] = EXITS;

//_size++;

//}

bool Insert(const T& key)//插入(二次探測,即某個數的二次方,這樣資料存著更稀疏)

{

_CheckCapacity();

int index = _HashFunc(key);

int start = index;

int i = 0;

while (_states[index]==EXITS)

{

if (_tables[index] == key)

{

return false;

}

index = _HashFuncT(index, ++i);

if (start = index)

{

return false;

}

if (index == _capacity)

{

index = 0;

}

}

_tables[index] = key;

_states[index] = EXITS;

_size++;

}

bool Find(const T& key)//查詢

{

int index = _HashFunc(key);

int start = index;

int i = 0;

while (_states[index]!=EMPTY)

{

if (_tables[index] == key)

{

if (_states[index] != DELETE)

{

cout << "find success" << endl;

return true;

}

else

{

cout << "find fail" << endl;

return false;

}

}

index = _HashFuncT(index, ++i);

if (start = index)

{

cout << "find fail" << endl;

return false;

}

if (index == _capacity)

{

index = 0;

}

}

cout << "find fail" << endl;

return false;

}

bool Remove(const T& key)///刪除

{

int index = _HashFunc(key);

int start = index;

int i = 0;

while (_states[index] == EXITS)

{

if (_tables[index] == key)

{

_states[index] = DELETE;

_size--;

return true;

}

index = _HashFuncT(index, ++i);

if (start == index)

{

return false;

}

if (index == _capacity)

{

index = 0;

}

}

return false;

}

void Print()//列印雜湊表

{

for (int i = 0; i < _capacity; i++)

{

cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';

}

cout << endl;

}

protected:

int _HashFuncT(int index,int i)

{

return (index + i*i) % _capacity;

}

int _HashFunc(const T& key)

{

return key%_capacity;

}

void _CheckCapacity()//檢查容量

{

if ((10 * _size)/ _capacity == 6)//負載因子設為0.6

{

HashTable<T> tmp(2 * _capacity);

for (int i = 0; i < _capacity; i++)

{

if (_states[i]==EXITS)

{

tmp.Insert(_tables[i]);

}

}

_swap(tmp);

}

}

void _swap(HashTable<T> h)

{

swap(_tables, h._tables);

swap(_states, h._states);

swap(_capacity, h._capacity);

swap(_size, h._size);

}

private:

size_t _capacity;

T* _tables;

State* _states;

size_t _size;

};

}

/****************************************/

上面的程式碼對於key形式的相對第一種已經比較健全了。現在可以利用雜湊演算法可以實現一種key/value形式的功能,可以支援字典功能,key是一個資訊,同時value是key的一個附帶資訊,比如說key為學號,那麼班級就是附帶的資訊value,例如還有簡單的英漢字典形式,現進行簡單的實現。

namespace Third//支援字典形式的

{

enum State

{

EMPTY,

DELETE,

EXITS

};

template<class T,class V>

struct HashTableNode

{

HashTableNode()

{}

HashTableNode(const T& key, const V& value)

:_key(key)

, _value(value)

{}

T _key;

V _value;

};

template <class T>

struct __HashFunc

{

size_t operator()(const T& key)

{

return key;

}

};

//實現key,value形式,並且是二次探測的

template <class T ,class V,class HashFunc=__HashFunc<T>>

class Dictionary

{

public:

Dictionary(size_t capacity=10)

:_capacity(capacity)

, _tables(new HashTableNode<T,V> [_capacity])

, _states(new State[_capacity])

,_size(0)

{

for (int i = 0; i < _capacity; i++)

{

_states[i] = EMPTY;//將最開始的狀態置為空

}

}

~Dictionary()

{

delete[] _tables;

delete[] _states;

}

bool Insert(const T& key,const V& value)

{

_CheckCapacity();

int index = _HashFunonce(key);

int start = index;

int i = 0;

while (_states[index] == EXITS)

{

if (_tables[index]._key == key)

{

return false;

}

index = _HashFuntwice(index, ++i);

if (index == _capacity)

{

index = 0;

}

if (index == start)

{

return false;

}

}

_tables[index] = HashTableNode<T, V>(key, value);

_states[index] = EXITS;

_size++;

return true;

}

HashTableNode<T,V>* Find(const T& key)

{

int index = _HashFunonce(key);

int start = index;

int i = 0;

while (_states[index]==EXITS)

{

if (_tables[index]._key == key)

{

cout << "find success" << endl;

return _tables+index;

}

index = _HashFuntwice(index, ++i);

if (start == index)

{

cout << "find fail" << endl;

return NULL;

}

}

cout << "find fail" << endl;

return NULL;

}

bool Remove(const T& key)

{

int index = _HashFunonce(key);

int start = index;

int i = 0;

while (_states[index]!=EMPTY)

{

if (_tables[index]._key == key)

{

if (_states[index]!=DELETE)

{

_states[index] = DELETE;

_size--;

return true;

}

else

{

return false;

}

}

index = _HashFuntwice(index, ++i);

if (index == start)

{

return false;

}

}

return false;

}

void Print()

{

for (int i = 0; i < _capacity; i++)

{

cout << "[" << _tables[i]._key << "," << _tables[i]._value <<","<< _states[i]<<"]" << " ";

}

cout << endl;

}

protected:

void _CheckCapacity()//將負載因子設為0.6

{

if (_size * 10 / _capacity == 6)

{

Dictionary<T, V, HashFunc> tmp(2 * _capacity);

for (int i = 0; i < _capacity; i++)

{

if (_states[i] == EXITS)

{

tmp.Insert(_tables[i]._key,_tables[i]._value);

}

}

_Swap(tmp);

}

}

void _Swap(Dictionary<T, V, HashFunc> tmp)

{

swap(_tables, tmp._tables);

swap(_states, tmp._states);

swap(_capacity, tmp._capacity);

swap(_size, tmp._size);

}

size_t _HashFunonce(const T& key)

{

return key %_capacity;

}

size_t _HashFuntwice(int index,int i)//獲取二次探測的下標

{

return (index + i*i) % _capacity;

}

private:

size_t _capacity;

HashTableNode<T,V>* _tables;

State* _states;

size_t _size;

};

}

void test3()//二次探測,負載因子,實現字典的功能

{

/*Third::Dictionary<int, string> h1;

h1.Insert(10, "c語言基礎");

h1.Insert(59, "c++基礎");

h1.Insert(9, "資料結構");

h1.Insert(19, "Linux");

h1.Insert(18, "網路程式設計");*/

Third::Dictionary<int,int>h1;

h1.Insert(10, 1);

h1.Insert(59, 2);

h1.Insert(9, 3);

h1.Insert(19,4);

h1.Insert(18, 5);

//h1.Print();

cout<<h1.Find(9)->_value<<endl;

//h1.Remove(9);

//h1.Remove(19);

//h1.Remove(10);

//h1.Print();

}

上述就是對雜湊演算法的簡單應用。