1. 程式人生 > >利用哈夫曼樹進行檔案壓縮

利用哈夫曼樹進行檔案壓縮

專案描述:

專案簡介:利用哈夫曼編碼的方式對檔案進行壓縮,並且對壓縮檔案可以解壓

開發環境:windows vs2013

專案概述:

        1.壓縮

            a.讀取檔案,統計每個字元出現的次數,並以該次數作為權值構建哈夫曼樹

            b.哈夫曼樹是利用小堆構成,字元出現次數少的節點指標存在堆頂,出現次數多的在堆底

            c.每次取出堆頂的兩個節點,將兩者的權值相加生成新的父節點後放回堆中,直到堆中只剩一個節點,此時該節點即為根,哈夫曼樹建成

            d.從哈夫曼樹中獲取哈夫曼編碼,然後再根據整個字元陣列來獲取出現過的字元的編碼

            e.獲取編碼後每次湊滿8位就將編碼串寫入到壓縮檔案(每處理一位先將value左移一位,編碼為1時再與1做按位或,編碼為0時只移位)

             f.寫好配置檔案,統計每個字元及其出現次數,並以“字元+','+次數”的形式儲存到配置檔案中

         2.解壓

             a.讀取配置檔案,統計所有字元的個數

             b.構建哈夫曼樹,讀取壓縮檔案,依照讀到的編碼位在樹中走到葉節點,將該節點所含的字元寫入到解壓縮檔案中,直到將壓縮檔案讀完

             c.壓縮解壓縮完全完成,進行小檔案大檔案的測試

程式碼如下:

#pragma once 
#include"HuffManTree.h"
#include<string>

// Per-byte record used as the weight type of the Huffman tree.
//
// All comparison operators look at the occurrence count only; the symbol
// and the code string never take part in ordering or (in)equality.
struct CharInfo
{
	// Builds a record with the given occurrence count. The symbol is set to
	// 0 and the code is left empty, so no member is ever read uninitialized.
	CharInfo(int count=0)
	:_ch(0)
	,_count(count)
	{
	}

	// True when this record occurs less often than `info`.
	bool operator<(const CharInfo& info) const
	{
		return _count < info._count;
	}

	// True when this record occurs more often than `info`.
	bool operator>(const CharInfo& info) const
	{
		return _count > info._count;
	}

	// True when the two occurrence counts differ.
	bool operator!=(const CharInfo& info) const
	{
		return _count != info._count;
	}

	// Returns a record whose count is the sum of both counts; the result
	// carries no meaningful symbol or code.
	CharInfo operator+(const CharInfo& Info) const
	{
		return CharInfo(_count + Info._count);
	}

	char _ch;//the character (byte value) this record describes
	int _count;//number of times the character occurs
	std::string _code;//code assigned to the character
};

class FileCompress
{
public:
	FileCompress()
	{
		for (int i = 0; i < 256; i++)
		{
			_info[i]._ch = i;
			_info[i]._count = 0;
		}
	}

public:
	void  Compress(const char* FileName)//壓縮
	{ 
		FILE* fout = fopen(FileName, "rb");
		assert(fout);
		
		//統計字元出現的次數
		int ch = fgetc(fout);
		printf("%c\n", ch);
		int count = 0;
		while (ch!= EOF)
		{
			_info[unsigned char(ch)]._count++;
			ch = fgetc(fout);
			count++;
		}

		//構建哈夫曼樹
		CharInfo invalid;
		HuffManTree<CharInfo> h(_info, 256, invalid);

		//生成哈夫曼編碼
		string code;
		_GetHuffManCode(h._GetRoot(), code);

		string CompressFileName = FileName;
		CompressFileName += ".compress";
		FILE* fin = fopen(CompressFileName.c_str(), "wb");
		assert(fin);
		fseek(fout, 0, SEEK_SET);//從檔案開頭

		ch =(unsigned char)fgetc(fout);

		char value = 0;
		int size = 0;
		while (ch != EOF)
		{
			string _ccode = _info[(unsigned char)ch]._code;
			for (int i = 0; i < _ccode.size(); ++i)
			{
				value <<= 1;
				if (_ccode[i] =='1')
				{
					value |=1;
				}
				size++;
				if (size == 8)
				{
					fputc(value, fin);
					value = 0;
					size = 0;
				}
				
			}
			ch = fgetc(fout);
		}
		//補位
		if (size!=0)
		{
			value <<= ( 8- size);
			fputc(value, fin);
		}

		//寫配置檔案
		string configFileName = FileName;
		configFileName += ".config.txt";
		FILE* finConfig = fopen(configFileName.c_str(), "wb");
		assert(finConfig);
	
		string str;
		char buf[128];
		for (int i = 0; i < 256; i++)
		{
			if (_info[i]._count>0)
			{
				str += _info[i]._ch;
				str += ',';
				_itoa(_info[i]._count, buf, 10);
				str += buf;
				str += '\n';

				fputs(str.c_str(), finConfig);
				str.clear();
			}
		}
		
	

		fclose(fin);
		fclose(fout);
		fclose(finConfig);
	}

	void unCompress(const char* FileName)//解壓縮
	{
		//讀配置檔案
		string configFileNane = FileName;
		configFileNane += ".config.txt";
		FILE* foutConfig = fopen(configFileNane.c_str(), "rb");
		assert(foutConfig);
		int count = 0;
		string str;
		while (Read_a_Line(foutConfig,str))
		{
			if (str.empty())
			{
				str += '\n';
				count += 1;
				str.clear();
			}
			//else
			//{
			//	//_info[(unsigned char)str[0]] = atoi(str.substr(2).c_str());
			//	count += _info[(unsigned char)str[0]]._count;
			//	str.clear();
			//}	
		//	_info[((unsigned char)str[0])]._count = atoi(str.substr(2).c_str());
			//count += _info[(unsigned char)str[0]]._count;

			else
			{
				unsigned char ch = str[0];
				_info[ch]._count = atoi(str.substr(2).c_str());
				count += _info[ch]._count;
				str.clear();
			}
		
		}

		CharInfo invaild;
		HuffManTree<CharInfo> tree(_info, 256,invaild);

		string unCompressFileName = FileName;
		unCompressFileName += ".unCompress";//解壓縮檔案
		string CompressFileName = FileName;
		CompressFileName += ".compress";

		FILE* fout = fopen(CompressFileName.c_str(), "rb");
		assert(fout);
		FILE* fin = fopen(unCompressFileName.c_str(), "wb");
		assert(fin);

		HuffManTreeNode<CharInfo>* root = tree._GetRoot();
		HuffManTreeNode<CharInfo>* cur = root;
		int ch = fgetc(fout);
		int size =7;

		while (ch != EOF)
		{
			if (ch & (1 << size))
			{
				cur = cur->_right;
			}
			else
			{
				cur = cur->_left;
			}
			if (cur->_left==NULL&&cur->_right==NULL)
			{
				fputc(cur->_weight._ch, fin);
				cur = root;
				//count--;
				//if (count == 0)
				//	break;
			}
			size--;
			if (size<0)
			{
				ch=fgetc(fout);
				size = 7;
			}
		
		}

		fclose(fin);
		fclose(fout);
		fclose(foutConfig);
	}

protected:
	bool Read_a_Line(FILE*& fout,string& str)
	{
		int ch = fgetc(fout);
		if (ch == EOF)
			return false;
		
		while (ch != EOF&&ch!='\n')
		{
			str += ch;
			ch = fgetc(fout);
		}
		return true;
	}

	void _GetHuffManCode(const HuffManTreeNode<CharInfo>* root,string code)//生成哈夫曼編碼
	{
		if (root == NULL)
		{
			return;
		}
		if (root->_left == NULL&&root->_right == NULL)
		{
			_info[unsigned char((root->_weight)._ch)]._code = code;
			return;
		}
		if (root->_left)
			_GetHuffManCode(root->_left, code + '0');//左路為0
		if (root->_right)
			_GetHuffManCode(root->_right, code + '1');//右路為1
		
	}

private:
	CharInfo _info[256];
};


#pragma once 
#include<iostream>
#include"Heap.h"
using namespace std;


// Node of a binary Huffman tree. Holds a weight of type T and two child
// links, both of which start out as NULL.
template<class T>
struct HuffManTreeNode
{
	HuffManTreeNode* _left;   // left child
	HuffManTreeNode* _right;  // right child
	T _weight;                // payload stored in this node

	// Creates a node without children that stores a copy of `weight`.
	HuffManTreeNode(const T& weight)
		: _left(NULL), _right(NULL), _weight(weight)
	{}
};


template<class T>
class HuffManTree
{
public:
	typedef HuffManTreeNode<T> Node;
public:

	HuffManTree(T* arr, int size, T& invalid)//建立一個小堆
	{
		struct CompareNode
		{
			bool operator()(Node*& L,Node*& R)
			{
				return L->_weight < R->_weight;
			}
		};
		Heap<Node*, CompareNode> MinHeap;

		for (int i = 0; i < size; i++)
		{
			if (arr[i]!=invalid)
				MinHeap.Push(new Node(arr[i]));
		}
						
		while (MinHeap.Size()>1)
		{
			Node* left = MinHeap.Top();
			MinHeap.Pop();
			Node* right = MinHeap.Top();
			MinHeap.Pop();
			Node* parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			MinHeap.Push(parent);
		}

		_root = MinHeap.Top();
		MinHeap.Pop();
	}

	Node* _GetRoot()
	{
		return _root;
	}

private:
	Node* _root;
};			


#pragma once 
#include<iostream>

#include<vector>
using namespace std;
#include<assert.h>


// Default ordering predicate of Heap: true when l is smaller than r, which
// keeps the smallest element at the top (min-heap).
template<class T>
struct Small
{
public:
	bool operator()(const T& l, const T& r) const
	{
		return l < r;
	}
};
//
//template<class T>//can be used to build a max-heap instead
//struct Big
//{
//	bool operator()(const T& l, const T& r)
//	{
//		return l > r;
//	}
//};

// Binary heap stored in a std::vector. `CompareNode` decides which of two
// elements has to be closer to the top; with the default Small<T> the heap
// is a min-heap.
template<class T,class CompareNode=Small<T> >
class Heap
{
public:

	// Creates an empty heap.
	Heap()
	{
	}

	// Creates a heap holding copies of arr[0..size).
	Heap(const T* arr,int size)
	{
		for (int i = 0; i < size; i++)
		{
			_v.push_back(arr[i]);
		}

		// Sift down every node that has at least one child, last one first.
		for (int i = static_cast<int>(_v.size()) / 2 - 1; i >= 0; i--)
		{
			_AdjustDown(i);
		}
	}

	~Heap()
	{}

	// Inserts a copy of `d`.
	void Push(const T& d)
	{
		_v.push_back(d);
		_AdjustUp(static_cast<int>(_v.size()) - 1);
	}

	// Number of stored elements.
	int Size() const
	{
		return static_cast<int>(_v.size());
	}

	// Reference to the top element. The heap must not be empty.
	T& Top()
	{
		assert(!_v.empty());
		return _v.front();
	}

	// Removes the top element: swap it with the last element, drop the last
	// element and sift the new top down. Popping an empty heap trips the
	// assert in debug builds and is a no-op otherwise.
	void Pop()
	{
		assert(!_v.empty());
		if (_v.empty())
			return;

		using std::swap;
		swap(_v.front(), _v.back());
		_v.pop_back();
		_AdjustDown(0);
	}
protected:
	// Sifts the element at index `parent` down until neither child has to be
	// above it.
	void _AdjustDown(int parent)
	{
		using std::swap;
		CompareNode compareNode;
		const int count = static_cast<int>(_v.size());
		int child = 2 * parent + 1;

		while (child < count)
		{
			// Pick whichever child has to be closer to the top.
			if (child + 1 < count && compareNode(_v[child + 1], _v[child]))
			{
				child++;
			}
			if (compareNode(_v[child], _v[parent]))
			{
				swap(_v[parent], _v[child]);
				parent = child;
				child = 2 * parent + 1;
			}
			else
				break;
		}
	}

	// Sifts the element at index `child` up until its parent no longer has to
	// be below it.
	void _AdjustUp(int child)
	{
		using std::swap;
		CompareNode compareNode;
		int parent = (child - 1) / 2;
		while (child > 0)
		{
			if (compareNode(_v[child], _v[parent]))
			{
				swap(_v[parent], _v[child]);
				child = parent;
				parent = (child - 1) / 2;
			}
			else
				break;
		}
	}
private:
	std::vector<T> _v;
};


#include"FileCompress.h"

void test()
{
	FileCompress f;
	f.Compress("input.txt");

	f.unCompress("input.txt");
}


// Program entry point: runs the round-trip test and always reports success.
int main()
{
	test();

	// Hands "pause" to the command interpreter; presumably this keeps the
	// Windows console window open until a key is pressed.
	const char* const keepConsoleOpen = "pause";
	system(keepConsoleOpen);

	return 0;
}