c++ 資料結構 *** 哈夫曼樹的應用——壓縮軟體
阿新 • • 發佈:2019-01-01
資料結構的作業,壓縮軟體用的,具體寫的過程中有哪些問題在程式裡說吧。
標頭檔案與常量部分:
利用char的8位,來儲存檔案裡的元素。每次取出檔案中的8位並記錄這八位出現的次數,用來進行哈夫曼樹的建立。
三個類的宣告部分: Buffer類用來從檔案中讀取8位字元或者寫入8位字元。因為每次都只能從檔案中讀取或者寫入一個字元,所以利用Buffer類進行緩衝。 treeNode類是哈夫曼樹的結點類,其中儲存了樹中每個結點出現的頻率與其左右子結點與父節點。 HuffmanTree類是哈夫曼樹類,可以進行壓縮或解壓。
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<iterator>
#include<list>
#include<stack>
#include<string>
using namespace std;
//利用char進行8位一取,這樣不管是字元型別還是漢字或者其他格式都可以進行壓縮
const unsigned int N = 256;//char共有8位,那麼一共最多有256個可能的字元,葉節點數目
const unsigned int M = 2 * N;//對於擁有n個葉結點的huffman樹,一共有2*n-1個結點,陣列最後一位不存數,供select函式選擇使用
const unsigned long long MAX = 0xffffffffffff;
class Buffer {//緩衝類,對檔案進行讀取/寫入操作的時候通過Buffer類進行整個字元的讀取/寫入 public: unsigned int bits;//實際位元組長度 char ch;//位元組 }; class HuffmanTree;//huffman樹類宣告 class treeNode {//huffman樹結點類 friend HuffmanTree; private: unsigned long long weight;//該結點一共出現了多少次 unsigned int right, left; unsigned int parents; }; class HuffmanTree {//huffman樹類 private: treeNode nodes[M];//儲存樹的每個結點資訊,最多M個 unsigned int leaf[N];//儲存葉節點字元資訊,最多N個 unsigned int index[N];//儲存葉節點的下標值 char* leafCode[N];//儲存葉節點的編碼資訊 FILE* input, *output; unsigned int num;//實際上葉結點個數 unsigned long long size;//一共有多少個字元 Buffer buf;//輸入輸出緩衝 //輔助函式 void write(unsigned int i);//向檔案中寫入一個bite void write(unsigned int num, unsigned int bits);//向檔案中寫入bits位的num數 void writerest();//如果最後buf裡面還有字元沒有寫入,用這個函式一起寫入 void read(unsigned int &i);//從檔案中讀取一位bite,用i輸出 void read(unsigned int &num, unsigned int bits);//從檔案中讀取bits位的數,並用num輸出 //Encode輔助函式 void enSieve();//讀取檔案,並且統計檔案中字元數 void select(unsigned int pos,unsigned int &t1,unsigned int &t2);//在0——pos間選擇兩個權重最小的,用t1、t2輸出 void enSetTree();//對需要被壓縮的檔案中的字元建立huffman樹,並且完善每個結點的編碼資訊 //Decode輔助函式 void deSieve();//讀取需要被解壓檔案,並建造其哈夫曼樹 public: void Encode();//對檔案編碼 void Decode();//對檔案譯碼 };
HuffmanTree類的實現:
讀取/寫入輔助函式:
主要是利用Buffer類中的bits來記錄ch中實際有效的位數。在write的時候,如果ch中實際位數已經足夠8位,那麼將ch寫入檔案,然後再將bits置0,ch置0;在read的時候,如果ch的實際位數已經等於0,那麼從檔案中fgetc()一個8位數給ch,同時bits置8。同時,有可能在寫入到最後的時候,bits實際上並不等於0,那麼用writerest在後面補0湊滿8位,將剩下的ch寫入。
//輔助函式塊 void HuffmanTree::write(unsigned int i) {//向檔案中寫入一個bite buf.ch = (buf.ch << 1) + i;//對ch加上一位 buf.bits++;//ch的八位bite的實際使用量加一 if (buf.bits == 8) {//如果全部用完,那麼將buf.ch輸入進output中,並將buf.bits,buf.ch置0重新開始新一輪計數 fputc(buf.ch, output); buf.bits = 0; buf.ch = 0; } } void HuffmanTree::write(unsigned int target, unsigned int bits) {//向檔案中寫入bits位的num數 /*傻逼做法 for (unsigned int i = 0; i < bits; ++i) {//一位一位的放進去 write((target & 128)>>7 );//從高位依次放到低位 target <<= 1; } */ stack<unsigned int> s; unsigned int i, bit; for (i = 1; i <= bits; i++) { s.push(target & 1); target = (target >> 1); } for (i = 1; i <= bits; i++) { bit = s.top(); write(bit); s.pop(); } } void HuffmanTree::writerest() {//如果最後buf裡面還有字元沒有寫入,用這個函式一起寫入 unsigned int now = buf.bits; if (now>0) for (unsigned int i = 0; i<8 - now; i++)write(0); } void HuffmanTree::read(unsigned int &i) {//從檔案中讀取一位bite,用i輸出 if (buf.bits == 0) { buf.bits = 8; buf.ch = fgetc(input); } i = (buf.ch & 128) >> 7; buf.bits--; buf.ch <<= 1; } void HuffmanTree::read(unsigned int &target, unsigned int bits) {//從檔案中讀取bits位的數,並用num輸出 unsigned int tmp; target = 0; for (unsigned int i = 0; i < bits; ++i) { read(tmp); target = (target << 1) + tmp; } }
EnCode()函式及其輔助函式的實現部分:
昨晚在這部分出現了一個錯誤點,在enSetTree函式的實現部分,如下程式碼:
for (int i = 0; i < N; ++i)
leafCode[i] = NULL;
被我寫成了:
for (int i = 0; i <= N; ++i)
leafCode[i] = NULL;
然後就導致了FILE* input莫名其妙被置成了NULL,之後找到錯誤之後感慨了一下不要亂置NULL。。下標一定要看清。。。//Encode輔助函式塊
// Prompts for the source and target file names, opens both files, counts how
// often each byte value occurs in the source and seeds the leaf nodes.
//
// On return:
//   size      - number of bytes in the source file
//   num       - number of distinct byte values that occur
//   nodes[k]  - for k < num, weight = occurrence count of the k-th distinct byte
//   index[k]  - the byte value represented by leaf k
//   leaf[b]   - the leaf index assigned to byte value b (0 if b never occurs)
// Terminates the program with exit(1) if a file cannot be opened or the
// source contains no data.
void HuffmanTree::enSieve() {
    // std::string instead of a fixed char array: a long name typed at the
    // prompt can no longer overrun the buffer.
    std::string inName, outName;
    cout << "Input file name that you want to code:";
    cin >> inName;
    cout << "Input target file name:";
    cin >> outName;
    if ((input = fopen(inName.c_str(), "rb")) == NULL) {
        cout << "Can not open file." << endl;
        system("pause");
        exit(1);
    }
    // feof() is only set after a read has hit the end, so probe one byte to
    // detect an empty source before the target file is created.
    if (fgetc(input) == EOF) {
        cout << "Empty source file" << endl;
        system("pause");
        exit(1);
    }
    if ((output = fopen(outName.c_str(), "wb")) == NULL) {
        cout << "Can not open file." << endl;
        system("pause");
        exit(1);
    }

    // Reset all counters and mark every node as unlinked (M-1 = "none").
    rewind(input);
    size = 0;
    for (unsigned int i = 0; i < N; ++i) {
        leaf[i] = 0;
        index[i] = 0;
    }
    for (unsigned int i = 0; i < M; ++i) {
        nodes[i].weight = 0;
        nodes[i].left = nodes[i].right = nodes[i].parents = M - 1;
    }

    // Count byte frequencies. Comparing the int result against EOF stops the
    // loop on a read error as well as at end of file, and keeps the array
    // subscript inside 0..255.
    int c;
    while ((c = fgetc(input)) != EOF) {
        leaf[c]++;
        size++;
    }

    // The sentinel slot nodes[M-1] gets the largest weight so select() can
    // use it as its starting point for the minimum search.
    nodes[M - 1].weight = MAX;

    // Keep only the byte values that occur: copy the count into the next
    // free leaf node, then reuse leaf[] as a byte -> leaf index map.
    num = 0;
    for (unsigned int i = 0; i < N; ++i)
        if (leaf[i]) {
            nodes[num].weight = leaf[i];
            leaf[i] = num;
            index[num] = i;
            num++;
        }
    if (!num) {
        cout << "doesn't have a word" << endl;
        system("pause");
        exit(1);
    }
}
// Finds the two lightest nodes among nodes[0..pos-1] that have no parent yet.
// t1 receives the index of the lightest one and t2 the lightest of the rest.
// Either output stays at the sentinel index M-1 when no candidate's weight is
// below the sentinel's weight.
void HuffmanTree::select(unsigned int pos, unsigned int &t1, unsigned int &t2) {
    const unsigned int none = M - 1;
    t1 = none;
    t2 = none;

    // First pass: overall minimum among the parentless nodes.
    for (unsigned int cand = 0; cand < pos; ++cand) {
        if (nodes[cand].parents != none)
            continue;
        if (nodes[cand].weight < nodes[t1].weight)
            t1 = cand;
    }

    // Second pass: minimum again, this time skipping the node already chosen.
    for (unsigned int cand = 0; cand < pos; ++cand) {
        if (cand == t1 || nodes[cand].parents != none)
            continue;
        if (nodes[cand].weight < nodes[t2].weight)
            t2 = cand;
    }
}
// Builds the Huffman tree over the `num` leaves prepared by enSieve() and
// derives the code string of every leaf.
//
// Internal nodes are stored at indices num .. 2*num-2; the root is the last
// one. leafCode[k] receives a freshly allocated NUL-terminated string of
// '0'/'1' characters for leaf k ('0' = left branch, '1' = right branch).
void HuffmanTree::enSetTree() {
    // Reset unconditionally rather than free: this function cannot tell
    // whether the slots already hold valid pointers on entry.
    for (unsigned int i = 0; i < N; ++i)
        leafCode[i] = nullptr;

    // With no leaves, num * 2 - 1 would wrap around in unsigned arithmetic.
    if (num == 0)
        return;

    // Repeatedly join the two lightest parentless nodes under a new node.
    for (unsigned int i = num; i < num * 2 - 1; ++i) {
        unsigned int t1, t2;
        select(i, t1, t2);
        nodes[i].weight = nodes[t1].weight + nodes[t2].weight;
        nodes[i].left = t1;
        nodes[i].right = t2;
        nodes[t1].parents = nodes[t2].parents = i;
    }

    // Walk from each leaf up to the root collecting branch bits. The walk
    // yields the code back to front, so it is reversed while copying.
    std::string reversed;
    for (unsigned int i = 0; i < num; ++i) {
        reversed.clear();
        for (unsigned int c = i, f = nodes[i].parents; f != M - 1; c = f, f = nodes[c].parents)
            reversed.push_back(nodes[f].left == c ? '0' : '1');

        const unsigned int len = static_cast<unsigned int>(reversed.size());
        char *code = new char[len + 1];
        for (unsigned int k = 0; k < len; ++k)
            code[k] = reversed[len - 1 - k];
        code[len] = '\0';
        leafCode[i] = code;
    }
}
void HuffmanTree::Encode() {//對檔案編碼
enSieve();//初始化input,output;統計檔案中字元
enSetTree();//根據enSieve完成huffman樹的建立與對字元進行編碼
rewind(output);
rewind(input);
//向output的開頭中寫入樹結構
buf.bits = 0;
buf.ch = 0;
fwrite(&size,sizeof(unsigned long long),1,output);//寫入size
write(num, 8);//將樹結構中的葉結點個數寫入
for (unsigned int i = 0; i < num; ++i)//將樹節點中的葉節點寫入
fwrite(&index[i], sizeof(char), 1, output);
//選擇num最大需要多少位來儲存
unsigned maxbit = 1;
unsigned int tmp = num * 2 - 1;
while (tmp) {
maxbit++;
tmp >>= 1;
}
for (unsigned int i = num; i < num * 2 - 1; ++i) {//寫入左右孩子資訊
write(nodes[i].left, maxbit);
write(nodes[i].right, maxbit);
}
//寫入編碼資訊
unsigned int ch;
ch = fgetc(input);
while (!feof(input)) {
unsigned int start = 0;//判斷對ch的編碼leafCode[loc]的起始位置
while (leafCode[leaf[ch]][start] != '\0') {
if (leafCode[leaf[ch]][start] == '1')write(1);
else write(0);
++start;
}
ch = fgetc(input);
}
writerest();//寫入剩下的字元
cout << "Done!\n\n";
fclose(input);
fclose(output);
}
DeCode()函式及其輔助函式的實現部分:
//Decode輔助函式塊
// Prompts for the compressed source and the target file names, opens both
// files and rebuilds the Huffman tree from the header written by Encode().
//
// On return `size`, `num`, `index[]` and the child links of the internal
// nodes (indices num .. 2*num-2) are filled in, and the input is positioned
// at the first bit of the encoded data. Unused links hold N-1, which Decode()
// tests for to recognise a leaf.
// Terminates the program with exit(1) if a file cannot be opened or the
// header is missing, truncated or inconsistent.
void HuffmanTree::deSieve() {
    // Shared failure path: report, pause like the other errors, and quit.
    auto fail = [](const char *message) {
        cout << message << endl;
        system("pause");
        exit(1);
    };

    // std::string instead of a fixed char array: a long name typed at the
    // prompt can no longer overrun the buffer.
    std::string inName, outName;
    cout << "Input file name that you want to decode:";
    cin >> inName;
    cout << "Input target file name:";
    cin >> outName;
    if ((input = fopen(inName.c_str(), "rb")) == NULL)
        fail("Can not open file.");
    // feof() is only set after a read has hit the end, so probe one byte to
    // detect an empty source before the target file is created.
    if (fgetc(input) == EOF)
        fail("Empty source file");
    if ((output = fopen(outName.c_str(), "wb")) == NULL)
        fail("Can not open file.");

    // Start from the beginning with every link marked "none" (N-1).
    rewind(input);
    for (unsigned int i = 0; i < M; ++i) {
        nodes[i].parents = nodes[i].right = nodes[i].left = N - 1;
    }
    buf.bits = 0;
    buf.ch = 0;

    // Header: symbol count, leaf count and leaf byte values.
    if (fread(&size, sizeof(unsigned long long), 1, input) != 1)
        fail("Corrupted or truncated input file.");
    read(num, 8);
    if (num == 0)
        num = 256;  // 256 leaves do not fit in 8 bits and are stored as 0
    for (unsigned int i = 0; i < num; ++i) {
        // Read the byte as a value so the whole unsigned int is defined,
        // independent of the host byte order.
        const int c = fgetc(input);
        if (c == EOF)
            fail("Corrupted or truncated input file.");
        index[i] = static_cast<unsigned int>(c);
    }

    // Width used for node indices: one more than the bit length of 2*num-1,
    // matching the computation in Encode().
    unsigned maxbit = 1;
    unsigned int tmp = num * 2 - 1;
    while (tmp) {
        maxbit++;
        tmp >>= 1;
    }
    for (unsigned int i = num; i < num * 2 - 1; ++i) {
        read(nodes[i].left, maxbit);
        read(nodes[i].right, maxbit);
        // The encoder only ever links a node to lower-numbered nodes, so any
        // other value means a damaged file and would index outside nodes[].
        if (feof(input) || nodes[i].left >= i || nodes[i].right >= i)
            fail("Corrupted or truncated input file.");
        nodes[nodes[i].left].parents = nodes[nodes[i].right].parents = i;
    }
}
void HuffmanTree::Decode() {
deSieve();
//開始譯碼
rewind(output);
unsigned int tmp;
read(tmp);
for (int i = 0; i < size; ++i) {
unsigned int loc = 2 * num - 2;
while ((nodes[loc].left != N-1 || nodes[loc].right != N-1) && !feof(input)) {
if (tmp == 0)loc = nodes[loc].left;
else loc = nodes[loc].right;
read(tmp);
}
fputc(index[loc], output);
}
cout << "Done!\n\n";
fclose(input);
fclose(output);
}
寫下來大概感受就是注意二進位制的長短,以及不要寫的頭暈了。。。長度各種亂。。。奇葩。。
測試部分:
#pragma warning(disable:4996)
#include<iostream>
#include<cstdio>
#include<cmath>
#include<stack>
#include<queue>
#include<cstring>
#include<sstream>
#include<set>
#include<string>
#include<iterator>
#include<vector>
#include<map>
#include<algorithm>
#include"HuffmanTree.h"
using namespace std;
// Interactive driver: shows a menu until the user enters '3' and runs
// Encode() or Decode() on a fresh HuffmanTree for choices '1' and '2'.
// Any other input redisplays the menu.
int main(void) {
    cout << sizeof(char) << endl;
    char choose = '1';
    while (choose != '3') {
        HuffmanTree tree;
        cout << "1.Huffman Encode" << endl;
        cout << "2.Huffman Decode" << endl;
        cout << "3.exit" << endl;
        // Stop when standard input is closed or unreadable; otherwise the
        // previous choice would be acted on again, possibly forever.
        if (!(cin >> choose))
            break;
        switch (choose) {
        case '1':
            tree.Encode();
            break;
        case '2':
            tree.Decode();
            break;
        default:
            break;
        }
    }
    return 0;
}
測試效果:
第一波(純文字):
源:
壓縮後:
解壓後:
檔案大小對比:
第二波(圖片):
源:
壓縮後:
解壓後:
大小對比:
其實壓縮效果並不是很好,有待改進。