1. 程式人生 > >Huffman編碼壓縮解壓縮檔案

Huffman編碼壓縮解壓縮檔案

本文以 Huffman(哈夫曼)編碼搭配優先佇列(最小堆)實現檔案的壓縮與解壓縮。

標頭檔案

#ifndef HUFFMAN_H_INCLUDED
#define HUFFMAN_H_INCLUDED
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include <errno.h>
#include <limits.h>
/* Capacity init_heap() gives a new heap (its array is allocated one slot larger). */
#define INIT_SIZE 1000
#define FALSE 0
#define TRUE 1
/* Length of the per-byte weight table and of the tree's code table. */
#define WEIGHT_SIZE 256
/* Size in bytes of the scratch buffer allocated by build_code_table(). */
#define CODE_SIZE 40

typedef int BOOL;

/*
 * One node of the Huffman tree. The priority queue stores pointers to
 * the very same structure (see heap_node below).
 */
typedef struct huff_node
{
    unsigned char value;            /* byte represented by a leaf */
    char *code;                     /* leaf only: NUL-terminated string of '0'/'1' */
    struct huff_node *left_child;   /* branch followed for a '0' bit */
    struct huff_node *right_child;  /* branch followed for a '1' bit */
    struct huff_node *parent;
    int weight;                     /* occurrence count; the heap orders nodes by it */

} huff_node,*phuff_node;

/* A built Huffman tree plus direct access to its leaves. */
typedef struct
{
    phuff_node root;
    phuff_node *code_table;         /* WEIGHT_SIZE entries indexed by byte value,
                                       NULL for bytes that never occur */
}huff_tree,*phuff_tree;

/* The heap stores Huffman nodes directly, ordered by ascending weight. */
typedef huff_node heap_node,*pheap_node;
typedef struct heap
{
    pheap_node *value;              /* slot 0 is unused; entries live in 1..length */
    int capacity;
    int length;
} heap,*pheap;

/* Prints msg through perror() and terminates the program. */
void error(char *msg);

/* Min-heap (priority queue) operations. */
BOOL is_emp(pheap heap);
BOOL is_full(pheap heap);
pheap init_heap();
void insert_heap(pheap heap,pheap_node heap_node);
pheap_node delete_heap(pheap heap);
void reset_heap(pheap heap);
void print_node(pheap_node node);
void print_heap(pheap heap);

/*
 * Huffman tree construction and teardown. These are called across
 * translation units (and before their definitions), so they must be
 * declared here; without prototypes the calls are implicit declarations.
 */
phuff_tree build_huff_tree(int *weight);
void build_code_table(phuff_tree huff_tree);
void build_code(phuff_node cur_pos,char *code_buff,int len);
void free_huff_node(phuff_node node);
void free_huff_tree(phuff_tree huff_tree);

/* Compression and decompression entry points. */
int *get_weight(char *file_name);
void compress(char *ori_file_name,char *com_file_name);
int *read_weight(FILE *com_file);
void decompress(char *com_file_name,char *ori_file_name);
#endif // HUFFMAN_H_INCLUDED
堆定義
#include "huffman.h"

/*
 * Report a fatal error and terminate the program.
 *
 * msg is handed to perror(), which prints it to stderr followed by the
 * description of the current errno value. This function never returns.
 */
void error(char *msg)
{
    perror(msg);
    /* exit() takes an int status; passing the string "-1" handed it a
       pointer, producing an arbitrary exit code. */
    exit(EXIT_FAILURE);
}
/* Returns 1 when the heap holds no entries, 0 otherwise. */
BOOL is_emp(pheap heap)
{
    return heap->length == 0;
}
/* Returns 1 when the heap has reached its capacity, 0 otherwise. */
BOOL is_full(pheap heap)
{
    return heap->capacity == heap->length;
}

pheap init_heap()
{
    pheap heap = (pheap)malloc(sizeof(heap));
    if(heap  == NULL)
        error("heap malloc error\n");
    heap->value = (pheap_node*)malloc((INIT_SIZE + 1) * sizeof(pheap_node));
    if(heap->value == NULL)
        error("heap->value malloc error\n");
    heap->capacity = INIT_SIZE;
    heap->length = 0;

    return heap;
}

/*
 * Insert node into the min-heap, keeping entries ordered by weight.
 *
 * A node whose value is non-zero has its child links cleared before it is
 * inserted. A node whose value is 0 is left untouched, so a leaf for byte
 * 0 must arrive with its child links already initialised by the caller.
 *
 * Terminates the program through error() when the heap is full.
 */
void insert_heap(pheap heap,pheap_node node)
{
    if(is_full(heap))
        error("heap is full\n");

    /* value is an unsigned char: compare it with 0, not with the NULL
       pointer constant. */
    if(node->value != 0)
    {
        node->left_child = NULL;
        node->right_child = NULL;
    }

    /* Open a hole at the end and move it up while the parent is heavier.
       For an empty heap the hole is slot 1 and the loop does not run, so
       no separate empty-heap case is needed. */
    int hole = ++heap->length;
    while(hole > 1 && heap->value[hole / 2]->weight > node->weight)
    {
        heap->value[hole] = heap->value[hole / 2];
        hole /= 2;
    }
    heap->value[hole] = node;
}

/* Print one node's byte (as a character) and its weight on a single line. */
void print_node(pheap_node node)
{
    const unsigned char symbol = node->value;
    const int count = node->weight;

    printf("value:%c weight:%d\n",symbol,count);
}
/* Print every heap entry in array order (slots 1..length), then a blank line. */
void print_heap(pheap heap)
{
    int slot = 0;

    while(slot < heap->length)
        print_node(heap->value[++slot]);
    printf("\n");
}
/*
 * Remove and return the entry with the smallest weight.
 *
 * The last entry is taken out of the array and re-inserted by sifting a
 * hole down from the root. Terminates through error() on an empty heap.
 */
pheap_node delete_heap(pheap heap)
{
    if(is_emp(heap))
        error("heap is empty\n");

    pheap_node *slots = heap->value;
    pheap_node smallest = slots[1];
    pheap_node last = slots[heap->length];
    int hole = 1;

    heap->length--;
    while(hole * 2 <= heap->length)
    {
        int lighter = hole * 2;

        /* Pick the right child when it exists and is strictly lighter. */
        if(lighter < heap->length && slots[lighter + 1]->weight < slots[lighter]->weight)
            lighter++;
        /* Stop once the displaced entry is lighter than both children. */
        if(last->weight < slots[lighter]->weight)
            break;
        slots[hole] = slots[lighter];
        hole = lighter;
    }
    slots[hole] = last;

    return smallest;
}

/*
 * Re-establish min-heap order (by weight) over slots 1..length.
 *
 * Visits every slot that has at least one child, from slot length / 2
 * back to slot 1. When a slot is heavier than its lighter child the two
 * are swapped and the displaced node is then sifted further down.
 * A heap with fewer than two entries is left untouched.
 */
void reset_heap(pheap heap)
{
    int i = heap->length / 2,j,child;
    pheap_node temp_node;
    for(; i > 0; i--)
    {
        child = 2 * i;
        /* Choose the right child when it exists and is strictly lighter. */
        if((child != heap->length) && (heap->value[child]->weight > heap->value[child + 1]->weight))
            child ++;
        if(heap->value[i]->weight > heap->value[child]->weight)
        {
            temp_node = heap->value[i];
            heap->value[i] = heap->value[child];
            heap->value[child] = temp_node;
            /* The subtree changed, so check whether it needs further adjustment.
               Note that this loop keeps moving on equal weights, whereas the
               swap above happens only on a strictly greater weight. */
            for(j = child; 2 * j <= heap->length; j = child)
            {
                child = 2 * j;
                if((child != heap->length) && (heap->value[child]->weight > heap->value[child + 1]->weight))
                    child ++;
                if(temp_node->weight < heap->value[child]->weight)
                    break;
                heap->value[j] = heap->value[child];
            }
            /* j is the final resting slot of the displaced node. */
            heap->value[j] = temp_node;
        }
    }

}

Huffman樹定義及操作
#include "huffman.h"

/*
 * Recursively release node, both of its subtrees and its code string.
 * A NULL node is ignored.
 */
void free_huff_node(phuff_node node)
{
    if(node == NULL)
        return;

    free_huff_node(node->left_child);
    free_huff_node(node->right_child);
    free(node->code);   /* free(NULL) is a no-op */
    free(node);
}
/*
 * Release a Huffman tree: all nodes, the code table array and the tree
 * header itself. A NULL tree is ignored.
 */
void free_huff_tree(phuff_tree huff_tree)
{
    /* The NULL check must cover the whole function, not just the first
       statement, otherwise code_table is read through a NULL pointer. */
    if(huff_tree == NULL)
        return;

    free_huff_node(huff_tree->root);

    /* The code_table entries point at the tree's own leaves, which were
       released by the call above; freeing them again would be a double
       free. Only the array that holds the pointers is released here. */
    free(huff_tree->code_table);
    free(huff_tree);
}
phuff_tree build_huff_tree(int *weight)
{
    pheap_node node;
    pheap heap = init_heap();
    pheap_node *code_table = (pheap_node*)malloc(WEIGHT_SIZE * sizeof(pheap_node));
    int i,j;
    for(i = 0; i < WEIGHT_SIZE; i++)
    {
        if(weight[i] != 0)
        {
            node =(pheap_node)malloc(sizeof(heap_node));
            node->weight = weight[i];
            node->value = (unsigned char)i;
            code_table[i] = node;//儲存所有的葉子節點
            insert_heap(heap,node);
        }
        else code_table[i] = NULL;
    }

    phuff_tree huff_tree = (phuff_tree)malloc(sizeof(huff_tree));

    if(huff_tree == NULL) error("huff_tree malloc error\n");

    huff_tree->code_table = code_table;

    pheap_node heap_node_1,heap_node_2,merge_node;
    while(heap->length > 1)
    {
        heap_node_1 = delete_heap(heap);
        heap_node_2 = delete_heap(heap);

        merge_node = (pheap_node)malloc(sizeof(heap_node));
        if(merge_node == NULL)
            error("merge_node malloc error\n");
        merge_node->value = NULL;
        merge_node->weight = heap_node_1->weight + heap_node_2->weight;
        merge_node->left_child = heap_node_1;
        merge_node->right_child = heap_node_2;
        heap_node_1->parent = merge_node;
        heap_node_2->parent = merge_node;
        insert_heap(heap,merge_node);
    }
    merge_node->parent = NULL;
    huff_tree->root = merge_node;
    build_code_table(huff_tree);
    return huff_tree;
}


/*
 * Assign a '0'/'1' code string to every leaf of huff_tree.
 *
 * Does nothing when the tree or its root is NULL.
 */
void build_code_table(phuff_tree huff_tree)
{
    /* With at most 256 distinct byte values the tree is at most 255 levels
       deep, so 256 bytes always hold the longest path. The previous
       40-byte heap buffer could be overrun by a skewed frequency table
       and was never released. */
    enum { MAX_CODE_BUFF = 256 };
    char code_buff[MAX_CODE_BUFF];

    if(huff_tree == NULL || huff_tree->root == NULL)
        return;

    memset(code_buff,'\0',sizeof code_buff);
    build_code(huff_tree->root,code_buff,0);
}

/*
 * Walk the subtree rooted at cur_pos and store each leaf's code.
 *
 * code_buff holds the path taken so far ('0' for a left branch, '1' for a
 * right branch) and len is the number of characters of it in use. Every
 * leaf receives its own heap-allocated, NUL-terminated copy of the path.
 * The caller must supply a buffer large enough for the deepest path.
 * Terminates the program if a code string cannot be allocated.
 */
void build_code(phuff_node cur_pos,char* code_buff,int len)
{
    if((cur_pos->left_child == NULL) && (cur_pos->right_child == NULL))
    {
        char *code = malloc((size_t)len + 1);
        if(code == NULL)
        {
            /* Previously the NULL result was written through unchecked. */
            perror("code malloc error\n");
            exit(EXIT_FAILURE);
        }
        memcpy(code,code_buff,(size_t)len);
        code[len] = '\0';
        cur_pos->code = code;
        return;     /* a leaf has no children to descend into */
    }

    if(cur_pos->left_child != NULL)
    {
        code_buff[len] = '0';
        build_code(cur_pos->left_child,code_buff,len + 1);
    }
    if(cur_pos->right_child != NULL)
    {
        code_buff[len] = '1';
        build_code(cur_pos->right_child,code_buff,len + 1);
    }
}
壓縮
#include "huffman.h"

/*
 * Count how often each byte value occurs in the file file_name.
 *
 * Returns a newly allocated table of WEIGHT_SIZE counters indexed by byte
 * value; the caller frees it. An empty file yields a table of zeros.
 * Terminates through error() if the file cannot be opened or read, or if
 * the table cannot be allocated.
 *
 * NOTE(review): the file is opened in text mode, as compress() does. On
 * platforms that translate line endings this only round-trips text files;
 * switching to binary mode has to be done here, in compress() and in
 * decompress() together.
 */
int* get_weight(char *file_name)
{
    FILE *file = fopen(file_name,"r");
    if(file == NULL)
        error("file open error\n");

    int *weight = calloc(WEIGHT_SIZE,sizeof *weight);
    if(weight == NULL)
        error("weight malloc error\n");

    /* Loop on the fread() result: feof() only turns true after a read has
       already failed, so testing it first counted the last byte twice
       (and indexed with an uninitialised byte for an empty file). */
    unsigned char byte;
    while(fread(&byte,1,1,file) == 1)
        weight[byte]++;
    if(ferror(file))
        error("file read error\n");

    fclose(file);

    return weight;
}

/*
 * Compress ori_file_name into com_file_name.
 *
 * Output layout:
 *   line 1: "<byte> <count> " for every byte value that occurs
 *   line 2: number of bytes in the original file
 *   rest:   the Huffman codes of all input bytes, packed most significant
 *           bit first; the last byte is padded with zero bits
 * An empty input produces only the two header lines.
 * Terminates through error() on any I/O failure.
 */
void compress(char* ori_file_name,char* com_file_name)
{
    int *weight = get_weight(ori_file_name);

    FILE *ori_file = fopen(ori_file_name,"r");
    if(ori_file == NULL)
        error("file open error\n");
    FILE *com_file = fopen(com_file_name,"wb");
    if(com_file == NULL)
        error("file open error\n");

    /* Write the weight table; the decompressor rebuilds the tree from it. */
    long file_length = 0;
    int i;
    for(i = 0; i < WEIGHT_SIZE; i++)
    {
        if(weight[i] != 0)
        {
            fprintf(com_file,"%d %d ",i,weight[i]);
            file_length += weight[i];
        }
    }
    fputc('\n',com_file);
    /* get_weight() counts exactly, so the sum is the file length as is. */
    fprintf(com_file,"%ld\n",file_length);

    if(file_length > 0)
    {
        phuff_tree tree = build_huff_tree(weight);
        unsigned char byte;
        unsigned char pending = 0;  /* bits collected for the next output byte */
        int used = 0;               /* number of bits held in pending */

        while(fread(&byte,1,1,ori_file) == 1)
        {
            /* A missing entry means the file changed after it was counted. */
            if(tree->code_table[byte] == NULL || tree->code_table[byte]->code == NULL)
                error("input changed while compressing\n");

            const char *bit;
            for(bit = tree->code_table[byte]->code; *bit != '\0'; bit++)
            {
                pending = (unsigned char)((pending << 1) | (*bit != '0'));
                if(++used == 8)
                {
                    fputc(pending,com_file);
                    pending = 0;
                    used = 0;
                }
            }
        }
        if(ferror(ori_file))
            error("file read error\n");

        /* Flush a partially filled byte, padding the low bits with 0. */
        if(used > 0)
        {
            pending = (unsigned char)(pending << (8 - used));
            fputc(pending,com_file);
        }

        /* NOTE(review): this releases only the tree header, exactly as
           before; the nodes and the code table stay allocated until the
           process exits. */
        free(tree);
    }

    if(ferror(com_file))
        error("file write error\n");
    fclose(ori_file);
    if(fclose(com_file) != 0)
        error("file write error\n");
    free(weight);
}
解壓縮
#include "huffman.h"

int* read_weight(FILE *com_file)
{
    int* weight = (int*)malloc(WEIGHT_SIZE * sizeof(int));
    int i = 0;
    for(; i < WEIGHT_SIZE; i++)
        weight[i] = 0;
    char *temp_read = (char*)malloc(WEIGHT_SIZE * 10 * sizeof(char));
    if(fgets(temp_read,WEIGHT_SIZE * 10,com_file) == NULL)
        error("read com_file for weight erroe\n");
    char *pre_pos = temp_read,*cur_pos = strstr(temp_read," ");
    int num;
    char str[10];
    while(cur_pos !=NULL)
    {
        memset(str,'\0',10);
        strncpy(str,pre_pos,cur_pos - pre_pos + 1);
        num = atoi(str);

        pre_pos = cur_pos;
        cur_pos = strstr(cur_pos + 1," ");
        memset(str,'\0',10);
        strncpy(str,pre_pos,cur_pos - pre_pos + 1);
        weight[num] = atoi(str);
        pre_pos = cur_pos;
        cur_pos = strstr(cur_pos + 1," ");
    }

    return weight;
}

void decompress(char* com_file_name,char* ori_file_name)
{
    FILE *com_file = fopen(com_file_name,"rb+");
    FILE *ori_file = fopen(ori_file_name,"w+");
    int *weight = read_weight(com_file);
    long file_length = 0;
    //fread(&file_length,sizeof(long),1,com_file);
    char *buf = (char*)malloc(sizeof(long) * 8 * sizeof(char));
    fgets(buf,sizeof(long) * 8,com_file);
    file_length = atol(buf);
    free(buf);

    if(file_length == 0) error("file length is 0!\n");
    phuff_tree huff_tree = build_huff_tree(weight);
    unsigned char temp_read = 0;
    pheap_node huff_pos = huff_tree->root;

    int i = 0,j = 0;
    while(!feof(com_file))
    {

        fread(&temp_read,sizeof(char),1,com_file);

        for(i = 0; i < 8; i++)
        {
            if((temp_read & 0x80) >> 1 == 64)

                huff_pos = huff_pos->right_child;
            else
                huff_pos = huff_pos->left_child;

            if(huff_pos->left_child == NULL && huff_pos->right_child == NULL)
            {
                fwrite(&(huff_pos->value),sizeof(unsigned char),1,ori_file);
                huff_pos = huff_tree->root;
                j++;
                if(j >= file_length) break;
            }
            temp_read=temp_read << 1;
        }
    }
    fclose(ori_file);
    fclose(com_file);
    free(huff_tree);
}


測試
/*
 * Interactive round trip: asks for an input file name and a compressed
 * file name, compresses the former into the latter and then expands the
 * result into a file named "decompress".
 */
int main()
{
    char ori_file_name[20],com_file_name[20];

    /* %19s bounds each read to the 20-byte buffer (19 characters plus the
       terminator); a bare %s lets a longer name overflow it. */
    printf("請輸入要壓縮前的檔名(長度小於20):");
    if(scanf("%19s",ori_file_name) != 1)
    {
        fprintf(stderr,"讀取檔名失敗\n");
        return 1;
    }
    printf("請輸入要壓縮後的檔名(長度小於20):");
    if(scanf("%19s",com_file_name) != 1)
    {
        fprintf(stderr,"讀取檔名失敗\n");
        return 1;
    }

    printf("正在壓縮...\n");
    compress(ori_file_name,com_file_name);
    printf("正在解壓,解壓後的檔名為decompress...\n");
    decompress(com_file_name,"decompress");

    return 0;
}