1. 程式人生 > >哈夫曼樹編解碼

哈夫曼樹編解碼

問題 B: DS_6.14 給定報文,哈弗曼編碼、譯碼(by Yan)
時間限制: 20 Sec 記憶體限制: 256 MB
提交: 303 解決: 218
[提交][狀態][討論版]
題目描述
已知某段通訊報文內容,對該報文進行哈弗曼編碼,並計算平均碼長。
(1)統計報文中各字元出現的頻度。(字元集範圍為52個大小寫英文字母、空格及英文句號;報文長度<=200)
(2)構造一棵哈弗曼樹,依次給出各字元編碼結果。
(3)給字串進行編碼。
(4)給編碼串進行譯碼。
(5)計算平均碼長。
規定:
(1)結點統計:以ASCII碼的順序依次排列,例如:空格,英文句號,大寫字母,小寫字母。
(2)構建哈弗曼樹時:左子樹根結點權值小於等於右子樹根結點權值。
(3)選擇的根節點權值相同時,前者構建為雙親的左孩子,後者為右孩子。
(4)生成編碼時:左分支標0,右分支標1。
輸入
第一部分:報文內容,以'#'結束。
第二部分:待譯碼的編碼串。
輸出
依次輸出報文編碼串、譯碼串、平均碼長,三者之間均以換行符間隔。
平均碼長,保留小數點2位。
樣例輸入

Data structure is the way of computer storage and organization data. A data structure is a collection of data elements that exist between one or more specific relationships.#
000111111110101111110100101010000110111100010011011000001110011110011100101011010011100001101111011001011010101011001101110010101011101011110110101000110001101001010101010001111000101001110111111101000001101010100000010111111110101110110011111101001111010111011100000011110101111000100110000111011000000111101011111101001010100001101111000100110110000011100111100111011111101110010100000100001001111000100111101011101110101010110011000000111101011111100010000100110111000111101010100111001010110111110101100010110001011110010101100110000001010000110001001111011101010111010011101010100011010111010101000001110100110111100111100011110110001111110011010000010000111110100111101011101100110110101111011111001000100

樣例輸出

000111111110101111110100101010000110111100010011011000001110011110011100101011010011100001101111011001011010101011001101110010101011101011110110101000110001101001010101010001111000101001110111111101000001101010100000010111111110101110110011111101001111010111011100000011110101111000100110000111011000000111101011111101001010100001101111000100110110000011100111100111011111101110010100000100001001111000100111101011101110101010110011000000111101011111100010000100110111000111101010100111001010110111110101100010110001011110010101100110000001010000110001001111011101010111010011101010100011010111010101000001110100110111100111100011110110001111110011010000010000111110100111101011101100110110101111011111001000100
Data structure is the way of computer storage and organization data. A data structure is a collection of data elements that exist between one or more specific relationships.
4.11

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define LettersNum 6
#define TextCharTypeNum 54
// One node of a Huffman tree kept in a flat array and linked by array index
// (a "three-link" node: parent, left child, right child).
// The tree-building code in this file stores real nodes starting at index 1
// and uses the value 0 in a link field to mean "no such node".
typedef struct {
    int weight;   // occurrence count of a leaf, or the sum of both children
    int Parent;   // index of the parent node, 0 while the node is still a root
    int Lchild;   // index of the left child, 0 for a leaf
    int Rchild;   // index of the right child, 0 for a leaf
} HFTreeNode;
// Finds the two lightest nodes among Tree[1..Range] that have no parent yet.
//
//   Tree   - node array; index 0 is not examined
//   Range  - last index (inclusive) to examine
//   Node1  - receives the index of the lightest parentless node
//   Node2  - receives the index of the lightest parentless node other than *Node1
//
// Ties are resolved in favour of the lower index, so with equal weights
// *Node1 precedes *Node2 in the array. When fewer than two candidates exist
// the missing result is reported as 0 instead of being left uninitialized.
// No fixed upper bound on the weights is assumed.
void SelectTwoMinimumWeightNodes(HFTreeNode Tree[],int Range,int *Node1,int *Node2)
{
    int lightest = 0;      // 0 = nothing found yet
    int runnerUp = 0;
    int i;

    for (i = 1; i <= Range; i++) {
        if (Tree[i].Parent != 0) {
            continue;       // already merged into a subtree
        }
        if (lightest == 0 || Tree[i].weight < Tree[lightest].weight) {
            lightest = i;
        }
    }

    for (i = 1; i <= Range; i++) {
        if (Tree[i].Parent != 0 || i == lightest) {
            continue;
        }
        if (runnerUp == 0 || Tree[i].weight < Tree[runnerUp].weight) {
            runnerUp = i;
        }
    }

    *Node1 = lightest;
    *Node2 = runnerUp;
}

// Builds a Huffman tree bottom-up inside a caller-provided static array.
//
//   Tree     - array with room for indices 1 .. 2*LeafsNum-1 (index 0 unused)
//   weights  - LeafsNum leaf weights; weights[k] becomes the weight of Tree[k+1]
//   LeafsNum - number of leaves
//
// Leaves occupy Tree[1..LeafsNum]; each internal node is appended after them
// by joining the two lightest parentless nodes found so far. The lighter one
// becomes the left child, the other the right child.
void CreateHuffmanTree(HFTreeNode *Tree,int weights[],int LeafsNum)
{
    const int total = 2 * LeafsNum - 1;   // node count of the finished tree
    int idx;
    int left, right;

    // Reset every slot; only the leaves start with a non-zero weight.
    for (idx = 1; idx <= total; idx++) {
        Tree[idx].weight = (idx <= LeafsNum) ? weights[idx - 1] : 0;
        Tree[idx].Parent = 0;
        Tree[idx].Lchild = 0;
        Tree[idx].Rchild = 0;
    }

    // Create the internal nodes one at a time.
    idx = LeafsNum + 1;
    while (idx <= total) {
        SelectTwoMinimumWeightNodes(Tree, idx - 1, &left, &right);

        Tree[idx].weight = Tree[left].weight + Tree[right].weight;
        Tree[idx].Lchild = left;
        Tree[idx].Rchild = right;
        Tree[left].Parent = idx;
        Tree[right].Parent = idx;

        idx++;
    }
}
// Debug helper: prints one line per tree node, indices 1 .. 2*LeafsNum-1,
// in the form "weight parent lchild rchild".
void PrintTreeContent(HFTreeNode *Tree,int LeafsNum)
{
    const int last = 2 * LeafsNum - 1;
    int idx = 1;

    while (idx <= last) {
        const HFTreeNode *node = &Tree[idx];
        printf("%d %d %d %d\n", node->weight, node->Parent, node->Lchild, node->Rchild);
        idx++;
    }
}
// Produces the Huffman code of one leaf as a NUL-terminated string of '0'/'1'.
//
//   Tree        - finished tree (links are array indices, 0 = none)
//   WhichLetter - index of the leaf whose code is wanted
//   Aim         - receives a malloc'ed string; the caller owns it and must
//                 free() it. Set to NULL when the allocation fails.
//
// The code is read from the leaf up to the root, so it is written back to
// front: a step from a left child contributes '0', any other step '1'.
// A node without a parent (single-node tree) yields the empty string.
void SetBinaryCode(HFTreeNode *Tree,int WhichLetter,char **Aim)
{
    int depth = 0;
    int node;
    int pos;
    char *code;

    // First pass: count the edges between the leaf and the root.
    for (node = WhichLetter; Tree[node].Parent != 0; node = Tree[node].Parent) {
        depth++;
    }

    code = malloc((size_t)depth + 1);
    *Aim = code;
    if (code == NULL) {
        return;     // caller must check *Aim
    }
    code[depth] = '\0';

    // Second pass: fill the bits from the last position towards the first.
    // Every position is always written and the walk always moves upward, so
    // the result never contains uninitialized bytes.
    node = WhichLetter;
    for (pos = depth - 1; pos >= 0; pos--) {
        const int parent = Tree[node].Parent;
        code[pos] = (Tree[parent].Lchild == node) ? '0' : '1';
        node = parent;
    }
}
// Prints the code table, one "letter:code" pair per line, in array order.
//
//   Codes        - code strings, parallel to ValidLetters
//   CodesSize    - number of entries in both arrays
//   ValidLetters - the character each code belongs to
void PrintLetterCorrespondingCodes(char *Codes[],int CodesSize,char ValidLetters[])
{
    int idx = 0;

    while (idx < CodesSize) {
        printf("%c:%s\n", ValidLetters[idx], Codes[idx]);
        idx++;
    }
}
// Writes the bit string for a text to stdout (no trailing newline).
//
//   Aim          - NUL-terminated text to encode
//   Codes        - code strings, parallel to ValidLetters
//   CodesSize    - number of entries in both arrays
//   ValidLetters - the character each code belongs to
//
// Characters that do not appear in ValidLetters produce no output.
void Encode(char *Aim,char *Codes[],int CodesSize,char ValidLetters[])
{
    const char *cursor;

    for (cursor = Aim; *cursor != '\0'; cursor++) {
        int slot = 0;

        // Linear search for the first table entry matching this character.
        while (slot < CodesSize && ValidLetters[slot] != *cursor) {
            slot++;
        }
        if (slot < CodesSize) {
            printf("%s", Codes[slot]);
        }
    }
}
// Decodes a string of '0'/'1' characters and writes the text to stdout
// (no trailing newline).
//
//   Aim          - NUL-terminated bit string
//   Codes        - code strings, parallel to ValidLetters
//   CodesSize    - number of entries in both arrays
//   ValidLetters - the character each code belongs to
//
// At each position the longest code that matches the input completely is
// taken. Decoding stops at the first position where no code fits, so
// trailing or invalid bits are ignored rather than turned into a spurious
// character. Empty codes are never matched, which guarantees progress.
void Decode(char *Aim,char *Codes[],int CodesSize,char ValidLetters[])
{
    const size_t total = strlen(Aim);
    size_t pos = 0;

    while (pos < total) {
        const size_t remaining = total - pos;
        size_t bestLen = 0;
        int best = -1;
        int k;

        for (k = 0; k < CodesSize; k++) {
            const size_t len = strlen(Codes[k]);

            // Skip codes that cannot fit or would not beat the current match.
            if (len == 0 || len > remaining || len <= bestLen) {
                continue;
            }
            if (strncmp(Aim + pos, Codes[k], len) == 0) {
                best = k;
                bestLen = len;
            }
        }

        if (best < 0) {
            break;      // nothing decodable from here on
        }
        printf("%c", ValidLetters[best]);
        pos += bestLen;
    }
}
// Counts how often each supported character occurs in a text.
//
//   TextCharWeights - output array of TextCharTypeNum counters (reset here)
//   Text            - NUL-terminated input text
//
// Counter layout: 'A'..'Z' first, then 'a'..'z' starting at
// (TextCharTypeNum-2)/2, then the space at TextCharTypeNum-2 and the period
// at TextCharTypeNum-1. Any other character is ignored.
// NOTE(review): the character table built by the caller has to use the same
// layout — confirm when changing either side.
void CountTextCharNums(int *TextCharWeights,char Text[])
{
    const int lowerBase = (TextCharTypeNum - 2) / 2;
    const char *p;
    int slot;

    for (slot = 0; slot < TextCharTypeNum; slot++) {
        TextCharWeights[slot] = 0;
    }

    for (p = Text; *p != '\0'; p++) {
        const char c = *p;

        if (c == ' ') {
            slot = TextCharTypeNum - 2;
        } else if (c == '.') {
            slot = TextCharTypeNum - 1;
        } else if (c >= 'A' && c <= 'Z') {
            slot = c - 'A';
        } else if (c >= 'a' && c <= 'z') {
            slot = c - 'a' + lowerBase;
        } else {
            continue;   // unsupported character
        }
        TextCharWeights[slot]++;
    }
}
// Returns the plain (unweighted) arithmetic mean of the code lengths:
// sum of strlen(Codes[k]) divided by CodesSize.
//
//   Codes     - NUL-terminated code strings
//   CodesSize - number of entries in Codes
//
// Returns 0 when CodesSize is zero or negative, instead of dividing by zero
// or walking past the array.
// Note that this does not take symbol frequencies into account.
float ComputeAverageBinStrLength(char *Codes[],int CodesSize)
{
    size_t total = 0;
    int i;

    if (CodesSize <= 0) {
        return 0.0f;
    }
    for (i = 0; i < CodesSize; i++) {
        total += strlen(Codes[i]);
    }
    return (float)total / (float)CodesSize;
}
// Returns how many of the TextCharTypeNum counters in weights are positive,
// i.e. how many distinct supported characters were seen.
int GetValidWeightsNum(int weights[])
{
    const int *cur = weights;
    const int *const end = weights + TextCharTypeNum;
    int count = 0;

    for (; cur != end; cur++) {
        count += (*cur > 0);
    }
    return count;
}
// Compacts the full counter table into parallel arrays that hold only the
// characters which actually occur.
//
//   NewWeights   - output: the positive counts, in table order
//   OldWeights   - input: TextCharTypeNum counters
//   ValidLetters - output: the character belonging to each NewWeights entry
//   ValidWeights - capacity of both output arrays
//   TextChar     - input: character for each of the TextCharTypeNum slots
//
// At most ValidWeights entries are written; further positive counters are
// dropped.
void CopytoNewWeightsArr(int *NewWeights,int OldWeights[],char *ValidLetters,int ValidWeights,char TextChar[])
{
    int src;
    int dst = 0;

    for (src = 0; src < ValidWeights; src++) {
        NewWeights[src] = 0;
    }

    // Stop as soon as the output is full; nothing more would be copied.
    for (src = 0; src < TextCharTypeNum && dst < ValidWeights; src++) {
        if (OldWeights[src] <= 0) {
            continue;
        }
        NewWeights[dst] = OldWeights[src];
        ValidLetters[dst] = TextChar[src];
        dst++;
    }
}
// Program entry point.
//
// Input (stdin):  the message, terminated by '#', followed by a string of
//                 '0'/'1' characters to decode.
// Output (stdout): the code table, the encoded message, the decoded bit
//                 string and the average code length with two decimals.
//
// Returns 0 on success, EXIT_FAILURE if memory for a code string cannot be
// allocated.
int main()
{
    enum { MaxTextLen = 200, MaxBitsLen = 3000 };
    const int lowerBase = (TextCharTypeNum - 2) / 2;
    int i;
    int ch;     // int, not char, so that EOF can be told apart from data

    // Slot -> character table. The layout (upper case, lower case, space,
    // period) mirrors the indexing used by CountTextCharNums.
    // NOTE(review): the problem statement asks for ASCII order (space, period,
    // upper case, lower case); that affects tie-breaking in the tree — confirm
    // against the expected output before changing both places together.
    char TextChar[TextCharTypeNum];
    for (i = 0; i < TextCharTypeNum; i++) {
        if (i < lowerBase) {
            TextChar[i] = (char)('A' + i);
        } else if (i < TextCharTypeNum - 2) {
            TextChar[i] = (char)('a' + (i - lowerBase));
        } else if (i == TextCharTypeNum - 2) {
            TextChar[i] = ' ';
        } else {
            TextChar[i] = '.';
        }
    }

    // Read the message up to '#'. One extra byte keeps the buffer
    // NUL-terminated even for a message of the maximum length; anything
    // beyond the limit is consumed and dropped so the '#' is still found.
    char Text[MaxTextLen + 1] = {0};
    i = 0;
    while ((ch = getchar()) != EOF && ch != '#') {
        if (ch == '\n' || ch == '\r') {
            continue;
        }
        if (i < MaxTextLen) {
            Text[i++] = (char)ch;
        }
    }

    int TextCharWeights[TextCharTypeNum];   // counters for every supported character
    CountTextCharNums(TextCharWeights, Text);
    int ValidWeights = GetValidWeightsNum(TextCharWeights);   // distinct characters seen

    // Fixed-size work arrays: ValidWeights never exceeds TextCharTypeNum, and
    // this avoids zero-length variable-length arrays for an empty message.
    int Weights[TextCharTypeNum];
    char ValidLetters[TextCharTypeNum];
    char *Codes[TextCharTypeNum];
    HFTreeNode HFTree[2 * TextCharTypeNum];
    for (i = 0; i < TextCharTypeNum; i++) {
        Codes[i] = NULL;
    }

    CopytoNewWeightsArr(Weights, TextCharWeights, ValidLetters, ValidWeights, TextChar);
    CreateHuffmanTree(HFTree, Weights, ValidWeights);
    //PrintTreeContent(HFTree,TextCharTypeNum);

    for (i = 0; i < ValidWeights; i++) {
        SetBinaryCode(HFTree, i + 1, &Codes[i]);
        if (Codes[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            while (i > 0) {
                free(Codes[--i]);
            }
            return EXIT_FAILURE;
        }
    }

    // A message with a single distinct character yields a one-node tree and
    // therefore an empty code; give that symbol the one-bit code "0" so the
    // message can still be encoded and decoded.
    if (ValidWeights == 1 && Codes[0][0] == '\0') {
        char *oneBit = malloc(2);
        if (oneBit == NULL) {
            fprintf(stderr, "out of memory\n");
            free(Codes[0]);
            return EXIT_FAILURE;
        }
        oneBit[0] = '0';
        oneBit[1] = '\0';
        free(Codes[0]);
        Codes[0] = oneBit;
    }

    // NOTE(review): the sample output in the problem statement shows only the
    // encoded string, the decoded string and the average — confirm whether
    // the code table should really be printed.
    PrintLetterCorrespondingCodes(Codes, ValidWeights, ValidLetters);
    Encode(Text, Codes, ValidWeights, ValidLetters);
    printf("\n");

    // Read the bit string: skip line breaks or anything else that precedes
    // the first bit, then collect consecutive '0'/'1' characters.
    char BinStr[MaxBitsLen + 1];
    int bits = 0;
    do {
        ch = getchar();
    } while (ch != EOF && ch != '0' && ch != '1');
    while ((ch == '0' || ch == '1') && bits < MaxBitsLen) {
        BinStr[bits++] = (char)ch;
        ch = getchar();
    }
    BinStr[bits] = '\0';
    //printf("\n%s\n",BinStr);

    if (ValidWeights > 0) {
        Decode(BinStr, Codes, ValidWeights, ValidLetters);
    }

    // Average code length = encoded bits per message character, i.e. the
    // frequency-weighted mean of the code lengths. (An unweighted mean over
    // the 26 distinct symbols of the sample could not be below 4.77, while
    // the expected answer is 4.11.)
    unsigned long totalBits = 0;
    unsigned long totalChars = 0;
    for (i = 0; i < ValidWeights; i++) {
        totalBits += (unsigned long)Weights[i] * (unsigned long)strlen(Codes[i]);
        totalChars += (unsigned long)Weights[i];
    }
    double average = 0.0;
    if (totalChars > 0) {
        average = (double)totalBits / (double)totalChars;
    }
    printf("\n%.2f\n", average);

    for (i = 0; i < ValidWeights; i++) {
        free(Codes[i]);
    }
    return 0;
}