1. 程式人生 > >詞頻統計(未完成,錯誤)

詞頻統計(未完成,錯誤)

#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstdlib>
#include<cstring>
#include<algorithm>
#include<vector>
using namespace std;
/*
 * Word-frequency counter.
 *
 * Reads characters from standard input up to the first '#' (or end of input),
 * splits them into words made of letters, digits and '_' (letters are folded
 * to lower case), counts every distinct word in a separate-chaining hash
 * table, prints the number of distinct words, and then prints the most
 * frequent tenth of them ordered by descending count and ascending spelling.
 */

#define KEYLENGTH 15        /* longest key stored; longer words are truncated */
#define MAXWORDLEN 80       /* capacity of the raw word buffer in GetAWord */
#define MAXTABLESIZE 100000 /* NextPrime stops searching above this value */

typedef char ET[KEYLENGTH + 1];
typedef int Index;

/* One chain node. The same struct is also used for the bucket heads, where
 * Data is unused and Count holds the number of nodes in that bucket. */
typedef struct LNode* PtrToLNode;
struct LNode {
    ET Data;
    PtrToLNode Next;
    int Count;
};
typedef PtrToLNode Position;
typedef PtrToLNode List;

typedef struct TblNode* HashTable;
struct TblNode {
    int TableSize; /* number of buckets */
    List Heads;    /* array of TableSize dummy head nodes */
};

int flag;              /* 1 while input remains; GetAWord clears it at '#' or EOF */
int cnt;               /* number of distinct words, set by Show */
std::vector<LNode> v;  /* candidate words selected by Show, sorted by main */

/* malloc wrapper that terminates the program instead of returning NULL. */
static void* CheckedAlloc(size_t bytes) {
    void* p = std::malloc(bytes);
    if (p == NULL) {
        std::fprintf(stderr, "out of memory\n");
        std::exit(EXIT_FAILURE);
    }
    return p;
}

/*
 * Ordering used for the final listing: higher Count first; equal counts are
 * ordered by strcmp on Data. Returns true when a must come before b.
 */
bool cmp(const LNode& a, const LNode& b) {
    if (a.Count != b.Count) return a.Count > b.Count;
    return std::strcmp(a.Data, b.Data) < 0;
}

/*
 * Returns the smallest odd prime strictly greater than N (never less than 3).
 * If no such prime is found at or below MAXTABLESIZE, the first odd candidate
 * above MAXTABLESIZE is returned unchecked.
 */
int NextPrime(int N) {
    int p;
    if (N < 2)
        p = 3; /* also covers zero and negative requests */
    else
        p = (N % 2) ? N + 2 : N + 1;

    while (p <= MAXTABLESIZE) {
        bool isPrime = true;
        /* p is odd, so only odd divisors need testing; i * i stays small
         * because p never exceeds MAXTABLESIZE inside this loop. */
        for (int i = 3; i * i <= p; i += 2) {
            if (p % i == 0) {
                isPrime = false;
                break;
            }
        }
        if (isPrime) break;
        p += 2;
    }
    return p;
}

/*
 * Allocates a table with NextPrime(TableSize) empty buckets.
 * Every head node starts with an empty key, no chain and a zero Count.
 */
HashTable CreateTable(int TableSize) {
    HashTable H = static_cast<HashTable>(CheckedAlloc(sizeof(struct TblNode)));
    H->TableSize = NextPrime(TableSize);
    H->Heads = static_cast<List>(CheckedAlloc(H->TableSize * sizeof(struct LNode)));
    for (int i = 0; i < H->TableSize; i++) {
        H->Heads[i].Data[0] = '\0';
        H->Heads[i].Next = NULL;
        H->Heads[i].Count = 0; /* InsertAndCount increments this; Show sums it */
    }
    return H;
}

/* Shift-and-add string hash of Key, reduced modulo TableSize. */
int Hash(const char* Key, int TableSize) {
    unsigned int H = 0;
    while (*Key != '\0') H = (H << 5) + *Key++;
    return H % TableSize;
}

/* Returns the node holding Key, or NULL when Key is not in the table. */
Position Find(HashTable H, ET Key) {
    Index Pos = Hash(Key, H->TableSize);
    Position P = H->Heads[Pos].Next;
    while (P && std::strcmp(P->Data, Key)) P = P->Next;
    return P;
}

/*
 * Records one occurrence of Key: bumps the counter of an existing node, or
 * links a new node with Count 1 at the front of its bucket and bumps that
 * bucket's node count. An empty Key is ignored.
 */
void InsertAndCount(HashTable H, ET Key) {
    if (Key[0] == '\0') return;

    Position P = Find(H, Key);
    if (P) {
        P->Count++;
        return;
    }

    Position NewCell = static_cast<Position>(CheckedAlloc(sizeof(struct LNode)));
    std::strcpy(NewCell->Data, Key);
    NewCell->Count = 1;

    Index Pos = Hash(Key, H->TableSize);
    NewCell->Next = H->Heads[Pos].Next;
    H->Heads[Pos].Next = NewCell;
    H->Heads[Pos].Count++;
}

/* True for lower-case letters, digits and '_'. Callers fold case first. */
bool IsWordChar(char c) {
    return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_';
}

/*
 * Reads the next word from standard input into word.
 *
 * Leading non-word characters are skipped. The word ends at the first
 * non-word character after it (that character is consumed). Letters are
 * stored in lower case and only the first KEYLENGTH characters are kept.
 * word is always written: it is the empty string when no word was found.
 * The global flag is cleared when '#' or end of input is reached.
 */
void GetAWord(ET word) {
    char tempword[MAXWORDLEN + 1];
    int len = 0;
    char c;

    word[0] = '\0'; /* never leave the previous word behind for the caller */
    for (;;) {
        if (std::scanf("%c", &c) != 1 || c == '#') {
            flag = 0;
            break;
        }
        /* Fold case before classifying, so an upper-case letter inside a
         * word continues the word instead of terminating it. */
        if (c >= 'A' && c <= 'Z') c += 'a' - 'A';
        if (IsWordChar(c)) {
            /* Characters past the buffer capacity are consumed and dropped. */
            if (len < MAXWORDLEN) tempword[len++] = c;
        } else if (len > 0) {
            break;
        }
    }

    if (len > KEYLENGTH) len = KEYLENGTH;
    tempword[len] = '\0';
    std::strcpy(word, tempword);
}

/*
 * Prints the number of distinct words, stores it in cnt, and fills the
 * global vector v with every word whose frequency reaches the cut-off needed
 * to cover at least (int)(distinct * percent) words.
 *
 * v is cleared first and is left unsorted; it may hold more words than the
 * requested share, so the caller sorts it and takes the prefix it needs.
 */
void Show(HashTable H, double percent) {
    int diffwordcount = 0;
    int maxf = 0;
    Position L;
    int i;

    for (i = 0; i < H->TableSize; i++) {
        diffwordcount += H->Heads[i].Count;
        for (L = H->Heads[i].Next; L; L = L->Next)
            if (maxf < L->Count) maxf = L->Count;
    }
    std::printf("%d\n", diffwordcount);
    cnt = diffwordcount;

    /* diffwords[f] = number of distinct words that occur exactly f times. */
    std::vector<int> diffwords(maxf + 1, 0);
    for (i = 0; i < H->TableSize; i++)
        for (L = H->Heads[i].Next; L; L = L->Next)
            diffwords[L->Count]++;

    /* Walk frequencies downwards until enough words are covered. The loop
     * leaves i one below the last frequency it counted; words at frequency i
     * are kept as well, matching the original selection. */
    const int lowerbound = (int)(diffwordcount * percent);
    int count = 0;
    for (i = maxf; i >= 1 && count < lowerbound; i--) count += diffwords[i];
    const int threshold = i;

    v.clear();
    for (int k = 0; k < H->TableSize; k++) {
        for (L = H->Heads[k].Next; L; L = L->Next) {
            if (L->Count >= threshold) {
                struct LNode temp;
                temp.Count = L->Count;
                temp.Next = NULL; /* the copy is detached from the chain */
                std::strcpy(temp.Data, L->Data);
                v.push_back(temp);
            }
        }
    }
}

/* Frees every chain node, the head array and the table. NULL is accepted. */
void DestoryTable(HashTable H) {
    if (H == NULL) return;
    for (int i = 0; i < H->TableSize; i++) {
        Position P = H->Heads[i].Next;
        while (P) {
            Position Tmp = P->Next;
            std::free(P);
            P = Tmp;
        }
    }
    std::free(H->Heads);
    std::free(H);
}

int main() {
    HashTable H = CreateTable(100);
    ET word;

    flag = 1;
    do {
        GetAWord(word);
        InsertAndCount(H, word); /* empty words are ignored by the callee */
    } while (flag);

    Show(H, 10.0 / 100);
    std::sort(v.begin(), v.end(), cmp);

    /* Print the top tenth, never reading past what Show collected. */
    int limit = cnt / 10;
    if (limit > (int)v.size()) limit = (int)v.size();
    /* NOTE(review): "%15s" right-aligns each word in a 15-column field;
     * confirm this is the required output format. */
    for (int i = 0; i < limit; i++)
        std::printf("%d:%15s\n", v[i].Count, v[i].Data);

    DestoryTable(H);
    return 0;
}
View Code