1. 程式人生 > >一個簡單C語言的詞法分析器

一個簡單C語言的詞法分析器

一個簡單C語言的詞法分析器

語言的詞法構成:

識別符號

id  同C語言識別符號

常量

num 數字  
ch  字元
str 字串

關鍵字

kw_int  int
kw_char char        
kw_void void        
kw_if   if      
kw_else else    
kw_switch   switch      
kw_case case    
kw_default  default     
kw_while    while   
kw_do   do      
kw_for  for 
kw_break    break
kw_continue continue
kw_return   return

運算子

add +       
sub -       
mul *       
div /
mod %
inc ++
dec --
not !
and &&
or  ||
assign  =
gt  >
ge  >=
lt  <
le  <=
equ ==
nequ    !=

分界符

comma   ,
colon   :
simcon  ;
lparen  (
rparen  )
lbrac   {
rbrac   }

程式碼:

#include<limits.h>
#include<stdio.h>
#include<string.h>

/* Shared lexer state, read and written by main() and scaner(). */
char input[200]; /* source text typed by the user, up to and including '#' */
char token[200]; /* spelling of the current token; sized like input[] so that
                    no identifier or string literal can outgrow it */
char toke;       /* value of the current character literal */
char ch;         /* character most recently read from input[] */
int fg;          /* token code produced by scaner(); main() switches on it */
int num;         /* value of the current integer literal */
int p=0;         /* current position in input[] */

//二維字元陣列,存放關鍵字
char character[13][10]={"int","char","void","if","else","switch","case","default"
,"while","do","for","break","continue","return"}; //二維字元陣列,存放含義定義 char index[42][15]={"id","num","str","kw_int","kw_char","kw_void","kw_if","kw_else","kw_switch","kw_case","kw_default","kw_while","kw_do","kw_for","kw_break","kw_continue","kw_return","add","sub","mul","div","mod","inc","dec","not","and","or","assign","gt","ge","lt","le","equ","nequ","comma","colon","simcon","lparen","rparen","lbrac","rbrac","ch"}; main() { printf("請輸入程式碼(結尾以'#'作為結束):\n"); do { ch=getchar(); input[p++]=ch; }while(ch!='#'); p=0; do { scaner(); switch(fg) { case 0:printf("** %s-->%s **\n",token,index[fg]);break;//識別符號 case 1:printf("** %d-->%s **\n",num,index[fg]);break;//數字 case 2:printf("** %s-->%s **\n",token,index[fg]);break;//字串 case 3:printf("** %s-->%s **\n",token,index[fg]);break;//關鍵字 case 4:printf("** %s-->%s **\n",token,index[fg]);break; case 5:printf("** %s-->%s **\n",token,index[fg]);break; case 6:printf("** %s-->%s **\n",token,index[fg]);break; case 7:printf("** %s-->%s **\n",token,index[fg]);break; case 8:printf("** %s-->%s **\n",token,index[fg]);break; case 9:printf("** %s-->%s **\n",token,index[fg]);break; case 10:printf("** %s-->%s **\n",token,index[fg]);break; case 11:printf("** %s-->%s **\n",token,index[fg]);break; case 12:printf("** %s-->%s **\n",token,index[fg]);break; case 13:printf("** %s-->%s **\n",token,index[fg]);break; case 14:printf("** %s-->%s **\n",token,index[fg]);break; case 15:printf("** %s-->%s **\n",token,index[fg]);break; case 16:printf("** %s-->%s **\n",token,index[fg]);break; case 17:printf("** %s-->%s **\n",token,index[fg]);break;//運算子 case 18:printf("** %s-->%s **\n",token,index[fg]);break; case 19:printf("** %s-->%s **\n",token,index[fg]);break; case 20:printf("** %s-->%s **\n",token,index[fg]);break; case 21:printf("** %s-->%s **\n",token,index[fg]);break; case 22:printf("** %s-->%s **\n",token,index[fg]);break; case 23:printf("** %s-->%s **\n",token,index[fg]);break; case 24:printf("** %s-->%s **\n",token,index[fg]);break; case 25:printf("** 
%s-->%s **\n",token,index[fg]);break; case 26:printf("** %s-->%s **\n",token,index[fg]);break; case 27:printf("** %s-->%s **\n",token,index[fg]);break; case 28:printf("** %s-->%s **\n",token,index[fg]);break; case 29:printf("** %s-->%s **\n",token,index[fg]);break; case 30:printf("** %s-->%s **\n",token,index[fg]);break; case 31:printf("** %s-->%s **\n",token,index[fg]);break; case 32:printf("** %s-->%s **\n",token,index[fg]);break; case 33:printf("** %s-->%s **\n",token,index[fg]);break; case 34:printf("** %s-->%s **\n",token,index[fg]);break; case 35:printf("** %s-->%s **\n",token,index[fg]);break; case 36:printf("** %s-->%s **\n",token,index[fg]);break; case 37:printf("** %s-->%s **\n",token,index[fg]);break; case 39:printf("** %s-->%s **\n",token,index[fg]);break; case 40:printf("** %s-->%s **\n",token,index[fg]);break; case 41:printf("** %c-->%s **\n",toke,index[fg]);break;//字元 case -1:printf("分析器不存在該詞法\n"); break; } }while(fg!=42); getch(); //用於讓程式停留在顯示頁面 } scaner() {//詞法掃描程式 int m=0;//token[]下標 int n; toke=NULL; //清空token[] for(n=0;n<5;n++) token[n]=NULL; //獲取第一個不為0字元 ch=input[p++]; while(ch==' ')ch=input[p++]; while(ch=='\n')ch=input[p++]; //數字處理 if(ch<='9'&&ch>='0') { num=0; while(ch<='9'&&ch>='0') { num=num*10+ch-'0'; ch=input[p++]; } ch=input[--p]; fg=1; } //關鍵字處理/識別符號處理 else if((ch<='z'&&ch>='a')||(ch<='Z'&&ch>='A')||(ch=='_')) { int flag=0; while((ch<='z'&&ch>='a')||(ch<='Z'&&ch>='A')||(ch<='9'&&ch>='0')||(ch=='_')) { token[m++]=ch; ch=input[p++]; } token[m++]='\0'; ch=input[--p]; for(n=0;n<14;n++)//關鍵字 { if(strcmp(token,character[n])==0)//strcmp()比較兩個字串,相等返回0 { flag=1; fg=n+3; break; } } if(flag==0) { fg=0; } } //運算子分界符處理 else { switch(ch) { case '+': m=0; token[m++]=ch; ch=input[p++]; if(ch=='+') //產生++ { fg=22; token[m++]=ch; } else //產生+ { fg=17; ch=input[--p]; } break; case '-': m=0; token[m++]=ch; ch=input[p++]; if(ch=='-') //產生-- { fg=23; token[m++]=ch; } else //產生- { fg=18; ch=input[--p]; } break; case '>': m=0; token[m++]=ch; ch=input[p++]; 
if(ch=='=') //產生>= { fg=29; token[m++]=ch; } else //產生> { fg=28; ch=input[--p]; } break; case '<': m=0; token[m++]=ch; ch=input[p++]; if(ch=='=') //產生<= { fg=31; token[m++]=ch; } else //產生< { fg=30; ch=input[--p]; } break; case '=': m=0; token[m++]=ch; ch=input[p++]; if(ch=='=') //產生== { fg=32; token[m++]=ch; } else //產生= { fg=27; ch=input[--p]; } break; case '!': m=0; token[m++]=ch; ch=input[p++]; if(ch=='=') //產生!= { fg=33; token[m++]=ch; } else //產生! { fg=24; ch=input[--p]; } break; case '&': m=0; token[m++]=ch; ch=input[p++]; if(ch=='&') //產生&& { fg=25; token[m++]=ch; } else //無產生 { fg=41; ch=input[--p]; } break; case '|': m=0; token[m++]=ch; ch=input[p++]; if(ch=='|') //產生|| { fg=26; token[m++]=ch; } else //無產生 { fg=41; ch=input[--p]; } break; case '*':fg=19;token[0]=ch;break; case '/':fg=20;token[0]=ch;break; case '%':fg=21;token[0]=ch;break; case ',':fg=34;token[0]=ch;break; case ':':fg=35;token[0]=ch;break; case ';':fg=36;token[0]=ch;break; case '(':fg=37;token[0]=ch;break; case ')':fg=38;token[0]=ch;break; case '{':fg=39;token[0]=ch;break; case '}':fg=40;token[0]=ch;break; case '"': ch=input[p++]; m=0; token[m++]=ch; do { ch=input[p++]; token[m++]=ch; }while(ch!='"'); fg=2; break; case '\'': ch=input[p++]; m=0; toke=ch; do { ch=input[p++]; token[m++]=ch; }while(ch!='\''); if(token[0]=='\'') { fg=41; } else { fg=-1; } break; case '#':fg=42;break; default:fg=-1; } } }

完成!