// 程式人生 > 編譯原理實驗 —— 詞法分析器
//
// 編譯原理實驗 —— 詞法分析器

// Lexical_Analysis.cpp : 定義控制檯應用程式的入口點。
//
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "iostream"
using namespace std;
//詞法分析程式
//首先定義種別碼
/*
第一類:識別符號   letter(letter | digit)*  無窮集
第二類:常數    (digit)+  無窮集
第三類:保留字(32)
auto       break    case     char        const      continue
default    do       double   else        enum       extern
float      for      goto     if          int        long
register   return   short    signed      sizeof     static
struct     switch   typedef  union       unsigned   void
volatile    while

第四類:界符  ‘/*’、‘//’、 () { } [ ] " "  '
第五類:運算子 <、<=、>、>=、=、+、-、*、/、^、

對所有可數符號進行編碼:
<$,0>
<auto,1>
...
<while,32>
<+,33>
<-,34>
<*,35>
</,36>
<<,37>
<<=,38>
<>,39>
<>=,40>
<=,41>
<==,42>
<!=,43>
<;,44>
<(,45>
<),46>
<^,47>
<,,48>
<",49>
<',50>
<#,51>
<&,52>
<&&,53>
<|,54>
<||,55>
<%,56>
<~,57>
<<<,58>左移
<>>,59>右移
<[,60>
<],61>
<{,62>
<},63>
<\,64>
<.,65>
<?,66>
<:,67>
<!,68>
"[","]","{","}"
<常數99  ,數值>
<識別符號100 ,識別符號指標>


*/

/****************************************************************************************/
// Global reserved-word table: the 32 C keywords.
// The row index plus one is the keyword's category code (searchReserve
// returns i + 1 and main indexes this table with syn - 1).
static char reserveWord[32][20] = {
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "int", "long",
    "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void",
    "volatile", "while"
};
// Operator / delimiter table; extend it as needed.
// Entry i has category code 33 + i (Scanner computes 33 + i and main indexes
// with syn - 33), so the order here must stay in sync with the numeric codes
// hard-coded in Scanner for the two-character operators.
static char operatorOrDelimiter[36][10] = {
    "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
    "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
    "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
    "}", "\\", ".", "\?", ":", "!"
};

static  char IDentifierTbl[1000][50] = { "" };// Identifier table: up to 1000 names of at most 49 characters each; an empty string marks a free slot
/****************************************************************************************/

/********查詢保留字*****************/
/**
 * Looks up a lexeme in the reserved-word table.
 *
 * @param reserveWord  table of keywords; the first 32 rows are compared
 * @param s            NUL-terminated lexeme to look up
 * @return the 1-based position of the matching row (its category code),
 *         or -1 when no row matches, i.e. the lexeme is an identifier
 */
int searchReserve(char reserveWord[][20], char s[])
{
    int row = 0;
    while (row < 32)
    {
        if (strcmp(reserveWord[row], s) == 0)
        {
            // Category codes start at 1, table rows at 0.
            return row + 1;
        }
        ++row;
    }
    // Not a reserved word.
    return -1;
}
/********查詢保留字*****************/

/*********************判斷是否為字母********************/
/**
 * Reports whether a character may appear in an identifier as a "letter".
 *
 * C allows the underscore anywhere in an identifier, including the first
 * position, so '_' is accepted alongside a-z and A-Z.
 */
bool IsLetter(char letter)
{
    const bool isLower = (letter >= 'a' && letter <= 'z');
    const bool isUpper = (letter >= 'A' && letter <= 'Z');
    return isLower || isUpper || letter == '_';
}
/*********************判斷是否為字母********************/


/*****************判斷是否為數字************************/
/**
 * Reports whether a character is one of the decimal digits '0'..'9'.
 */
bool IsDigit(char digit)
{
    return !(digit < '0' || digit > '9');
}
/*****************判斷是否為數字************************/


/********************編譯預處理,取出無用的字元和註釋**********************/
/**
 * Pre-processing pass: removes comments and normalizes whitespace, in place.
 *
 * Rules applied while walking r[0..pProject] (stopping early at a NUL):
 *   - "// ..." is dropped up to, but not including, the next '\n', the '$'
 *     end-of-program marker, or the end of the input, whichever comes first.
 *   - A block comment is replaced by one space. If its closing delimiter is
 *     missing (the '$' marker or the end of input is reached first) an error
 *     is printed and the process terminates with a failure status.
 *   - '\n', '\t', '\v' and '\r' are each replaced by one space rather than
 *     deleted, so that tokens separated only by a line break or a tab
 *     (e.g. "int\nx") are not glued together; Scanner skips spaces.
 *   - Every other character is kept as is.
 *
 * The result is never longer than the input, so it is written back into r
 * directly and no fixed-size scratch buffer is needed.
 *
 * Limitation: comment markers inside string or character literals are not
 * recognized as literal text and are stripped like real comments.
 *
 * @param r         NUL-terminated source text; overwritten with the result
 * @param pProject  index of the last element of r to inspect (main passes
 *                  the index of the terminating NUL)
 */
void filterResource(char r[], int pProject)
{
    int count = 0; // write cursor; always <= read cursor i
    int i = 0;     // read cursor
    while (i <= pProject && r[i] != '\0')
    {
        // Only look one character ahead when that character is in range.
        const bool hasNext = (i < pProject);

        if (r[i] == '/' && hasNext && r[i + 1] == '/')
        {
            // Line comment: skip its body. The terminating '\n' (turned into
            // a space) or '$' is handled by the next loop iteration.
            i += 2;
            while (i <= pProject && r[i] != '\0' && r[i] != '\n' && r[i] != '$')
            {
                i++;
            }
            continue;
        }

        if (r[i] == '/' && hasNext && r[i + 1] == '*')
        {
            // Block comment: search for the closing delimiter without ever
            // leaving the buffer.
            i += 2;
            while (i <= pProject && r[i] != '\0' && r[i] != '$'
                && !(r[i] == '*' && i < pProject && r[i + 1] == '/'))
            {
                i++;
            }
            if (i > pProject || r[i] != '*')
            {
                printf("註釋出錯,沒有找到 */,程式結束!!!\n");
                exit(EXIT_FAILURE);
            }
            i += 2;           // step over the closing delimiter
            r[count++] = ' '; // a comment separates tokens like whitespace
            continue;
        }

        if (r[i] == '\n' || r[i] == '\t' || r[i] == '\v' || r[i] == '\r')
        {
            r[count++] = ' ';
        }
        else
        {
            r[count++] = r[i];
        }
        i++;
    }
    r[count] = '\0';
}
/********************編譯預處理,取出無用的字元和註釋**********************/


/****************************分析子程式,演算法核心***********************/
/**
 * Core of the analyzer: scans one token starting at resourceProject[pProject].
 *
 * Leading spaces are skipped, then the first character selects the branch:
 *   - letter or '_' : identifier or reserved word; syn is the keyword code
 *                     1..32 from searchReserve, or 100 for an identifier
 *   - digit         : run of digits, syn = 99
 *   - one-character operator/delimiter : syn = 33 + index in
 *                     operatorOrDelimiter
 *   - '<' '>' '=' '!' '&' '|' : one character of look-ahead decides between
 *                     the single form and the "x=" / doubled form
 *   - '$'           : end marker, syn = 0; the cursor is NOT advanced
 *   - anything else : an error is printed and the process terminates
 *
 * @param syn              out: category code of the token
 * @param resourceProject  source text; the filtered program ending in '$'
 * @param token            out: lexeme text; must hold at least 20 chars.
 *                         Longer identifiers/numbers are consumed completely
 *                         but only their first 19 characters are stored.
 * @param pProject         in/out: read cursor; on return it indexes the first
 *                         character after the token
 */
void Scanner(int &syn, char resourceProject[], char token[], int &pProject)
{
    const int tokenCapacity = 20; // size of the caller's token buffer
    int count = 0;                // next free position in token[]

    // Skip blanks so that a space is never reported as an unknown character.
    while (resourceProject[pProject] == ' ')
    {
        pProject++;
    }
    const char ch = resourceProject[pProject];

    // Clear the previous lexeme.
    for (int i = 0; i < tokenCapacity; i++)
    {
        token[i] = '\0';
    }

    if (IsLetter(ch) || IsDigit(ch))
    {
        // Identifier/keyword: letter (letter | digit)*.  Constant: (digit)+.
        const bool isWord = IsLetter(ch);
        bool truncated = false;
        while (IsDigit(resourceProject[pProject])
            || (isWord && IsLetter(resourceProject[pProject])))
        {
            if (count < tokenCapacity - 1)
            {
                token[count++] = resourceProject[pProject];
            }
            else
            {
                // Keep consuming the lexeme but never write past token[].
                truncated = true;
            }
            pProject++;
        }
        token[count] = '\0';
        if (truncated)
        {
            printf("warning: token truncated to %d characters: %s\n", tokenCapacity - 1, token);
        }
        if (isWord)
        {
            syn = searchReserve(reserveWord, token);
            if (syn == -1)
            {
                syn = 100; // not a reserved word, so it is an identifier
            }
        }
        else
        {
            syn = 99; // constant
        }
        return;
    }

    // Operators and delimiters that are always a single character.
    // (strchr also matches the terminating NUL, hence the explicit guard.)
    if (ch != '\0' && strchr("+-*/;()^,\"'~#%[]{}\\.?:", ch) != NULL)
    {
        token[0] = ch;
        token[1] = '\0';
        for (int i = 0; i < 36; i++)
        {
            if (strcmp(token, operatorOrDelimiter[i]) == 0)
            {
                syn = 33 + i; // table position maps linearly onto the code
                break;
            }
        }
        pProject++;
        return;
    }

    // Operators that may be one or two characters long. A code of -1 means
    // "this form does not exist for that character".
    int single = -1;  // code of the one-character form
    int withEq = -1;  // code of the form followed by '='
    int doubled = -1; // code of the form followed by the same character
    switch (ch)
    {
    case '<': single = 37; withEq = 38; doubled = 58; break; // <  <=  <<
    case '>': single = 39; withEq = 40; doubled = 59; break; // >  >=  >>
    case '=': single = 41; withEq = 42; break;               // =  ==
    case '!': single = 68; withEq = 43; break;               // !  !=
    case '&': single = 52; doubled = 53; break;              // &  &&
    case '|': single = 54; doubled = 55; break;              // |  ||
    default: break;
    }
    if (single != -1)
    {
        const char next = resourceProject[pProject + 1];
        token[0] = ch;
        if (withEq != -1 && next == '=')
        {
            token[1] = next;
            syn = withEq;
            pProject += 2;
        }
        else if (doubled != -1 && next == ch)
        {
            // Both characters are consumed; consuming only one would make
            // the second '<' of "<<" show up again as a separate '<' token.
            token[1] = next;
            syn = doubled;
            pProject += 2;
        }
        else
        {
            syn = single;
            pProject += 1;
        }
        return;
    }

    if (ch == '$')
    {
        // End marker: report code 0 and leave the cursor on the marker.
        syn = 0;
        return;
    }

    // Character not covered by any rule above.
    printf("error:there is no exist %c \n", ch);
    exit(EXIT_FAILURE);
}


/**
 * Driver: reads the program text from G:\demo_src.txt up to the first '$'
 * (or end of file), prints it, filters it with filterResource, then calls
 * Scanner repeatedly until it reports the end marker (syn == 0). Every token
 * is echoed to stdout and to G:\demo_compile.txt, and finally the collected
 * identifier table is dumped.
 *
 * @return 0 on success; the process exits with a failure status when a file
 *         cannot be opened or the source does not fit into the buffer.
 */
int main()
{
    char resourceProject[10000];
    const int capacity = static_cast<int>(sizeof(resourceProject));
    char token[20] = { 0 };
    int syn = -1, i;
    int pProject = 0; // cursor into resourceProject
    FILE *fp, *fp1;

    if ((fp = fopen("G:\\demo_src.txt", "r")) == NULL)
    {// open the source program
        cout << "can't open this file";
        exit(EXIT_FAILURE);
    }

    // Read up to the '$' end marker. fgetc returns an int so that EOF can be
    // told apart from every valid character; a file without '$' therefore
    // ends the loop at EOF instead of running forever.
    bool tooLarge = false;
    for (;;)
    {
        const int c = fgetc(fp);
        if (c == EOF || c == '$')
        {
            break;
        }
        // Keep room for the '$' marker and the terminating NUL, plus one
        // spare byte.
        if (pProject >= capacity - 3)
        {
            tooLarge = true;
            break;
        }
        resourceProject[pProject++] = static_cast<char>(c);
    }
    fclose(fp);
    if (tooLarge)
    {
        cout << "source file is too large" << endl;
        exit(EXIT_FAILURE);
    }
    // Always terminate the program text with the marker Scanner stops on,
    // even when the file itself did not contain one.
    resourceProject[pProject++] = '$';
    resourceProject[pProject] = '\0'; // pProject now indexes the NUL

    cout << endl << "源程式為:" << endl;
    cout << resourceProject << endl;
    // strip comments and whitespace noise
    filterResource(resourceProject, pProject);
    cout << endl << "過濾之後的程式:" << endl;
    cout << resourceProject << endl;
    pProject = 0; // restart from the beginning for scanning

    if ((fp1 = fopen("G:\\demo_compile.txt", "w+")) == NULL)
    {// open the output file
        cout << "can't open this file";
        exit(EXIT_FAILURE);
    }
    while (syn != 0)
    {
        // fetch the next token
        Scanner(syn, resourceProject, token, pProject);
        if (syn == 100)
        {// identifier: record it in the table unless it is already there
            for (i = 0; i<1000; i++)
            {
                if (strcmp(IDentifierTbl[i], token) == 0)
                {// already recorded
                    break;
                }
                if (strcmp(IDentifierTbl[i], "") == 0)
                {// first free slot
                    strcpy(IDentifierTbl[i], token);
                    break;
                }
            }
            printf("(識別符號  ,%s)\n", token);
            fprintf(fp1, "(識別符號   ,%s)\n", token);
        }
        else if (syn >= 1 && syn <= 32)
        {// reserved word
            printf("(%s   ,  --)\n", reserveWord[syn - 1]);
            fprintf(fp1, "(%s   ,  --)\n", reserveWord[syn - 1]);
        }
        else if (syn == 99)
        {// constant
            printf("(常數   ,   %s)\n", token);
            fprintf(fp1, "(常數   ,   %s)\n", token);
        }
        else if (syn >= 33 && syn <= 68)
        {// operator or delimiter
            printf("(%s   ,   --)\n", operatorOrDelimiter[syn - 33]);
            fprintf(fp1, "(%s   ,   --)\n", operatorOrDelimiter[syn - 33]);
        }
    }
    // Dump the identifier table: only the slots actually filled (an empty
    // string marks the first free slot), not a fixed number of rows.
    for (i = 0; i<1000 && IDentifierTbl[i][0] != '\0'; i++)
    {
        printf("第%d個識別符號:  %s\n", i + 1, IDentifierTbl[i]);
        fprintf(fp1, "第%d個識別符號:  %s\n", i + 1, IDentifierTbl[i]);
    }
    fclose(fp1);
    return 0;
}