1. 程式人生 > >基於C++的詞法分析器

基於C++的詞法分析器

實驗目的

通過設計編制除錯一個具體的詞法分析程式,加深對詞法分析原理的理解。並掌握在對程式設計語言源程式進行掃描過程中將其分解為各類單詞的詞法分析方法。

編制一個讀單詞過程,從輸入的源程式中,識別出各個具有獨立意義的單詞(token),即基本保留字、識別符號、常量、運算子、分隔符五大類,並依次輸出各個單詞的內部編碼及單詞符號自身值。(遇到錯誤時可顯示“Error”,然後跳過錯誤部分繼續顯示)

 

資源下載地址https://download.csdn.net/download/qq_41406816/10791074

 

  • 輸出結果/輸入內容

輸出內容

二、變數

基本保留字:if、int、for、while、do、return、break、continue;(還可以載入很多,但是我的程式碼裡面只寫了這幾個)

運算子:+、-、*、/、=、>、<、>=、<=、!=

分隔符:,、;、{、}、(、)

常量:小數、整數、科學記數法、負號的識別

註釋:單行註釋(//)、多行註釋(/**/)

三、主要識別的功能:

  1. 識別上面的變數
  2. {}和()的匹配
  3. 空白、換行符、製表符的剔除

四、主要函式介紹

digitprocess()//識別常量

alphaprocess()//字元,通過search函式 判斷是保留字還是識別符號

search()//判斷是保留字還是識別符號

otherprocess()//其他的。運算子,註釋,分隔符

Judge()//利用出入棧,辨別{}和()是否配對

(1)註釋:

用token字串儲存目前讀到的單詞。當token[0]='/'時,再讀取一個字元:如果token[1]='*',就是多行註釋(/**/),通過迴圈取出註釋,遇到'/'則停止迴圈;如果token[1]='/',就是單行註釋(//),讀取到換行符時迴圈就停止。

	// Excerpt from otherprocess(): handling of a token that starts with '/'.
	// One more character is read and appended so token[1] can be inspected.
	if (token == "/")
		{
			fin.get(x);
			token += x;
			if (token[1] == '=')// "/=" operator, printed with category 4
			{
				cout << "(4," << token << ")" << endl;
				fin.get(x);
				id = 4;
				return x;
			}
			if (token[1] == '*' || token[1] == '/')// comment (category 6): the first form stops at '/', the second at a newline
			{ 
				// i is the index of the character most recently appended to token
				int i = 1;
				if (token[1] == '*')
				{
					// NOTE(review): this stops at the first '/' that follows,
					// not specifically at the "*/" pair.
					while (token[i] != '/')/* */
					{
						fin.get(x);
						token += x;
						i++;
					}
				
				}
				else
				{
					// line comment: keep reading until a newline has been appended
					while (token[i] != '\n')
					{
						fin.get(x);
						token += x;
						i++;
					}
				}
				// substr(0,i) leaves out token[i], the character that ended the loop
				cout << "(6," << token.substr(0,i) << ")" << endl;
				id = 6;
				fin.get(x);
				return x;
			}
		}

 

(2)識別實數(如3.2),無符號數(2.1E6)

// Excerpt from digitprocess(): collects digits, '.' and 'E' into token.
while (isdigit(butter)||butter=='.'||butter=='E')
	{
		if (butter == 'E')
		{
			// exponent part: after 'E', keep taking digits and '+'/'-' characters
			token += butter;
			fin.get(x);// read the next character
			butter = x;
			while (isdigit(butter) || butter == '+' || butter == '-')
			{
				token += butter;// append to the token text
				fin.get(x);// read the next character
				butter = x;
			}
			// the constant is printed with category 3 and the lookahead is returned
			cout << "(3," << token << ")" << endl;
			id = 3;
			return x;
		}
		token += butter;// append to the token text
		fin.get(x);// read the next character
		butter = x;
	}

(3)檢查大小括號是否匹配。使用出入棧。思想大概是:只要是左括號,讓他入棧。如果讀到右括號,則與當前棧頂的括號相匹配,如果是對應的左括號,則左括號出棧,以此類推。如果括號不匹配,則退出。(網上有很多版本的程式碼,讀者可以自己去搜索)下面的程式碼也有我自己的出入棧。

(4)辨別負號

經過推理發現,只要“-”前面是運算子,那麼這個“-”一定是負號。所以我們可以通過記錄前面的字元來辨別。我是用id來記錄每一次的資料。

五、程式碼(僅供參考)

執行環境 vs2017+win10

#include<stdio.h>
#include<string.h>
#include<string>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include<iostream>
#include<fstream>
#include<stack>
using namespace std;
// Reserved words; search() compares a candidate word against these 8 entries.
const char *keyword[8] = { "break","if","continue","while","do","int","for","return"};
// x holds the character most recently read from fin.
// NOTE(review): cbuffer is not referenced by the code in this listing.
char x, cbuffer;
ifstream fin("1.txt", ios::in);// input stream for the source text to be scanned
// Category of the previously printed token:
// 1 keyword, 2 identifier, 3 constant, 4 operator, 5 separator, 6 comment.
int id;
// NOTE(review): big and small are not referenced by the code in this listing.
int big[2] = { 0,0 };
int small[2] = { 0,0 };
// Stack of currently open brackets, maintained by Judge().
stack <char>s;
// Bracket-matching verdict reported by main(). Starts as true so that an
// input containing no brackets at all is reported as matched.
bool re = true;
// Feeds every character of str through the file-level bracket stack `s`.
// Opening brackets ( [ { are pushed. A closing bracket pops its partner when
// that partner is on top of the stack; otherwise the function returns false
// immediately. Other characters are ignored.
// Returns true only when the stack is empty after the whole string was handled.
bool Judge(string str) {
	for (string::size_type pos = 0; pos < str.length(); ++pos) {
		const char ch = str[pos];

		// opening brackets are simply remembered
		if (ch == '(' || ch == '[' || ch == '{') {
			s.push(ch);
			continue;
		}

		// work out which opener a closing bracket needs
		char expected;
		if (ch == ')')
			expected = '(';
		else if (ch == ']')
			expected = '[';
		else if (ch == '}')
			expected = '{';
		else
			continue;	// not a bracket at all

		if (s.empty() || s.top() != expected)
			return false;
		s.pop();
	}
	return s.empty();
}
// Reads the next input character into the global x.
// At end of input x is set to a blank so that callers never re-scan a stale
// character. Returns false when nothing could be read.
static bool digitNext()
{
	if (fin.get(x))
		return true;
	x = ' ';
	return false;
}

// Scans a numeric constant that starts with `butter` (already read by the
// caller): digits and '.', optionally followed by an exponent made of 'E',
// at most one sign, and digits (for example 3.2 or 2.1E6 or 1E-5).
// Prints the constant as "(3,text)", sets id to 3, and returns the first
// character that is not part of the constant (a blank at end of input).
char digitprocess(char butter)
{
	string token = "";
	char current = butter;
	// <ctype.h> classifiers need a value representable as unsigned char
	while (isdigit(static_cast<unsigned char>(current)) || current == '.' || current == 'E')
	{
		token += current;
		const bool exponent = (current == 'E');
		digitNext();	// at end of input x becomes a blank, which ends the loop
		current = x;
		if (exponent)
		{
			// a sign is only part of the constant directly after 'E';
			// a later '+' or '-' is a separate operator
			if (current == '+' || current == '-')
			{
				token += current;
				digitNext();
				current = x;
			}
			while (isdigit(static_cast<unsigned char>(current)))
			{
				token += current;
				digitNext();
				current = x;
			}
			break;	// nothing may follow the exponent digits
		}
	}
	cout << "(3," << token << ")" << endl;
	id = 3;
	return x;
}
// Returns true when `list` is exactly one of the 8 entries of the keyword
// table, i.e. the word is a reserved word rather than an identifier.
bool search(string list)
{
	int idx = 0;
	// walk the table until a match is found or all 8 entries were tried
	while (idx < 8 && list.compare(keyword[idx]) != 0)
		++idx;
	return idx < 8;
}
// Reads the next input character into the global x.
// At end of input x is set to a blank so that callers never re-scan a stale
// character. Returns false when nothing could be read.
static bool alphaNext()
{
	if (fin.get(x))
		return true;
	x = ' ';
	return false;
}

// Scans a word that starts with `butter` (already read by the caller): a
// letter followed by letters, digits or underscores. search() then decides
// whether it is a reserved word, printed as "(1,word)" with id = 1, or an
// identifier, printed as "(2,word)" with id = 2.
// Returns the first character after the word (a blank at end of input).
char alphaprocess(char butter)
{
	string token = "";
	char current = butter;
	for (;;)
	{
		const unsigned char uc = static_cast<unsigned char>(current);
		// the first character must be a letter; later ones may also be digits or '_'
		const bool accepted = isalpha(uc) || (!token.empty() && (isdigit(uc) || current == '_'));
		if (!accepted)
			break;
		token += current;
		alphaNext();	// at end of input x becomes a blank, which ends the loop
		current = x;
	}

	// x already holds the lookahead character, so nothing more is consumed
	// here: reading again after a keyword would drop e.g. the '(' of "if(".
	if (search(token))
	{
		cout << "(1," << token << ")" << endl;
		id = 1;
	}
	else
	{
		cout << "(2," << token << ")" << endl;
		id = 2;
	}
	return x;
}
char otherprocess(char butter)
{
	string token = "";
	token += butter;

	//,;{}()是分隔符 5
	if (token == "," || token == ";")
	{
			
		cout << "(5," << butter << ")" << endl;
		fin.get(x);
		id = 5;
		return x;
	}
	if (token == "{" || token == "}" || token == "(" || token == ")")
	{
		re=Judge(token);
		cout << "(5," << butter << ")" << endl;
		fin.get(x);
		id = 5;
		return x;
	}
	// * or / or *= or /= 是運算子4 /**/ or // 註釋6
	if (token == "*" || token == "/")
	{
		if (token == "*")
		{
			fin.get(x);
			token += x;
			if (token[1] == '=')// *=
			{
				cout << "(4," << token << ")" << endl;
				fin.get(x);
				id = 4;
				return x;
			}
		}
		if (token == "/")
		{
			fin.get(x);
			token += x;
			if (token[1] == '=')// /=
			{
				cout << "(4," << token << ")" << endl;
				fin.get(x);
				id = 4;
				return x;
			}
			if (token[1] == '*' || token[1] == '/')// 註釋 6 第一種 是遇到/就停止,第二種是換行
			{ 
				int i = 1;
				if (token[1] == '*')
				{
					while (token[i] != '/')/* */
					{
						fin.get(x);
						token += x;
						i++;
					}
				
				}
				else
				{
					while (token[i] != '\n')
					{
						fin.get(x);
						token += x;
						i++;
					}
				}
				cout << "(6," << token.substr(0,i) << ")" << endl;
				id = 6;
				fin.get(x);
				return x;
			}
		}
		//排除了註釋和 /= *= 那麼 就只是單純的 * /了
		cout << "(4," << token << ")" << endl;
		id = 4;
		fin.get(x);
		return x;
	}
	if (token == "=" || token == "!" || token == "<" || token == ">")
	{
		fin.get(x);
		
		if (token[1] == '=')
		{	token += x;
			cout << "(4," << token << ")" << endl;
			id = 4;
			fin.get(x);
		}
		cout << "(4," << token << ")" << endl;
		id = 4;
		return x;
	}
	if (token == "+" || token == "-")
	{
		if (id == 4)//在運算子後面的-,一定是負號
		{
			int i = 1;
			fin.get(x);
			token += x;
			if (isdigit(token[1]))
			{
				while ((isdigit(token[i]) || token[i] == 'E' || token[i] == '.'))
				{
					fin.get(x);
					token += x;
					
					i++;
				}
				cout << "(3," << token.substr(0,i) << ")" << endl;
				id = 3;
				return x;
			}
			

		}
		if (token == "+")// ++ += 
		{
			fin.get(x);
			token += x;
			if (token[1] == '=' || token[1] == '+')
			{
				cout << "(4," << token << ")" << endl;
				id = 4;
				fin.get(x);
				return x;
			}
			cout << "(4," << token[0] << ")" << endl;
			id = 4;
			//fin.get(x);
			return x;
		}
		if (token == "-")//-- -=
		{
			fin.get(x);
			token += x;
			if (token[1] == '=' || token[1] == '-')
			{
				cout << "(4," << token << ")" << endl;
				id = 4;
				fin.get(x);
				return x;
			}
			cout << "(4," << token << ")" << endl;
			id = 4;
			fin.get(x);
			return x;
		}
	}
}
// Entry point: scans the text behind the global stream fin, printing one
// "(category,text)" line per token, then reports whether {} and () matched.
// Categories: 1 keyword, 2 identifier, 3 constant, 4 operator, 5 separator,
// 6 comment. Returns 1 when the input file could not be opened, 0 otherwise.
int main()
{
	if (!fin)
	{
		cout << "error";
		return 1;	// nothing to scan without an input file
	}
	fin.get(x);
	// Loop on the stream state instead of peek(): x always holds a character
	// that still has to be processed, so stopping as soon as peek() sees EOF
	// would drop the last character of the file.
	while (fin)
	{
		// <ctype.h> classifiers need a value representable as unsigned char
		const unsigned char current = static_cast<unsigned char>(x);
		if (isspace(current))
		{
			// skip blanks, tabs and line breaks, then re-test the new character
			fin.get(x);
			continue;
		}
		// every handler returns the first character it did not consume
		if (isdigit(current))
		{
			x = digitprocess(x);
		}
		else if (isalpha(current))
		{
			x = alphaprocess(x);
		}
		else
		{
			x = otherprocess(x);
		}
	}
	if (re)
	{
		cout << "{}與()匹配成功" << endl;
	}
	else
	{
		cout << "{}與()匹配失敗" << endl;
	}
	fin.close();
	return 0;
}

執行結果

 

歡迎大家來交流意見~~