1. 程式人生 > >一個簡單的模式字串查詢(支援萬用字元‘*’)

一個簡單的模式字串查詢(支援萬用字元‘*’)

資料結構課的一些作業還是有些難度的,對於部分有價值或下了苦工的問題還是傳上來好叻,回頭寫註釋,紀念菜雞生涯 【問題描述】 在當前目錄下的檔案string.in中查詢給定的字串,並將查詢到的字串和行號輸出到當前目錄下的檔案string.out中。要求: 1)從鍵盤輸入給定的字串,該字串中只包含大小寫字母、數字字元、中括號字元‘[’和‘]’、‘*’,以及字元‘^’。字串的長度不超過20。 2)字元‘^’只能出現在中括號內,且只能作為中括號內的第一個字元出現。除了字元‘^’,中括號中至少包含一個以上的字母或數字。 3)字元*不會出現在中括號內 4)在給定字串中,中括號最多出現一次。若中括號內未出現字元‘^’,表示該位置上的字元只要與中括號內的任一字元相同,則匹配成功;若中括號內出現字元‘^’,表示該位置上的字元與中括號內的所有字元都不相同時,匹配成功。 5)字元*可以同零個字元或者多個任意字元相匹配 6)在給定的字串中,*號最多僅出現一次 7)*號的作用範圍侷限於一行,不會跨越行進行匹配 8) 有多個字串和*號匹配時,僅僅輸出一個,並且輸出這些串中長度最短的那個 9)查詢字串時大小寫無關。 10)先輸出查到的行號(行號從1開始),行號後跟冒號‘:’,然後是查詢到的字串,多個字串之間用逗號‘,’隔開。各行之間用一個回車換行符隔開。 【輸入形式】 首先從標準輸入(鍵盤)讀入待查詢的字串。待查詢的檔案string.in位於當前目錄下。 【輸出形式】 將查詢到的結果輸出到當前目錄下的string.out中。 【樣例輸入1】 zh[ao]ng 假如string.in檔案內容為: Zhang ying ju zhu zai ZhongGuo.  Ta zheng zai du gao zhong. Bie ren dou jia ta xiao zhang. 【樣例輸出1】 string.out檔案內容為: 1:Zhang,Zhong 2:zhong 3:zhang 【樣例1說明】 給定字串中有中括號,表示第三個字元可以是a也可以是o,且大小寫無關,因此文章中第一行的Zhang和Zhong與給定字串匹配,故輸出1:Zhang,Zhong。其它類推。 【樣例輸入2】 a[^ab]a string.in檔案內容為: Do you like banana? ABA is the abbreviation of American Bankers Association. 【樣例輸出2】 string.out檔案內容為: 1:ana,ana 【樣例2說明】 給定字串中括號內有字元‘^’,表示第一個和第三個字元都為a,第二個字元不能為a或b,因此文章中第一行的banana內有兩個字串ana與給定字串匹配,故輸出1:ana,ana。第二行中ABA的第二個字元為B,由於大小寫無關,與給定字串中括號內的b相同,故不能匹配。 【樣例輸入3】 w*d string.in檔案內容為: wwwdd world is a nice word 【樣例輸出3】 string.out檔案內容為: 1:wwwd,wwd,wd 2:world,word 【樣例3說明】

給定的字串中有‘*’,表示在一行內,可以和以'w'開頭、以'd'結尾的任意字串相匹配。在一行中,對於第一個字元'w',同時有字串"wwwd"以及"wwwdd"與之相匹配,根據上述第8條規則,應該匹配其中最短的"wwwd"。依此類推得到"wwd"和"wd"。同樣的規則用於第二行,得到"world"和"word"。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Fold an ASCII uppercase letter ('A'..'Z') to its lowercase form.
 * Every other character is handed back unchanged.
 * NOTE(review): this name coincides with the standard <ctype.h> tolower();
 * the file does not include <ctype.h>, so this local definition is the one used. */
char tolower(char s)
{
	const char shift = 'a' - 'A';	/* distance between the two letter cases */

	return ('A' <= s && s <= 'Z') ? (char)(s + shift) : s;
}

// True when c marks the end of the matchable text of a line. The line break
// left in the buffer by fgets() is a separator, not text a pattern may match.
static int rm_line_ended(char c)
{
	return c == '\0' || c == '\n' || c == '\r';
}

// Case-insensitive comparison of two characters.
static int rm_same_letter(char a, char b)
{
	return tolower(a) == tolower(b);
}

// Index of the ']' that closes the bracket expression opening at regex[open],
// or -1 when the pattern ends before any ']' is found.
static int rm_class_end(const char regex[], int open, int len_regex)
{
	int k;

	for (k = open + 1; k < len_regex; k++)
	{
		if (regex[k] == ']')
			return k;
	}
	return -1;
}

// Whether c satisfies the bracket expression regex[open..close].
// A leading '^' inverts the test: c must then differ from every listed member.
static int rm_class_accepts(const char regex[], int open, int close, char c)
{
	int first = open + 1;
	int negated = (first < close && regex[first] == '^');
	int k;

	if (negated)
		first++;
	for (k = first; k < close; k++)
	{
		if (rm_same_letter(c, regex[k]))
			return !negated;
	}
	return negated;
}

// Matches the star-free run of pattern items that starts at regex[r] against
// the text starting at scans[s]. Every item (a literal or one "[...]" box)
// consumes exactly one character of text.
// Returns the number of characters consumed, or -1 on a mismatch, on reaching
// the end of the line, or on a '[' without a closing ']'.
// On success *r_stop receives the index of the '*' (or len_regex) ending the run.
static int rm_match_run(const char scans[], int s, const char regex[], int r,
                        int len_regex, int *r_stop)
{
	int used = 0;

	while (r < len_regex && regex[r] != '*')
	{
		char seen = scans[s + used];

		if (rm_line_ended(seen))
			return -1;
		if (regex[r] == '[')
		{
			int close = rm_class_end(regex, r, len_regex);

			if (close < 0 || !rm_class_accepts(regex, r, close, seen))
				return -1;
			r = close + 1;
		}
		else
		{
			if (!rm_same_letter(regex[r], seen))
				return -1;
			r++;
		}
		used++;
	}
	*r_stop = r;
	return used;
}

// Decides whether the pattern regex[] matches the text of scans[] beginning
// exactly at index pos_scans. Matching is case-insensitive.
//
// Pattern syntax:
//   [abc]   one character equal to any listed member
//   [^abc]  one character different from every listed member
//   *       zero or more arbitrary characters, never crossing the line end
//   other   a literal character
//
// On success the matched text is copied into prints[] (NUL-terminated) and 1
// is returned; otherwise 0 is returned and prints[] is left untouched.
// prints[] must have room for the matched text plus the terminator.
//
// Each star-free run after a '*' is placed at the leftmost position where the
// whole run matches. Since the end of the match is not anchored, this yields
// the shortest possible match and does not miss matches where the first
// occurrence of the character after '*' is a false start (e.g. "w*ld" on "wolld").
// A '*' at the very end of the pattern absorbs the rest of the line.
int regex_match(char scans[], int pos_scans, char regex[], char prints[])
{
	int len_regex = (int)strlen(regex);
	int iter_regex = 0;	// start of the current run inside regex[]
	int iter_scans = 0;	// number of characters of scans[] consumed so far
	int floating = 0;	// nonzero while a '*' precedes the current run
	int j;

	while (iter_regex < len_regex)
	{
		int run_stop = iter_regex;
		int taken;

		if (regex[iter_regex] == '*')
		{
			floating = 1;
			iter_regex++;
			continue;
		}

		for (;;)
		{
			taken = rm_match_run(scans, pos_scans + iter_scans, regex,
			                     iter_regex, len_regex, &run_stop);
			if (taken >= 0)
				break;
			// Without a preceding '*' the run is anchored; with one, slide
			// forward one character at a time until the line is exhausted.
			if (!floating || rm_line_ended(scans[pos_scans + iter_scans]))
				return 0;
			iter_scans++;
		}
		iter_scans += taken;
		iter_regex = run_stop;
		floating = 0;
	}

	if (floating)
	{// trailing '*': take everything up to (not including) the line end
		while (!rm_line_ended(scans[pos_scans + iter_scans]))
			iter_scans++;
	}

	for (j = 0; j < iter_scans; j++)
		prints[j] = scans[pos_scans + j];
	prints[j] = '\0';
	return 1;
}

/* Reads a pattern from standard input, scans string.in line by line and
 * writes every match to string.out as "<line>:<match>,<match>..." with one
 * output line per input line that contains at least one match.
 * Returns 0 on success; exits/returns 1 when a file cannot be opened, no
 * pattern can be read, or the output file cannot be flushed on close. */
int main()
{
	FILE *fin, *fout;
	char regex[21];		/* pattern: at most 20 characters plus terminator */
	char scans[81];		/* one chunk of input as delivered by fgets() */
	char prints[161];	/* text of the most recent match */
	int line = 0;
	int i;

	if ((fin = fopen("string.in", "r")) == NULL)
		exit(1);
	if ((fout = fopen("string.out", "w")) == NULL)
	{
		fclose(fin);
		exit(1);
	}
	/* The width 20 keeps the read inside regex[21]; a bare "%s" would let an
	 * over-long pattern overflow the buffer. A failed read (EOF) would
	 * otherwise leave regex[] uninitialised. */
	if (scanf("%20s", regex) != 1)
	{
		fclose(fin);
		fclose(fout);
		return 1;
	}
	/* NOTE: fgets() delivers at most 80 characters per call, so a longer
	 * input line arrives in several pieces and each piece is counted as a
	 * line of its own. */
	while (fgets(scans, 81, fin) != NULL)
	{
		line++;
		int flag = 1;	/* 1 until the first match on this line is written */
		for (i = 0; scans[i] != '\0'; i++)
		{
			/* try the pattern at every starting offset of the line */
			if (regex_match(scans, i, regex, prints))
			{
				if(flag) fprintf(fout, "%d:", line);
				else fprintf(fout, ",");
				fprintf(fout, "%s", prints);
				flag = 0;
			}
		}
		if (!flag) fprintf(fout,"\n");
	}

	fclose(fin);
	/* fclose() flushes buffered output; a failure means results were lost */
	if (fclose(fout) != 0)
		return 1;
	return 0;
}