1. 程式人生 > >unity c#非法字元(髒詞)檢測

unity c#非法字元(髒詞)檢測

專案中非法字元檢測是必須的,聊天系統如果不遮蔽各種不文明用語是不行的

先說說我的原理吧

1.讀取非法字元表,把相同的首字元歸類到字典,類似新華字典那樣

2.然後逐一走訪輸入字串的每個字元,找出以該字元為首字元的字典;遍歷該字典中的每個詞,從當前字元之後擷取與該詞等長的子字串進行比較,如果相同則認為含有非法字元

下面是測試結果

下面為完整程式碼,有註釋應該比較容易看懂


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using UnityEngine;

/// <summary>
/// Illegal keyword (profanity) filter.
/// The word list is indexed by first character; every position of the input is
/// then tested against the words that start with the character found there.
/// Both the word list and the input are normalized to half-width, lower-case
/// text before comparison. Separator characters placed between the characters
/// of a word are NOT skipped: a word only matches a contiguous run of text.
/// </summary>
public class FilterWord
{
    /// <summary>
    /// Name of the TextAsset (inside a Resources folder) that holds the word list,
    /// one word per line.
    /// </summary>
    private const string DictionaryResourceName = "dirtywords";

    /// <summary>
    /// Creates a filter whose word list is read from the "dirtywords" resource.
    /// If the resource is missing an error is logged and the word list stays empty.
    /// </summary>
    public FilterWord()
    {
        TextAsset asset = Resources.Load<TextAsset>(DictionaryResourceName);
        if (asset == null)
        {
            Debug.LogError("FilterWord: word list resource '" + DictionaryResourceName
                + "' was not found; no words will be filtered.");
            return;
        }
        m_AllFilterWord = asset.text ?? string.Empty;
    }

    private string m_AllFilterWord = string.Empty;

    /// <summary>
    /// Raw text of the word list (one word per line). Assigning a new value
    /// invalidates the loaded dictionary so that it is rebuilt on the next
    /// <see cref="LoadDictionary"/> / <see cref="Filter"/> call.
    /// </summary>
    public string AllFilterWord
    {
        get { return m_AllFilterWord; }
        set
        {
            m_AllFilterWord = value ?? string.Empty;
            m_IsInitalize = false;
        }
    }

    /// <summary>
    /// In-memory lexicon indexed by the first character of each word.
    /// Sized char.MaxValue + 1 so that every possible char value, including
    /// '\uFFFF', is a valid index.
    /// </summary>
    private readonly WordGroup[] MEMORYLEXICON = new WordGroup[char.MaxValue + 1];

    private string sourctText = string.Empty;
    private bool m_IsInitalize = false;

    /// <summary>
    /// Text to be checked by <see cref="Filter"/>. A null assignment is stored as
    /// an empty string.
    /// </summary>
    public string SourceText
    {
        get { return sourctText; }
        set { sourctText = value ?? string.Empty; }
    }

    private readonly List<string> illegalWords = new List<string>();

    /// <summary>
    /// Illegal words found by the most recent <see cref="Filter"/> call, in the
    /// spelling used by the source text. The list is cleared at the start of
    /// every <see cref="Filter"/> call.
    /// </summary>
    public List<string> IllegalWords
    {
        get { return illegalWords; }
    }

    /// <summary>
    /// Tells whether a character is an ASCII letter (a-z or A-Z).
    /// </summary>
    /// <param name="character">Character to test.</param>
    /// <returns>True for an ASCII letter, false otherwise.</returns>
    private static bool isAlphabet(char character)
    {
        return (character >= 'a' && character <= 'z')
            || (character >= 'A' && character <= 'Z');
    }

    /// <summary>
    /// Converts full-width characters to half-width and lower-cases the result.
    /// </summary>
    /// <param name="input">Any non-null string.</param>
    /// <returns>Half-width, lower-case string of the same length as the input.</returns>
    /// <remarks>
    /// The full-width space is 12288 and the half-width space is 32. Every other
    /// full-width character (65281-65374) differs from its half-width
    /// counterpart (33-126) by 65248.
    /// </remarks>
    private static string ToDBC(string input)
    {
        char[] c = input.ToCharArray();
        for (int i = 0; i < c.Length; i++)
        {
            if (c[i] == 12288)
            {
                c[i] = (char)32;
                continue;
            }
            if (c[i] > 65280 && c[i] < 65375)
            {
                c[i] = (char)(c[i] - 65248);
            }
        }
        // Invariant casing keeps matching independent of the device culture.
        return new string(c).ToLowerInvariant();
    }

    /// <summary>
    /// Builds the in-memory lexicon from <see cref="AllFilterWord"/>.
    /// Does nothing when the lexicon is already loaded for the current word list.
    /// </summary>
    public void LoadDictionary()
    {
        if (m_IsInitalize)
        {
            return;
        }

        m_IsInitalize = true;
        Array.Clear(MEMORYLEXICON, 0, MEMORYLEXICON.Length);

        // Splitting on both CR and LF handles \n, \r\n and \r line endings.
        string[] lines = m_AllFilterWord.Split(
            new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        // Ordinal set used to drop duplicate entries.
        HashSet<string> seen = new HashSet<string>();
        foreach (string line in lines)
        {
            string word = ToDBC(line).Trim();
            if (word.Length == 0 || !seen.Add(word))
            {
                continue;
            }

            WordGroup group = MEMORYLEXICON[word[0]];
            if (group == null)
            {
                group = new WordGroup();
                MEMORYLEXICON[word[0]] = group;
            }
            // The group is keyed by the first character, so only the rest is stored.
            group.Add(word.Substring(1));
        }
    }

    /// <summary>
    /// Tests whether a word matches the normalized text at a given position.
    /// </summary>
    /// <param name="text">Normalized (half-width, lower-case) source text.</param>
    /// <param name="start">Index of the word's first character, already known to match.</param>
    /// <param name="tail">The word without its first character; may be empty.</param>
    /// <returns>True when the whole word is present at <paramref name="start"/>.</returns>
    private static bool MatchesAt(string text, int start, string tail)
    {
        int end = start + 1 + tail.Length; // index just past the candidate match
        if (end > text.Length)
        {
            return false;
        }

        for (int k = 0; k < tail.Length; k++)
        {
            if (text[start + 1 + k] != tail[k])
            {
                return false;
            }
        }

        // A word that ends in a letter must not be immediately followed by
        // another letter, otherwise it is only the prefix of a longer word.
        if (tail.Length > 0
            && end < text.Length
            && isAlphabet(tail[tail.Length - 1])
            && isAlphabet(text[end]))
        {
            return false;
        }
        return true;
    }

    /// <summary>
    /// Finds the longest dictionary word that starts at a given position.
    /// </summary>
    /// <param name="text">Normalized (half-width, lower-case) source text.</param>
    /// <param name="start">Position to test.</param>
    /// <returns>Length of the longest matching word, or 0 when nothing matches.</returns>
    private int LongestMatchAt(string text, int start)
    {
        WordGroup group = MEMORYLEXICON[text[start]];
        if (group == null)
        {
            return 0;
        }

        int best = 0;
        int count = group.Count();
        for (int z = 0; z < count; z++)
        {
            string tail = group.GetWord(z);
            int length = tail.Length + 1;
            if (length > best && MatchesAt(text, start, tail))
            {
                best = length;
            }
        }
        return best;
    }

    /// <summary>
    /// Finds every illegal word in <see cref="SourceText"/> and masks it.
    /// </summary>
    /// <param name="replaceChar">Character written over each character of a matched word.</param>
    /// <returns>
    /// The source text with every matched word overwritten by
    /// <paramref name="replaceChar"/>, or an empty string when the source text is empty.
    /// </returns>
    public string Filter(char replaceChar)
    {
        LoadDictionary();
        illegalWords.Clear();

        if (string.IsNullOrEmpty(sourctText))
        {
            return string.Empty;
        }

        // Normalize once; ToDBC maps character by character, so indices in the
        // normalized text line up with indices in the original text.
        string normalized = ToDBC(sourctText);
        char[] output = sourctText.ToCharArray();

        int position = 0;
        while (position < normalized.Length)
        {
            int matched = LongestMatchAt(normalized, position);
            if (matched == 0)
            {
                position++;
                continue;
            }

            // Record the word as the user typed it, then mask it.
            illegalWords.Add(new string(output, position, matched));
            for (int k = position; k < position + matched; k++)
            {
                output[k] = replaceChar;
            }
            position += matched;
        }
        return new string(output);
    }
}

/// <summary>
/// A collection of words that share the same first character.
/// </summary>
class WordGroup
{
    /// <summary>
    /// Backing storage, in insertion order.
    /// </summary>
    private readonly List<string> entries = new List<string>();

    /// <summary>
    /// Creates an empty group.
    /// </summary>
    public WordGroup()
    {
    }

    /// <summary>
    /// Appends a word to the group.
    /// </summary>
    /// <param name="word">Word to append.</param>
    public void Add(string word)
    {
        entries.Add(word);
    }

    /// <summary>
    /// Gets the number of words in the group.
    /// </summary>
    /// <returns>Number of words added so far.</returns>
    public int Count()
    {
        int total = entries.Count;
        return total;
    }

    /// <summary>
    /// Gets the word stored at the given index.
    /// </summary>
    /// <param name="index">Zero-based index, in insertion order.</param>
    /// <returns>The word at <paramref name="index"/>.</returns>
    public string GetWord(int index)
    {
        string found = entries[index];
        return found;
    }
}

下面是抽出一個統一方法來呼叫檢測

主要兩個方法

1.檢測是否有非法字元,返回bool

2.把非法字元轉成*號,返回string

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Static convenience wrappers around one lazily created, shared
/// <see cref="FilterWord"/> instance.
/// </summary>
public class SystemUtil
{
    private static FilterWord m_FilterWord;

    /// <summary>
    /// Tells whether a string contains illegal words.
    /// </summary>
    /// <param name="str">Text to check; may be null or empty.</param>
    /// <returns>
    /// True when filtering changes the text; false otherwise, including for
    /// null or empty input.
    /// </returns>
    public static bool IsInvaild(string str)
    {
        // Null/empty text has nothing to filter; this also avoids dereferencing null.
        if (string.IsNullOrEmpty(str))
        {
            return false;
        }
        return str != Filter(str);
    }

    /// <summary>
    /// Replaces every character of each illegal word with '*'.
    /// </summary>
    /// <param name="str">Text to filter; may be null or empty.</param>
    /// <returns>The masked text, or an empty string for null or empty input.</returns>
    public static string Filter(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        FilterWord filter = filterWord;
        filter.SourceText = str;
        return filter.Filter('*');
    }

    /// <summary>
    /// The shared filter instance, created on first access.
    /// </summary>
    public static FilterWord filterWord
    {
        get
        {
            if (null == m_FilterWord)
            {
                m_FilterWord = new FilterWord();
            }
            return m_FilterWord;
        }
    }
}

下面是工程下載地址

連結:https://pan.baidu.com/s/1x1RyEugV6N4D_Sj2_JgkUQ 
提取碼:lvc3