1. 程式人生 > >Java敏感詞彙過濾

Java敏感詞彙過濾

package cn.yzh.ws.utils;

import java.io.*;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;

/**

Authorl:Mr.王

DateTime:2018/3/23 0023上午 11:19

Function: */ public class BadWord { private final static File wordfilter = new File(PropertiesUtils.get("txt").toString());//詞彙庫txt地址,自行更改

private static long lastModified = 0L; private static List words = new ArrayList();

private static void checkReload() { if (wordfilter.lastModified() > lastModified) { synchronized (BadWord.class) { try { lastModified = wordfilter.lastModified(); LineIterator lines = FileUtils.lineIterator(wordfilter, "utf-8"); while (lines.hasNext()) { String line = lines.nextLine(); if (StringUtils.isNotBlank(line)) words.add(StringUtils.trim(line).toLowerCase()); } } catch (IOException e) { e.printStackTrace(); } } } }

/**
    檢查敏感字內容
    @param contents */ public static String check(String... contents) { if (!wordfilter.exists()) return null; checkReload(); for (String word : words) { for (String content : contents) if (content != null && content.indexOf(word) >= 0) return word; } return null; }

/**
    檢查字串是否包含敏感詞
    @param content
    @return */ public static boolean isContain(String content) { if (!wordfilter.exists()) return false; checkReload(); for (String word : words) { if (content != null && content.indexOf(word) >= 0) return true; } return false; }

/**
    替換掉字串中的敏感詞
    @param str 等待替換的字串
    @param replaceChar 替換字元
    @return */ public static String replace(String str, String replaceChar) { checkReload(); for (String word : words) { if (str.indexOf(word) >= 0) { String reChar = ""; for (int i = 0; i < word.length(); i++) { reChar += replaceChar; } str = str.replaceAll(word, reChar); } } return str; }

public static List lists() { checkReload(); return words; }

/**
    新增敏感詞
    @param word
    @throws IOException */ public static void add(String word) throws IOException { word = word.toLowerCase(); if (!words.contains(word)) { words.add(word); FileWriter fw = null; try { //如果檔案存在,則追加內容;如果檔案不存在,則建立檔案 File f = new File(PropertiesUtils.get("txt").toString()); fw = new FileWriter(f, true); } catch (IOException e) { e.printStackTrace(); } PrintWriter pw = new PrintWriter(fw); pw.println(word); pw.flush(); try { fw.flush(); pw.close(); fw.close(); } catch (IOException e) { e.printStackTrace(); } lastModified = wordfilter.lastModified(); } }

/**
    刪除敏感詞
    @param word
    @throws IOException */ public static void delete(String word) throws IOException { word = word.toLowerCase(); words.remove(word); FileUtils.writeLines(wordfilter, "UTF-8", words); lastModified = wordfilter.lastModified(); }

/**
    功能:Java讀取txt檔案的內容 步驟:
    1:先獲得檔案控制代碼
    2:獲得檔案控制代碼當做是輸入一個位元組碼流,需要對這個輸入流進行讀取
    3:讀取到輸入流後,需要讀取生成位元組流
    4:一行一行的輸出。readline()。 備註:需要考慮的是異常情況
    Function:去除Txx文件中的重複內容 */ public static void readTxtFile() { try { String encoding = "UTF-8"; File file = new File(PropertiesUtils.get("txt").toString()); if (file.isFile() && file.exists()) { // 判斷檔案是否存在 InputStreamReader read = new InputStreamReader( new FileInputStream(file), encoding);// 考慮到編碼格式 BufferedReader bufferedReader = new BufferedReader(read); String lineTxt = null; ArrayList ayyarsListText = new ArrayList(); // 得到檔案中的資料 while ((lineTxt = bufferedReader.readLine()) != null) { ayyarsListText.add(lineTxt); } read.close(); ArrayList resultList = new ArrayList();//結果集將被儲存在這個集合中 // 去除檔案中的重複資料 for (String item : ayyarsListText) { if (!resultList.contains(item)) { resultList.add(item); } } for (Object resultLists : resultList) { BadWord.addHeavy(String.valueOf(resultLists)); } } else { System.err.println("找不到指定的檔案"); } } catch (Exception e) { System.err.println("讀取檔案內容出錯"); e.printStackTrace(); } }

/**
    Authorl:Mr.王
    DateTime:2018/3/23 0023 下午 18:02
    Function:去重成功以後,呼叫該方法重新寫入,請勿自主呼叫該方法.
    Parameter: */ public static void addHeavy(String word) throws IOException { word = word.toLowerCase(); if (!words.contains(word)) { words.add(word); FileUtils.writeLines(wordfilter, "UTF-8", words); lastModified = wordfilter.lastModified(); } }

public static void main(String[] args) throws Exception { // System.out.println(BadWord.replace("釣魚島是中國的","*")); // BadWord.add("釣魚島"); // BadWord.readTxtFile(); } }