計算文字相似度-java實現
阿新 • • 發佈:2019-02-20
原始碼:
Computeclass.java:
/** * @author Caiyong * @version 1.0 * * */ package pack; import java.text.NumberFormat; import java.util.Locale; public class Computeclass { /* * 計算相似度 * */ public static double SimilarDegree(String strA, String strB){ String newStrA = removeSign(strA); String newStrB = removeSign(strB); //用較大的字串長度作為分母,相似子串作為分子計算出字串相似度 int temp = Math.max(newStrA.length(), newStrB.length()); int temp2 = longestCommonSubstring(newStrA, newStrB).length(); return temp2 * 1.0 / temp; } /* * 將字串的所有資料依次寫成一行 * */ public static String removeSign(String str) { StringBuffer sb = new StringBuffer(); //遍歷字串str,如果是漢字數字或字母,則追加到ab上面 for (char item : str.toCharArray()) if (charReg(item)){ sb.append(item); } return sb.toString(); } /* * 判斷字元是否為漢字,數字和字母, * 因為對符號進行相似度比較沒有實際意義,故符號不加入考慮範圍。 * */ public static boolean charReg(char charValue) { return (charValue >= 0x4E00 && charValue <= 0X9FA5) || (charValue >= 'a' && charValue <= 'z') || (charValue >= 'A' && charValue <= 'Z') || (charValue >= '0' && charValue <= '9'); } /* * 求公共子串,採用動態規劃演算法。 * 其不要求所求得的字元在所給的字串中是連續的。 * * */ public static String longestCommonSubstring(String strA, String strB) { char[] chars_strA = strA.toCharArray(); char[] chars_strB = strB.toCharArray(); int m = chars_strA.length; int n = chars_strB.length; /* * 初始化矩陣資料,matrix[0][0]的值為0, * 如果字元陣列chars_strA和chars_strB的對應位相同,則matrix[i][j]的值為左上角的值加1, * 否則,matrix[i][j]的值等於左上方最近兩個位置的較大值, * 矩陣中其餘各點的值為0. */ int[][] matrix = new int[m + 1][n + 1]; for (int i = 1; i <= m; i++) { for (int j = 1; j <= n; j++) { if (chars_strA[i - 1] == chars_strB[j - 1]) matrix[i][j] = matrix[i - 1][j - 1] + 1; else matrix[i][j] = Math.max(matrix[i][j - 1], matrix[i - 1][j]); } } /* * 矩陣中,如果matrix[m][n]的值不等於matrix[m-1][n]的值也不等於matrix[m][n-1]的值, * 則matrix[m][n]對應的字元為相似字元元,並將其存入result陣列中。 * */ char[] result = new char[matrix[m][n]]; int currentIndex = result.length - 1; while (matrix[m][n] != 0) { if (matrix[n] == matrix[n - 1]) n--; else if (matrix[m][n] == matrix[m - 1][n]) m--; else { result[currentIndex] = chars_strA[m - 1]; currentIndex--; n--; m--; } } return new String(result); } /* * 結果轉換成百分比形式 * */ public static String similarityResult(double resule){ return NumberFormat.getPercentInstance(new Locale( "en ", "US ")).format(resule); } }
Simicalcu.java:
/** * @author Caiyong * @version 1.0 * * */ package pack; import java.awt.BorderLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.awt.event.KeyAdapter; import java.awt.event.KeyEvent; import java.io.FileReader; import java.io.IOException; import javax.swing.BorderFactory; import javax.swing.Box; import javax.swing.JButton; import javax.swing.JFileChooser; import javax.swing.JFrame; import javax.swing.JLabel; import javax.swing.JOptionPane; import javax.swing.JScrollPane; import javax.swing.JTextArea; import javax.swing.filechooser.FileNameExtensionFilter; import javax.swing.text.DefaultEditorKit; public class Simicalcu { public static void main(final java.lang.String[] args) { java.awt.EventQueue.invokeLater(new java.lang.Runnable(){//awt是單執行緒模式的,所有awt的元件只能在推薦方式的事件處理執行緒中訪問,從而保證元件狀態的正確性 public void run(){ final JFrame frame = new JFrame("字串相似度計算");//宣告JFrame final JLabel tag = new JLabel("提示:請點選選擇檔案按鈕選擇待比較檔案或者直接在文字框中輸入檔案。");//提示Label //檔案一 final JButton load = new JButton("選擇檔案一:");//選擇檔案Button final JLabel filename = new JLabel("");//檔案路徑 final JTextArea textarea = new JTextArea(6, 20);//文字框 textarea.setLineWrap(true);//設定為自動換行 textarea.setWrapStyleWord(true);//超長行在邊距處自動換行 final JScrollPane scroller = new JScrollPane(textarea);//滾動條效果 //載入檔案一的事件監聽 load.addActionListener(new ActionListener(){ private JFileChooser filechooser = null; private DefaultEditorKit kit = new DefaultEditorKit(); public void actionPerformed(ActionEvent e){ if (filechooser == null) { //設定預設檔案選擇路徑為桌面路徑 filechooser = new JFileChooser(System.getProperty("user.home")); } //過濾檔案型別,允許 開啟txt檔案和doc文件 filechooser.setFileFilter(new FileNameExtensionFilter("text file","txt","text","doc","docs")); if (filechooser.showOpenDialog(frame) == JFileChooser.APPROVE_OPTION) { //顯示檔案路徑 filename.setText(filechooser.getSelectedFile().getAbsolutePath()); FileReader reader = null; //將檔案內容讀取到textarea裡面&異常處理 try { reader = new FileReader(filechooser.getSelectedFile()); textarea.setText(""); kit.read(reader,textarea.getDocument(),0); } catch (Exception xe) { System.err.println(xe.getMessage()); } finally { if (reader != null) { try { reader.close(); } catch (IOException ioe) { System.err.println(ioe.getMessage()); } } } textarea.setCaretPosition(0);//滑鼠焦點 } return; } }); //檔案二 final JButton load2 = new JButton("選擇檔案二:"); final JLabel filename2 = new JLabel(""); final JTextArea textarea2 = new JTextArea(6, 20); textarea2.setLineWrap(true); textarea2.setWrapStyleWord(true); final JScrollPane scroller2 = new JScrollPane(textarea2); //載入檔案二的事件監聽 load2.addActionListener(new ActionListener(){ private JFileChooser filechooser2 = null; private DefaultEditorKit kit2 = new DefaultEditorKit(); public void actionPerformed(ActionEvent e){ if (filechooser2 == null) { filechooser2 = new JFileChooser(System.getProperty("user.home")); } filechooser2.setFileFilter(new FileNameExtensionFilter("text file","txt","text","doc","docs")); if (filechooser2.showOpenDialog(frame) == JFileChooser.APPROVE_OPTION) { filename2.setText(filechooser2.getSelectedFile().getAbsolutePath()); FileReader reader2 = null; try { reader2 = new FileReader(filechooser2.getSelectedFile()); textarea2.setText(""); kit2.read(reader2,textarea2.getDocument(),0); } catch (Exception xe2) { System.err.println(xe2.getMessage()); } finally { if (reader2 != null) { try { reader2.close(); } catch (IOException ioe2) { System.err.println(ioe2.getMessage()); } } } textarea2.setCaretPosition(0); } return; } }); //顯示相似內容的textarea final JTextArea textarea_res = new JTextArea(6, 20); textarea_res.setLineWrap(true); textarea_res.setWrapStyleWord(true); final JScrollPane scroller_res = new JScrollPane(textarea_res); //設定textarea_res透明 textarea_res.setOpaque(false); scroller_res.setOpaque(false); scroller_res.getViewport().setOpaque(false); //textarea和textarea2內容改變事件,刪除檔案路徑和相似內容 textarea.addKeyListener(new KeyAdapter(){ public void keyPressed(KeyEvent e) { filename.setText(""); textarea_res.setText(""); } }); textarea2.addKeyListener(new KeyAdapter(){ public void keyPressed(KeyEvent e) { filename2.setText(""); textarea_res.setText(""); } }); //計算,退出按鈕 final JButton start = new JButton("開始計算"); //開始計算相似度事件 start.addActionListener(new ActionListener(){ public void actionPerformed(ActionEvent e) { String temp_strA = textarea.getText(); String temp_strB = textarea2.getText(); String strA,strB; //如果兩個textarea都不為空且都不全為符號,則進行相似度計算,否則提示使用者進行輸入資料或選擇檔案 if(!(Computeclass.removeSign(temp_strA).length() == 0 && Computeclass.removeSign(temp_strB).length() == 0)){ if(temp_strA.length() >= temp_strB.length()) { strA = temp_strA; strB = temp_strB; }else{ strA = temp_strB; strB = temp_strA; } double result = Computeclass.SimilarDegree(strA, strB); //顯示相似內容於textarea_res textarea_res.setText("相似的內容為:"+Computeclass.longestCommonSubstring(strA, strB)); //結果 JOptionPane.showMessageDialog(null, " 相似度為:" + Computeclass.similarityResult(result), "計 算 結 果", JOptionPane.PLAIN_MESSAGE); }else{ JOptionPane.showMessageDialog(null, " 您好,請輸入正確內容! ", "提 示", JOptionPane.ERROR_MESSAGE); } } }); final JButton cancle = new JButton("退 出"); //退出事件 cancle.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { frame.dispose();//釋放窗體所佔資源 System.exit(0);//退出程式 } }); //總佈局 //檔案一north final Box north = Box.createVerticalBox();//豎排列 north.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));//邊距 north.add(tag); north.add(Box.createVerticalStrut(10)); north.add(load); north.add(Box.createVerticalStrut(5)); north.add(filename); north.add(scroller); frame.add(north,BorderLayout.NORTH); //檔案二center final Box center = Box.createVerticalBox(); center.setBorder(BorderFactory.createEmptyBorder(5,5,5,5)); center.add(load2); center.add(Box.createVerticalStrut(5)); center.add(filename2); center.add(scroller2); center.add(scroller_res); frame.add(center,BorderLayout.CENTER); //south final Box south = Box.createHorizontalBox(); south.setBorder(BorderFactory.createEmptyBorder(5,5,5,5)); south.add(Box.createHorizontalGlue());//按鈕居中顯示 south.add(start); south.add(Box.createHorizontalStrut(20));//水平間距 south.add(cancle); south.add(Box.createVerticalStrut(5)); frame.add(south,BorderLayout.SOUTH); frame.pack(); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);//窗體預設退出形式 frame.setLocationRelativeTo(null); frame.setVisible(true); } }); } }