1. 程式人生 > >Java8新特性——lambda表達式.(案例:詞頻統計)

Java8新特性——lambda表達式.(案例:詞頻統計)

word ont lose args list lower cep stream spa

需求:讀入一個文本文件,確定所有單詞的使用頻率並從高到低排序,打印出所有單詞及其頻率的排序列表

先用傳統方法解:

 1 package cn._1.wordfrequency;
 2 
 3 import java.util.HashSet;
 4 import java.util.Map;
 5 import java.util.Set;
 6 import java.util.TreeMap;
 7 import java.util.regex.Matcher;
 8 import java.util.regex.Pattern;
 9 
/*
 * Functional Thinking by Neal Ford (O'Reilly).
 */

/**
 * Word-frequency counter written in the traditional, imperative style
 * (explicit loop, no streams or lambdas).
 */
public class Word {

    /**
     * Common words that are excluded from the frequency count.
     * Shared by all instances and built exactly once; a plain static set
     * avoids the anonymous-subclass ("double brace") initialisation, which
     * creates an extra class and captures the enclosing instance.
     */
    private static final Set<String> NON_WORDS = buildNonWords();

    /** Matches one run of word characters; compiled once instead of on every call. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /** Builds the set of excluded words. */
    private static Set<String> buildNonWords() {
        String[] excluded = {
            "the", "and", "of", "to", "a",
            "i", "it", "in", "or", "is",
            "as", "so", "but", "be"
        };
        Set<String> result = new HashSet<>();
        for (String word : excluded) {
            result.add(word);
        }
        return result;
    }

    /**
     * Counts how often each word occurs in the given text.
     *
     * <p>A word is a match of the regular expression {@code \w+}. Words are
     * lower-cased before counting, and the words in {@code NON_WORDS} are skipped.
     *
     * @param words the text to analyse; must not be {@code null}
     * @return a map from lower-cased word to its number of occurrences,
     *         iterated in the natural (alphabetical) order of the keys
     */
    public Map<String, Integer> wordFreq(String words) {
        // TreeMap keeps the keys in natural order for the caller's output.
        Map<String, Integer> wordMap = new TreeMap<>();
        Matcher m = WORD_PATTERN.matcher(words);
        while (m.find()) {
            // Locale.ROOT keeps the case mapping independent of the default
            // locale (e.g. under a Turkish locale "I" would lower-case to a
            // dotless "ı" and no longer match the excluded word "i").
            String word = m.group().toLowerCase(java.util.Locale.ROOT);
            if (!NON_WORDS.contains(word)) {
                Integer count = wordMap.get(word);
                wordMap.put(word, count == null ? 1 : count + 1);
            }
        }
        return wordMap;
    }
}

再使用Java8的新特性解:

 1 package cn._1.wordfrequency;
 2 
 3 import java.util.ArrayList;
 4 import java.util.HashSet;
 5 import java.util.List;
 6 import java.util.Map;
 7 import java.util.Set;
 8 import java.util.TreeMap;
 9 import java.util.regex.Matcher;
10 import java.util.regex.Pattern;
11 
12 /*
13  * Functional Thinking by Neal Ford(O'Reilly).
14  */
/**
 * Word-frequency counter written with Java 8 streams and lambda expressions.
 */
public class Word2 {

    /**
     * Common words that are excluded from the frequency count.
     * Shared by all instances and built exactly once; a plain static set
     * avoids the anonymous-subclass ("double brace") initialisation, which
     * creates an extra class and captures the enclosing instance.
     */
    private static final Set<String> NON_WORDS = buildNonWords();

    /** Matches one run of word characters; compiled once instead of on every call. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

    /** Builds the set of excluded words. */
    private static Set<String> buildNonWords() {
        String[] excluded = {
            "the", "and", "of", "to", "a",
            "i", "it", "in", "or", "is",
            "as", "so", "but", "be"
        };
        Set<String> result = new HashSet<>();
        for (String word : excluded) {
            result.add(word);
        }
        return result;
    }

    /**
     * Collects every match of a pattern in a text into a list.
     *
     * @param words   the text to scan
     * @param pattern the compiled regular expression to look for
     * @return all matches, in the order they occur in the text
     */
    private static List<String> regexToList(String words, Pattern pattern) {
        List<String> wordList = new ArrayList<>();
        Matcher m = pattern.matcher(words);
        while (m.find()) {
            wordList.add(m.group());
        }
        return wordList;
    }

    /**
     * Counts how often each word occurs in the given text.
     *
     * <p>A word is a match of the regular expression {@code \w+}. Words are
     * lower-cased before counting, and the words in {@code NON_WORDS} are skipped.
     *
     * @param words the text to analyse; must not be {@code null}
     * @return a map from lower-cased word to its number of occurrences,
     *         iterated in the natural (alphabetical) order of the keys
     */
    public Map<String, Integer> wordFreq(String words) {
        /*
         * stream():  turns the collection into a stream of elements.
         * map:       returns a stream of the results of applying the function to each element.
         * filter:    returns a stream of the elements that match the predicate.
         * collect:   folds the stream into a result container. Here each word
         *            contributes 1 and equal keys are added together, so the
         *            pipeline has no side effects on outside state. A TreeMap
         *            is used so that the result is naturally ordered by key.
         */
        return regexToList(words, WORD_PATTERN).stream()
            // Locale.ROOT keeps the case mapping independent of the default locale.
            .map(w -> w.toLowerCase(java.util.Locale.ROOT))
            .filter(w -> !NON_WORDS.contains(w))
            .collect(java.util.stream.Collectors.toMap(
                w -> w, w -> 1, Integer::sum, TreeMap::new));
    }
}

測試類:

 1 package cn._1.wordfrequency;
 2 
 3 import java.io.FileInputStream;
 4 import java.io.IOException;
 5 import java.util.ArrayList;
 6 import java.util.Collections;
 7 import java.util.Comparator;
 8 import java.util.List;
 9 import java.util.Map;
10 import java.util.Map.Entry;
11 
12 public class Mmain {
13 
14     public static void main(String[] args) throws IOException {
15         String str = readText("/home/yanshaochen/workspace/Functional_Thinking_Examples/mflie/sucai.txt");
16         //調用老方法
17         /*Map<String, Integer> map = new Word().wordFreq(str);*/
18         //調用新方法:
19         Map<String, Integer> map = new Word2().wordFreq(str);
20         //自然排序:
21         for (Entry<String, Integer> item : map.entrySet()) {
22             System.out.println(item.getKey()+","+item.getValue());
23         }
24         //按照value進行排序(摘自網絡):
25         /*List<Map.Entry<String, Integer>> infoIds = new ArrayList<>(map.entrySet());
26         Collections.sort(infoIds, new Comparator<Map.Entry<String, Integer>>() {
27             public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {      
28                 return (o2.getValue() - o1.getValue()); 
29                 //return (o1.getKey()).toString().compareTo(o2.getKey());
30                 }
31             }); 
32         for (Entry<String, Integer> item : infoIds) {
33             System.out.println(item.getKey()+","+item.getValue());
34         }*/
35     }
36 
37     /*
38      * IO流
39      */
40     private static String readText(String path) throws IOException {
41         FileInputStream fis = new FileInputStream(path);
42         byte[] bytes = new byte[1024];
43         int data;
44         String str ="";
45         while((data = fis.read(bytes))!=-1){
46             str += new String(bytes, 0, data);
47         }
48         fis.close();
49         return str;
50     }
51 }

Java8新特性——lambda表達式.(案例:詞頻統計)