1. 程式人生 > Hadoop 統計檔案中單詞出現的次數

Hadoop 統計檔案中單詞出現的次數

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.et</groupId>
  <artifactId>MavenProject</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>war</packaging>
  <!-- 新增專案jdk編譯外掛 -->
  <build>
     <plugins>
	   <plugin>
	     <groupId>org.apache.maven.plugins</groupId>
	  	 <artifactId>maven-compiler-plugin</artifactId>
	  	 <configuration>
	  	   <source>1.7</source>
	  	   <target>1.7</target>
	  	   <encoding>UTF-8</encoding>
	  	 </configuration>
	   </plugin>
     </plugins>
  </build>
</project>
web.xml

<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://java.sun.com/xml/ns/javaee" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd" version="2.5">
  <display-name>MavenProject</display-name>
  <welcome-file-list>
    <welcome-file>index.html</welcome-file>
    <welcome-file>index.htm</welcome-file>
    <welcome-file>index.jsp</welcome-file>
    <welcome-file>default.html</welcome-file>
    <welcome-file>default.htm</welcome-file>
    <welcome-file>default.jsp</welcome-file>
  </welcome-file-list>
</web-app>
########## 我的演示原始檔位置

[root@localhost localhost]# cd /mnt/hgfs/VMwareShare/

[root@localhost VMwareShare]# ll

總用量 218756

-rwxrwxrwx. 1 root root   7073612 9月  24 2016 apache-tomcat-6.0.45.tar.gz

-rwxrwxrwx. 1 root root 216929574 1月   6 09:43 hadoop-2.7.5.tar.gz

-rwxrwxrwx. 1 root root       757 1月   5 20:19 pom.xml

-rwxrwxrwx. 1 root root       640 1月   5 20:11 web.xml


########## 把xml檔案複製到我的/home/localhost目錄中

[root@localhost VMwareShare]# cp /mnt/hgfs/VMwareShare/*.xml /home/localhost/


########## 開始統計

[root@localhost VMwareShare]# hadoop jar /usr/local/hadoop-2.7.5/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.5.jar wordcount /home/localhost/ /mnt/hgfs/VMwareShare/result

#注意:/mnt/hgfs/VMwareShare/result 是我指定的統計結果存放目錄。result 目錄不能事先建立,否則作業會因輸出目錄已存在而失敗;hadoop 會在執行時自動建立它


##########  檢視統計結果

[root@localhost /]# cd /mnt/hgfs/VMwareShare/result

[root@localhost result]# ll

總用量 0

-rwxrwxrwx. 1 root root 1298 1月   6 18:53 part-r-00000

-rwxrwxrwx. 1 root root    0 1月   6 18:53 _SUCCESS

[root@localhost result]# more part-r-00000

--> 1

<!-- 1

</build> 1

</configuration> 1

</plugin> 1

</plugins> 1

</project> 1

</web-app> 1

</welcome-file-list> 1

<?xml 1

<artifactId>MavenProject</artifactId> 1

<artifactId>maven-compiler-plugin</artifactId> 1

<build> 1

<configuration> 1

<display-name>MavenProject</display-name> 1

<encoding>UTF-8</encoding> 1

<groupId>cn.et</groupId> 1

<groupId>org.apache.maven.plugins</groupId> 1

<modelVersion>4.0.0</modelVersion> 1

<packaging>war</packaging> 1

<plugin> 1

<plugins> 1

<project 1

<source>1.7</source> 1

<target>1.7</target> 1

<version>0.0.1-SNAPSHOT</version> 1

<web-app 1

<welcome-file-list> 1

<welcome-file>default.htm</welcome-file> 1

<welcome-file>default.html</welcome-file> 1

<welcome-file>default.jsp</welcome-file> 1

<welcome-file>index.htm</welcome-file> 1

<welcome-file>index.html</welcome-file> 1

<welcome-file>index.jsp</welcome-file> 1

encoding="UTF-8"?> 1

http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd" 1

http://maven.apache.org/xsd/maven-4.0.0.xsd"> 1

version="1.0" 1

version="2.5"> 1

xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 2

xmlns="http://java.sun.com/xml/ns/javaee" 1

xmlns="http://maven.apache.org/POM/4.0.0" 1

xsi:schemaLocation="http://java.sun.com/xml/ns/javaee 1

xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 1

新增專案jdk編譯外掛 1