Java版運算元彙總(包括filter,collect,take,first,sample等)【Java純程式碼】
阿新 • • 發佈:2019-01-26
package com.bjsxt;

import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;

/**
 * Demonstrates a handful of basic Spark RDD operators through the Java API:
 * {@code filter}, {@code foreach}, {@code collect}, {@code first}, {@code take}
 * and {@code sample}.
 *
 * <p>The input is the text file {@code ./words}, resolved against the working
 * directory; every result is printed to standard output.
 */
public class Scala {

    /**
     * Runs the operator walkthrough against a local Spark master.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Configuration: run with the "local" master under the application name "test".
        SparkConf conf = new SparkConf().setMaster("local").setAppName("test");

        // The context is the entry point used to create RDDs.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // One printer instance is shared by both foreach calls below.
            VoidFunction<String> printLine = new VoidFunction<String>() {
                @Override
                public void call(String value) throws Exception {
                    System.out.println(value);
                }
            };

            // textFile: load the file as an RDD of strings, one element per line.
            JavaRDD<String> line = sc.textFile("./words");

            // filter: keep only the lines that are exactly "hello Hive".
            JavaRDD<String> filter = line.filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String lines) throws Exception {
                    return "hello Hive".equals(lines);
                }
            });

            // Prints the RDD object's toString(), not its elements.
            System.out.println(filter);

            // foreach: apply the printer to every element of the filtered RDD.
            filter.foreach(printLine);

            // collect: bring the filtered elements back as a List.
            List<String> collect = filter.collect();
            for (String s : collect) {
                System.out.println(s);
            }

            // first: the first element of the unfiltered RDD.
            String first = line.first();
            System.out.println(first);

            // take: the first 3 elements of the unfiltered RDD.
            List<String> take = line.take(3);
            for (String s : take) {
                System.out.println(s);
            }

            // sample(withReplacement, fraction, seed), per the Spark API:
            //   withReplacement = false -> an element can be picked at most once
            //                     (true would allow the same element repeatedly);
            //   fraction = 0.1          -> expected share of elements to keep,
            //                              not an exact count;
            //   seed = 100              -> fixed seed for the random generator.
            JavaRDD<String> sample = line.sample(false, 0.1, 100);
            sample.foreach(printLine);
        } finally {
            // Always release the context, even when one of the jobs above throws.
            sc.stop();
        }
    }
}