spark學習記錄(八、廣播變數和累加器)
阿新 • • 發佈:2019-01-13
一、廣播變數
public class JavaExample { public static void main(String[] args) { SparkConf conf = new SparkConf(); conf.setMaster("local").setAppName("JavaExample"); JavaSparkContext sc = new JavaSparkContext(conf); final List<String> list = Arrays.asList("hello world", "hello spark"); //廣播變數 final Broadcast<List<String>> broadcast = sc.broadcast(list); JavaRDD<String> rdd1 = sc.parallelize(Arrays.asList("hello world", "hello spark", "hello java")); JavaRDD<String> rdd2 = rdd1.filter(new Function<String, Boolean>() { public Boolean call(String line) throws Exception { return !broadcast.value().contains(line); } }); rdd2.foreach(new VoidFunction<String>() { public void call(String s) throws Exception { System.out.println(s); } }); } }
二、累加器
/** Accumulator demo: counts the lines of a text file with a Spark accumulator.
  *
  * Each task adds 1 to the accumulator for every line it processes; the driver
  * prints the final total once the job has finished.
  */
object ScalaExample {

  /** Runs the demo on a local Spark master.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ScalaExample")
    val sc = new SparkContext(conf)
    try {
      val rdd1 = sc.textFile("C://words.txt")

      // Accumulator, starting at 0.
      // NOTE(review): SparkContext.accumulator is the v1 API, deprecated since
      // Spark 2.0 in favour of sc.longAccumulator — migrate once the target
      // Spark version is confirmed.
      val accumulator = sc.accumulator(0)

      // Update the accumulator inside an action (foreach), not a transformation
      // (map): Spark only guarantees that each task's update is applied exactly
      // once for updates performed in actions. A re-executed transformation could
      // count lines twice. This also avoids collecting an RDD that was discarded.
      rdd1.foreach { _ =>
        accumulator.add(1)
        // Progress output from inside the task; only the driver-side
        // accumulator.value read below is the authoritative total.
        println(accumulator)
      }

      println(" i = " + accumulator.value)
    } finally {
      // Always stop the context, even when the job fails, to release its resources.
      sc.stop()
    }
  }
}