Accumulator overview diagram:
[Figure: how a Spark accumulator flows between Driver and Executors]

1. Scala
package examples

import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Author yqq
 * @Date 2021/12/12 00:22
 * @Version 1.0
 */
object AccumulatorTest1 {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("test11")
    )
    context.setLogLevel("ERROR")
    // Define the accumulator on the Driver side (initial value 0)
    val accumulator = context.longAccumulator
    context.textFile("data/words")
      .map(e => {
        // Each task updates the accumulator on the Executor side
        accumulator.add(1L)
        e
      }).collect()
    // Read the merged result back on the Driver side
    println(s"Accumulator value: ${accumulator.value}")
  }
}

2. Java

package examples;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

/**
 * @Author yqq
 * @Date 2021/12/12 00:04
 * @Version 1.0
 */
public class AccumulatorTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("test")
        );
        context.setLogLevel("ERROR");
        // Define the accumulator on the Driver side (initial value 0)
        LongAccumulator accumulator = context.sc().longAccumulator();
        context.textFile("data/words")
                .map(e -> {
                    // Each task updates the accumulator on the Executor side
                    accumulator.add(1L);
                    return e;
                }).collect();
        // Read the merged result back on the Driver side
        System.out.println("Accumulator value: " + accumulator.value());
    }
}

Notes:

  • An accumulator is defined and given its initial value on the Driver side; its value can only be read on the Driver side, and it can only be updated on the Executor side (see the sketch below).
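
A minimal Scala sketch of this division of labor follows. The accumulator name "lineCount", the object name, and the data/words input path are illustrative assumptions, not requirements of the API:

package examples

import org.apache.spark.{SparkConf, SparkContext}

// Sketch of the Driver/Executor split described in the note above.
object AccumulatorPatternSketch {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf().setMaster("local").setAppName("accumulator-pattern")
    )
    context.setLogLevel("ERROR")

    // Driver side: define the accumulator and give it its initial value (0).
    // Passing a name ("lineCount" is an illustrative choice) also makes the
    // accumulator visible in the Spark web UI.
    val lineCount = context.longAccumulator("lineCount")

    // Executor side: tasks only call add(); they never read the value.
    context.textFile("data/words").foreach(_ => lineCount.add(1L))

    // Driver side: only here is the merged value meaningful.
    println(s"lineCount = ${lineCount.value}")
    context.stop()
  }
}

Using an action (foreach) for the update is a deliberate choice in this sketch: Spark only guarantees that accumulator updates inside actions are applied once per task, whereas updates made in transformations such as map may be applied more than once if a task or stage is re-executed.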