groupByKey operates on an RDD of (K, V) pairs: it groups the records by key, transforming an RDD of (K, V) into an RDD of (K, Iterable&lt;V&gt;).
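For reference, the corresponding signatures in the Scala API (PairRDDFunctions) are:

def groupByKey(): RDD[(K, Iterable[V])]
def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])]

The second overload controls how many partitions the shuffled result has.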

  1. java
package transformations;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * @Author yqq
 * @Date 2021/12/09 23:49
 * @Version 1.0
 */
public class GroupByKeyTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("groupbykey")
        );
        context.setLogLevel("ERROR");
        // Build a pair RDD and group the values by key: the result
        // is a JavaPairRDD<String, Iterable<Integer>>.
        context.parallelizePairs(Arrays.asList(
                new Tuple2<>("科比", 24),
                new Tuple2<>("科比", 23),
                new Tuple2<>("威斯布鲁克", 0),
                new Tuple2<>("保罗", 3),
                new Tuple2<>("保罗", 12)
        )).groupByKey().foreach(e -> System.out.println(e));
        context.stop();
    }
}
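Running this locally prints each key exactly once, with all of its values collected into a single Iterable. Key order is not guaranteed, but the output should look roughly like this (the Java API prints each Iterable as a bracketed list):

(科比,[24, 23])
(威斯布鲁克,[0])
(保罗,[3, 12])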

Spark Transformation operator -> groupByKey (Scala)

  1. scala
package transformation

import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Author yqq
 * @Date 2021/12/09 23:55
 * @Version 1.0
 */
object GroupByKeyTest {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setAppName("groupbykey")
        .setMaster("local")
    )
    context.setLogLevel("ERROR")
    // Build a pair RDD and group the values by key: the result
    // is an RDD[(String, Iterable[Int])].
    context.parallelize(Array[(String, Int)](
      ("科比", 24),
      ("科比", 23),
      ("威斯布鲁克", 0),
      ("保罗", 3),
      ("保罗", 12)
    )).groupByKey().foreach(println)
    context.stop()
  }
}
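The Scala version behaves the same way, except the grouped values print as Spark's internal CompactBuffer, e.g. (科比,CompactBuffer(24, 23)).

Since groupByKey returns the full Iterable of values per key, a typical follow-up is to aggregate it with mapValues. A minimal sketch (a hypothetical follow-up reusing the `context` from the example above, not part of the original post):

// Sum each player's scores after grouping
context.parallelize(Array(("科比", 24), ("科比", 23), ("保罗", 3)))
  .groupByKey()
  .mapValues(_.sum)   // e.g. (科比,47), (保罗,3)
  .foreach(println)

When the end goal is an aggregate like this, reduceByKey is usually the better choice: it combines values on the map side instead of shuffling every individual value across the network.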
