大数据中很多排序场景需要先根据某一列进行排序,如果该列数据相同,再根据其他某列进行排序,这就是二次排序场景。例如:要找出网站活跃的前 10 名用户,活跃用户的评测标准是用户在当前季度中登录网站的天数最多;如果某些用户在当前季度登录网站的天数相同,那么再比较这些用户当前季度登录网站的时长进行排序,找出活跃用户。这就是一个典型的二次排序场景。
解决二次排序问题可以采用封装对象的方式,对象中实现对应的比较方法。
- 数据源
200 10
200 90
200 88
200 70
100 20
100 80
100 44
100 50
90 100
80 1000
80 300
90 400
- java
//1.封装排序实体类
package action;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
/**
 * Composite key for secondary sorting: instances are ordered by {@code first},
 * and ties on {@code first} are broken by {@code second}.
 *
 * <p>Implements {@link Serializable}; it is used as the key of a pair RDD.</p>
 *
 * @Author yqq
 * @Date 2021/12/10 16:46
 * @Version 1.0
 */
public class SecondSort implements Comparable<SecondSort>, Serializable {
    private int first;
    private int second;

    /** Creates a key with both components set to zero. */
    public SecondSort() {
    }

    /**
     * Creates a key from its two components.
     *
     * @param first  primary sort value
     * @param second secondary sort value, consulted only when {@code first} ties
     */
    public SecondSort(int first, int second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Compares by {@code first}, then by {@code second}.
     *
     * @param that the key to compare against
     * @return a negative, zero, or positive value when this key is less than,
     *         equal to, or greater than {@code that}
     */
    @Override
    public int compareTo(SecondSort that) {
        // Integer.compare avoids the overflow that "a - b" suffers when the
        // operands are far apart (e.g. Integer.MIN_VALUE - 1 wraps to positive).
        int byFirst = Integer.compare(this.first, that.first);
        return byFirst != 0 ? byFirst : Integer.compare(this.second, that.second);
    }
}
//2.实现类
package action;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
/**
 * Driver that reads tab-separated integer pairs from {@code data/sort.txt},
 * sorts the lines by a {@code SecondSort} key built from the two columns,
 * and prints each original line in sorted order.
 *
 * @Author yqq
 * @Date 2021/12/10 16:30
 * @Version 1.0
 */
public class SecondSortTest {
    /**
     * Runs the secondary-sort job on a local Spark master.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("local")
                .setAppName("sort");
        // try-with-resources closes the context even if the job throws.
        try (JavaSparkContext context = new JavaSparkContext(conf)) {
            context.setLogLevel("Error");
            context.textFile("data/sort.txt")
                    .mapToPair(line -> {
                        // Split once and reuse both columns.
                        String[] fields = line.split("\t");
                        return new Tuple2<>(
                                new SecondSort(Integer.parseInt(fields[0]), Integer.parseInt(fields[1])),
                                line);
                    })
                    .sortByKey()
                    .foreach(pair -> System.out.println(pair._2));
        }
    }
}
- scala
package action
import org.apache.spark.{SparkConf, SparkContext}
/**
  * Composite key for secondary sorting: ordered by `first`, with ties on
  * `first` broken by `second`.
  *
  * @param first  primary sort value
  * @param second secondary sort value, consulted only when `first` ties
  *
  * @Author yqq
  * @Date 2021/12/10 17:00
  * @Version 1.0
  */
case class SecondSortKey(first: Int, second: Int) extends Ordered[SecondSortKey] {

  /**
    * Compares by `first`, then by `second`.
    *
    * @param that the key to compare against
    * @return a negative, zero, or positive value when this key is less than,
    *         equal to, or greater than `that`
    */
  override def compare(that: SecondSortKey): Int = {
    // Integer.compare avoids the overflow that "a - b" suffers when the
    // operands are far apart (e.g. Int.MinValue - 1 wraps to a positive value).
    val byFirst = Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst else Integer.compare(this.second, that.second)
  }
}
/**
  * Driver that reads tab-separated integer pairs from `data/sort.txt`, sorts
  * the lines in descending order of a `SecondSortKey` built from the two
  * columns, and prints each original line.
  */
object SecondSortTest1 {

  /**
    * Runs the secondary-sort job on a local Spark master.
    *
    * @param args unused
    */
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("SecondSortTest1")
    )
    // Stop the context even if the job throws, so it is not leaked.
    try {
      context.setLogLevel("Error")
      context.textFile("data/sort.txt")
        .map { line =>
          // Split once and reuse both columns.
          val fields = line.split("\t")
          (SecondSortKey(first = fields(0).toInt, second = fields(1).toInt), line)
        }
        .sortByKey(ascending = false) // descending order
        .foreach(pair => println(pair._2))
    } finally {
      context.stop()
    }
  }
}