一、聚合函数

对一组值进行计算,并返回单个值,也被称为组函数;

聚合计算过程:选定分组字段 – 分组 – 计算:

1.选定分组字段:在 select 里;

2.分组:用 group by;

3.计算:根据实际情况,选定聚合函数;

聚合函数

1.计数:count()

2.求和:sum()

3.最大值/最小值:max/min()

4.平均值:avg()

聚合值过滤

聚合值过滤:having,不是where!

目前SQL执行顺序:from – where – group by – having – select

eg:按省份和高考分数分段,统计高分段的人数:

安徽学生,高考分数>620;

江苏学生,高考分数>610;

其他省份学生,高考分数>600;

二、举例练习

表例

--常规聚合函数使用:

--注意事项:hive中distinct和group by 不能一起用

--1、查询每个学生考试单科最高分是多少?字段:学号、姓名、单科最高分;

select

stu_id as 学号

,name as 姓名

,max(score) as 单科最高分

from score_info

group by stu_id,name;

--2、查询每个学生考试单科最低分是多少?字段:学号、姓名、单科最低分;

select

stu_id as 学号

,name as 姓名

,min(score) as 单科最低分

from score_info

group by stu_id,name;

--3、查询每个学生考试平均分是多少?字段:学号、姓名、考试平均分;

select

stu_id as 学号

,name as 姓名

,avg(score) as 考试平均分

from score_info

group by stu_id,name;

--4、统计每个学生考了几科?字段:学号、姓名、考试科数;

select

stu_id as 学号

,name as 姓名

,count(distinct subject_id) as 考试科数

from score_info

group by stu_id,name;

--5、统计每个学生考试总分是多少?字段:学号、姓名、考试总分;

select

stu_id as 学号

,name as 姓名

,sum(score) as 考试总分

from score_info

group by stu_id,name;

--6、查询每个省份,不同学院的人数有多少?字段:省份、学院、人数;

select

from_where as 省份

,college as 学院

,count(distinct stu_id) as 人数

from student_info

group by from_where,college

order by from_where,college;

--7、查询每个班级人数有多少?字段:学院、专业、班级、人数;

select

college as 学院

,major as 专业

,class as 班级

,count(stu_id) as 人数

from class_info

group by college,major,class

order by college,major;

--与其他函数结合使用:

--1、查询高数=100分 和 物理=100分的人数有多少?字段:高数满分人数,物理满分人数;

--if

select

count(if(subject='高数' and score=100,stu_id,null)) as 高数满分人数

,count(if(subject='物理' and score=100,stu_id,null)) as 物理满分人数

from score_info;

--case when

count(distinct (case when subject='高数' and score=100 then stu_id else null end)) as 高数满分人数

,count(distinct (case when subject='物理' and score=100 then stu_id else null end)) as 物理满分人数

from score_info;

--2、查询每个班级,高数和物理不及格的人数有多少(<60分)?字段:学院、专业、班级、高数不及格人数、物理不及格人数;

select

college as 学院

,major as 专业

,class as 班级

,count(if(subject='高数' and score<60,subject,null)) as 高数不及格人数

,count(if(subject='物理' and score<60,subject,null)) as 物理不及格人数

from score_info join class_info

on score_info.stu_id=class_info.stu_id

group by college,major,class

order by college,major;

--

select

subject

,case when score>0 and score<60 then '0-60'

when score>=60 and score <70 then '60-70'

when score>=70 and score <80 then '70-80'

when score>=80 and score <80 then '80-90'

when score>=90 and score <100 then '90-100'

end as diff_part

,count(distinct stu_id) as 人数

from score_info

group by subject,diff_part;

--hive中,group by不能用别名(diff_part),需要全部复制过来