一、系统内置函数
~~~ [Hive函数之日期函数]
~~~ [Hive函数字符串函数及数学函数]
~~~ [Hive函数条件函数]
~~~ [Hive函数explode用法]
~~~ [Hive函数explode案例]
~~~ [Hive函数之窗口函数]
~~~ [Hive函数之windows子句]
~~~ [Hive函数之排名函数]
~~~ [Hive函数之序列函数]
~~~ [Hive函数之面试题之连续值]
~~~ [Hive函数之面试题TopN]
~~~ [Hive函数之行列互转]
~~~ [Hive函数之用户自定义函数]
二、查看系统函数
### --- 查看系统函数
~~~ # Hive内置函数:
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-Built-inFunctions
~~~ # 查看系统自带函数
show functions;
~~~ # 显示自带函数的用法
desc function upper;
desc function extended upper;
### --- 日期函数【重要】
~~~ # 当前前日期
hive (mydb)> select current_date;
2021-08-23
hive (mydb)> select unix_timestamp();
1629726123
~~~ # 建议使用current_timestamp,有没有括号都可以
hive (mydb)> select current_timestamp();
2021-08-23 21:42:25.408
~~~ # 时间戳转日期
hive (mydb)> select from_unixtime(1505456567);
2017-09-15 14:22:47
hive (mydb)> select from_unixtime(1505456567, 'yyyyMMdd');
20170915
hive (mydb)> select from_unixtime(1505456567, 'yyyy-MM-dd HH:mm:ss');
2017-09-15 14:22:47
~~~ # 日期转时间戳
hive (mydb)> select unix_timestamp('2019-09-15 14:23:00');
1568528580
~~~ # 计算时间差
hive (mydb)> select datediff('2020-04-18','2019-11-21');
149
hive (mydb)> select datediff('2019-11-21', '2020-04-18');
-149
~~~ # 查询当月第几天
hive (mydb)> select dayofmonth(current_date);
23
~~~ # 计算月末:
hive (mydb)> select last_day(current_date);
2021-08-31
~~~ # 当月第1天:
hive (mydb)> select date_sub(current_date, dayofmonth(current_date)-1);
2021-08-01
~~~ # 下个月第1天:
hive (mydb)> select add_months(date_sub(current_date, dayofmonth(current_date)-1), 1);
2021-09-01
~~~ # 字符串转时间(字符串必须为:yyyy-MM-dd格式)
hive (mydb)> select to_date('2020-01-01');
2020-01-01
hive (mydb)> select to_date('2020-01-01 12:12:12');
2020-01-01
~~~ # 日期、时间戳、字符串类型格式化输出标准时间格式
hive (mydb)> select date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss');
2021-08-23
hive (mydb)> select date_format(current_date(), 'yyyyMMdd');
20210823
hive (mydb)> select date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');
2020-06-01 00:00:00
三、字符串函数
~~~ # 转小写。lower
hive (mydb)> select lower("HELLO WORLD");
hello world
~~~ # 转大写。upper
hive (mydb)> select lower(ename), ename from emp;
smith SMITH
allen ALLEN
~~~ # 求字符串长度。length
hive (mydb)> select length(ename), ename from emp;
5 SMITH
5 ALLEN
~~~ # 字符串拼接。 concat / ||
hive (mydb)> select empno || " " ||ename idname from emp;
7369 SMITH
7499 ALLEN
hive (mydb)> select concat(empno, " " ,ename) idname from emp;
7369 SMITH
7499 ALLEN
~~~ # 指定分隔符。concat_ws(separator, [string | array(string)]+)
hive (mydb)> SELECT concat_ws('.', 'www', array('yanqi', 'com'));
www.yanqi.com
hive (mydb)> select concat_ws(" ", ename, job) from emp;
ALLEN SALESMAN
WARD SALESMAN
~~~ # 求子串。substr
hive (mydb)> SELECT substr('www.yanqi.com', 5);
yanqi.com
hive (mydb)> SELECT substr('www.yanqi.com', -5);
i.com
hive (mydb)> SELECT substr('www.yanqi.com', 5, 5);
yanqi
~~~ # 字符串切分。split,注意 '.' 要转义
hive (mydb)> select split("www.yanqi.com", "\\.");
["www","yanqi","com"]
四、数学函数
~~~ # 四舍五入。round
hive (mydb)> select round(314.15926);
314
hive (mydb)> select round(314.15926, 2);
314.16
hive (mydb)> select round(314.15926, -2);
300
~~~ # 向上取整。ceil
hive (mydb)> select ceil(3.1415926);
4
~~~ # 向下取整。floor
hive (mydb)> select floor(3.1415926);
3
~~~ # 其他数学函数包括:绝对值、平方、开方、对数运算、三角运算等
五、条件函数【重要】
~~~ # if (boolean testCondition, T valueTrue, T valueFalseOrNull)
hive (mydb)> select sal, if (sal<1500, 1, if (sal < 3000, 2, 3)) from emp;
sal _c1
800 1
1600 2
1250 1
~~~ # CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
~~~ 将emp表的员工工资等级分类:0-1500、1500-3000、3000以上
hive (mydb)> select sal, if (sal<=1500, 1, if (sal <= 3000, 2, 3)) from emp;
sal _c1
800 1
1600 2
~~~ # CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
~~~ 复杂条件用 case when 更直观
hive (mydb)> select sal, case when sal<=1500 then 1
when sal<=3000 then 2
else 3 end sallevel
from emp;
--输出参数
sal sallevel
800 1
1600 2
1250 1
~~~ # 以下语句等价
hive (mydb)> select ename, deptno,
case deptno when 10 then 'accounting'
when 20 then 'research'
when 30 then 'sales'
else 'unknown' end deptname
from emp;
--输出参数
ename deptno deptname
SMITH 20 research
ALLEN 30 sales
WARD 30 sales
hive (mydb)> select ename, deptno,
case when deptno=10 then 'accounting'
when deptno=20 then 'research'
when deptno=30 then 'sales'
else 'unknown' end deptname
from emp;
--输出参数
ename deptno deptname
SMITH 20 research
ALLEN 30 sales
WARD 30 sales
~~~ # COALESCE(T v1, T v2, ...)。返回参数中的第一个非空值;如果所有值都为
~~~ NULL,那么返回NULL
hive (mydb)> select sal, coalesce(comm, 0) from emp;
--输出参数
sal _c1
800 0
1600 300
1250 500
~~~ # isnull(a) isnotnull(a)
hive (mydb)> select * from emp where isnull(comm);
--输出参数
emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
7369 SMITH CLERK 7902 2010-12-17 800 NULL 20
7566 JONES MANAGER 7839 2011-04-02 2975 NULL 20
7698 BLAKE MANAGER 7839 2011-05-01 2850 NULL 30
hive (mydb)> select * from emp where isnotnull(comm);
--输出参数
emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
7499 ALLEN SALESMAN 7698 2011-02-20 1600 300 30
7521 WARD SALESMAN 7698 2011-02-22 1250 500 30
7654 MARTIN SALESMAN 7698 2011-09-28 1250 1400 30
7844 TURNER SALESMAN 7698 2011-09-08 1500 0 30
~~~ # nvl(T value, T default_value)
hive (mydb)> select empno, ename, job, mgr, hiredate, deptno, sal + nvl(comm,0) sumsal from emp;
--输出参数
empno ename job mgr hiredate deptno sumsal
7369 SMITH CLERK 7902 2010-12-17 20 800
7499 ALLEN SALESMAN 7698 2011-02-20 30 1900
7521 WARD SALESMAN 7698 2011-02-22 30 1750
7566 JONES MANAGER 7839 2011-04-02 20 2975
~~~ # nullif(x, y) 相等为空,否则为a
hive (mydb)> SELECT nullif("b", "b"), nullif("b", "a");
--输出参数
_c0 _c1
NULL b
Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
——W.S.Landor