1. Custom UDF functions

Example: implement an add() function

1. Extend the UDF class.

2. Implement one or more overloaded evaluate() methods in that class.

3. Register the function: package the code as a jar and put the jar on HDFS.

4. Create the function with CREATE FUNCTION and remove it with DROP FUNCTION; after dropping, the function may still be cached in the session, so log in again for the removal to take effect.

1.1 Create a new Maven project

Add the dependencies:

<!-- https://mvnrepository.com/artifact/org.apache.hive/hive-service -->
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-service</artifactId>
    <version>1.2.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.7</version>
</dependency>

1.2 Develop a class that extends UDF

package cn.udf;

import org.apache.hadoop.hive.ql.exec.UDF;

// 1: extend the UDF class
public class AddUDF extends UDF {
    // 2: implement evaluate(); multiple overloads may be provided
    public int evaluate(int a, int b) {
        System.out.println("performing addition: " + a + "," + b);
        return a + b;
    }
}
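
Because evaluate() can be overloaded (step 2 above), additional signatures can live in the same class. A minimal sketch of a hypothetical three-argument overload, not part of the original example, which would sit next to the two-argument version inside AddUDF:

    // hypothetical overload: Hive selects it when the call passes three int arguments
    public int evaluate(int a, int b, int c) {
        return a + b + c;
    }

After rebuilding and re-adding the jar, a call such as fadd(1,2,3) would resolve to this overload.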

1.3 Build the jar and upload it to Linux

Name the generated jar hive.jar.

Move the jar from the local file system to HDFS: $ hdfs dfs -moveFromLocal hive.jar /udf/

1.4 Start Hive and add the jar

hive> add jar hdfs://hadoop31:8020/udf/hive.jar;

1.5 Create the function

hive>create function fadd as 'cn.udf.AddUDF';

Test: hive> select fadd(2,3);  returns 5

Steps 1.4 and 1.5 can be combined into a single statement:

hive>create function fadd as 'cn.udf.AddUDF'
>using jar 'hdfs://hadoop31:8020/udf/hive.jar';
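
If the function later needs to be removed (step 4 of the overview), a minimal sketch of the corresponding statement; as noted there, the dropped function may still be cached in the current session, so reconnect afterwards:

hive> drop function fadd;

After reconnecting, calls to fadd should no longer resolve.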

2. UDAF (aggregate functions)

Developing a sum-like function requires five methods, which correspond to the stages of a MapReduce job:

The evaluator class derives from GenericUDAFEvaluator (the old API uses UDAFEvaluator instead).

init             -> initialization, before the shuffle
iterate          -> mapper: consumes one input value at a time
terminatePartial -> returns the partial result of one map-side partition
merge            -> reducer: merges the partial results of the partitions
terminate        -> returns the final result

2.1 Develop a class that extends UDAF (deprecated API)

package cn.udf;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.LongWritable;

@SuppressWarnings("deprecation") // suppress the deprecation warning
public class FSumUDAF extends UDAF {
    // 2: develop an inner class that implements UDAFEvaluator
    public static class FSumEval implements UDAFEvaluator {
        private LongWritable sum;

        @Override
        public void init() {
            System.out.println("1: init"); // called when aggregation starts - before the shuffle
            sum = new LongWritable(0);
        }

        // 2nd method: consumes the rows of one partition, one value at a time, and accumulates the sum
        public boolean iterate(LongWritable value) throws HiveException {
            System.out.println("2: received: " + sum + "," + value);
            if (value == null) {
                return true; // must always return true, otherwise further processing stops
            }
            if (sum == null) {
                sum = new LongWritable(0); // guard in case init has not been called yet
            }
            sum.set(sum.get() + value.get());
            return true;
        }

        // 3rd method: returns the partial result of one partition
        public LongWritable terminatePartial() throws HiveException {
            System.out.println("3: returning the partial result of this partition");
            return sum;
        }

        // 4th method: merges the partial results of the partitions
        public boolean merge(LongWritable value) throws HiveException {
            System.out.println("4: merging: " + value);
            if (value == null) {
                return true;
            }
            sum.set(sum.get() + value.get());
            return true;
        }

        // 5th method: returns the final result
        public LongWritable terminate() throws HiveException {
            System.out.println("5: returning the final result");
            return sum;
        }
    }
}
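
This UDAF is registered and tested the same way as the UDF in section 1. A sketch, assuming the class is rebuilt into hive.jar at the same HDFS path and that the stud01 table from section 4 has a numeric age column:

hive> create function fsum as 'cn.udf.FSumUDAF'
    > using jar 'hdfs://hadoop31:8020/udf/hive.jar';
hive> select fsum(age) from stud01;

The result should match the built-in sum(age).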

2.2 Develop a UDAF with the new API:

Instead of the deprecated UDAF class, use AbstractGenericUDAFResolver. (Exercises: implement fcount and favg the same way.)
package cn.udf;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

// 1: extend the resolver class
public class FSumUDAF2 extends AbstractGenericUDAFResolver {

    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] info) throws SemanticException {
        // 2: return the evaluator that does the actual work
        return new MySumEval();
    }

    // AggregationBuffer - the object that caches the intermediate result
    // 3: develop a subclass of GenericUDAFEvaluator
    @SuppressWarnings("deprecation")
    public static class MySumEval extends GenericUDAFEvaluator {

        // 4: declare a buffer object that holds the intermediate result
        public static class MyBuffer implements AggregationBuffer {
            Long sum;

            @Override
            public String toString() {
                return "MyBuffer{sum=" + sum + '}';
            }
        }

        // 5: declare the primitive object inspector used to read the input values
        private PrimitiveObjectInspector objectInspector;

        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
            System.out.println("1: init");
            objectInspector = (PrimitiveObjectInspector) parameters[0];
            // declare the return type
            return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
        }

        // 6: create a new buffer object
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            System.out.println("new aggregation buffer");
            MyBuffer myBuffer = new MyBuffer();
            myBuffer.sum = 0L;
            return myBuffer;
        }

        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            MyBuffer my = (MyBuffer) agg;
            my.sum = 0L;
        }

        // reads one input row, e.g. null or 34
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
            System.out.println("reading value: " + agg + "," + parameters[0]);
            Object value = parameters[0];
            if (value != null) {
                // read the value through the primitive object inspector
                long val = PrimitiveObjectInspectorUtils.getLong(value, objectInspector);
                // add it to the buffer
                MyBuffer my = (MyBuffer) agg;
                my.sum += val;
            }
        }

        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            System.out.println("partial result of this partition: " + agg);
            MyBuffer my = (MyBuffer) agg;
            return my.sum;
        }

        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            System.out.println("merging after the shuffle...");
            if (partial != null) {
                // read the partial result through the object inspector
                long val = PrimitiveObjectInspectorUtils.getLong(partial, objectInspector);
                // add it to the buffer
                MyBuffer my = (MyBuffer) agg;
                my.sum += val;
            }
        }

        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            System.out.println("final result: " + agg);
            MyBuffer my = (MyBuffer) agg;
            return my.sum;
        }
    }
}
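
Registration is the same as before. A sketch, assuming the rebuilt jar sits at the same HDFS path and stud01 has an age column:

hive> create function fsum2 as 'cn.udf.FSumUDAF2'
    > using jar 'hdfs://hadoop31:8020/udf/hive.jar';
hive> select fsum2(age), sum(age) from stud01;

Both columns should agree.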

3. Connecting to the database over JDBC

Start the service: $ hive --service hiveserver2 &   (the trailing & runs it in the background)


Log in: $ hive --service beeline

Connect:

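A minimal sketch of the connect step (the original shows it as a screenshot), assuming HiveServer2 runs on hadoop31:10000 and the db01 database used in the Java example below:

beeline> !connect jdbc:hive2://hadoop31:10000/db01

beeline then prompts for the username and password.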

4. Connecting from Java code

Dependency:

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>1.2.2</version>
</dependency>
 
package cn.hive;

import org.junit.Test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveJdbc {

    @Test
    public void test1() throws Exception {
        // 1: register the driver
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String url = "jdbc:hive2://hadoop31:10000/db01";
        Connection con = DriverManager.getConnection(url, "wangjian", "888888");
        Statement st = con.createStatement();
        ResultSet rs = st.executeQuery("select * from stud01");
        while (rs.next()) {
            String id = rs.getString("id");
            String name = rs.getString("name");
            int age = rs.getInt("age");
            System.out.println(id + "," + name + "," + age);
        }
        rs.close();
        st.close();
        con.close();
    }

    @Test
    public void test2() throws Exception {
        // 1: register the driver
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String url = "jdbc:hive2://hadoop31:10000/db01";
        Connection con = DriverManager.getConnection(url, "wangjian", "888888");
        Statement st = con.createStatement();
        ResultSet rs = st.executeQuery("select count(1) from stud01");
        if (rs.next()) {
            long size = rs.getLong(1);
            System.out.println("row count: " + size);
        }
        rs.close();
        st.close();
        con.close();
    }
}
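
The same connection can also invoke the UDF registered in section 1.5. A sketch of an additional test method for the HiveJdbc class above, assuming fadd is still registered in db01:

    @Test
    public void test3() throws Exception {
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        Connection con = DriverManager.getConnection(
                "jdbc:hive2://hadoop31:10000/db01", "wangjian", "888888");
        Statement st = con.createStatement();
        // call the custom UDF registered in section 1.5 (assumes fadd exists)
        ResultSet rs = st.executeQuery("select fadd(2,3)");
        if (rs.next()) {
            System.out.println("fadd(2,3) = " + rs.getInt(1));
        }
        rs.close();
        st.close();
        con.close();
    }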

5. About the username and password

Open the two Hive configuration files (the original shows them as screenshots): file 1, the site configuration (typically hive-site.xml), overrides file 2, the default configuration (typically hive-default.xml.template). Find the authentication-related properties in file 2, copy them into file 1, and change them so that HiveServer2 uses CUSTOM authentication backed by the class below.
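A sketch of the two properties this usually involves in file 1; the class name assumes the PasswordAuth class below is packaged as-is:

<property>
    <name>hive.server2.authentication</name>
    <value>CUSTOM</value>
</property>
<property>
    <name>hive.server2.custom.authentication.class</name>
    <value>cn.cn.hive.PasswordAuth</value>
</property>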

package cn.cn.hive;

import org.apache.hive.service.auth.PasswdAuthenticationProvider;

import javax.security.sasl.AuthenticationException;

// Same idea as web authentication frameworks (e.g. Shiro's Realm)
public class PasswordAuth implements PasswdAuthenticationProvider {
    @Override
    public void Authenticate(String user, String password) throws AuthenticationException {
        if (!user.equals("keys") || !password.equals("123456")) {
            System.out.println("wrong username or password...");
            throw new AuthenticationException("wrong username or password");
        }
        System.out.println("login successful");
    }
}

Package this class into a jar and place it in Hive's lib directory; pay attention to the jar name so your own jar can be told apart from the jars that ship with Hive.

From then on you log in with this username and password.
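
A sketch of logging in with the new credentials after restarting hiveserver2, assuming the settings above (user keys, password 123456):

beeline> !connect jdbc:hive2://hadoop31:10000/db01 keys 123456

In the Java code from section 4, the same credentials would replace "wangjian"/"888888" in DriverManager.getConnection.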