保存select查询结果的几种方式:

1、将查询结果保存到一张新的hive表中

create table t_tmp

as

select * from t_p;


2、将查询结果保存到一张已经存在的hive表中

insert into  table t_tmp

select * from t_p;


3、将查询结果保存到指定的文件目录(可以是本地,也可以是hdfs)

(本地)

insert overwrite local directory '/home/hadoop/test'


select * from t_p;

(hdfs)

insert overwrite directory '/aaa/test'


select * from t_p;


-----------------------------------

关于hive中的各种join


准备数据

1,a

2,b

3,c

4,d

7,y

8,u


2,bb

3,cc

7,yy

9,pp




建表:

create table a(id int,name string)

row format delimited fields terminated by ',';


create table b(id int,name string)

row format delimited fields terminated by ',';


导入数据:

load data local inpath '/home/hadoop/a.txt' into table a;

load data local inpath '/home/hadoop/b.txt' into table b;



实验:

** inner join

select * from a inner join b on a.id=b.id;

+-------+---------+-------+---------+--+

| a.id  | a.name  | b.id  | b.name  |

+-------+---------+-------+---------+--+

| 2     | b       | 2     | bb      |

| 3     | c       | 3     | cc      |

| 7     | y       | 7     | yy      |

+-------+---------+-------+---------+--+






**left join

select * from a left join b on a.id=b.id;

+-------+---------+-------+---------+--+

| a.id  | a.name  | b.id  | b.name  |

+-------+---------+-------+---------+--+

| 1     | a       | NULL  | NULL    |

| 2     | b       | 2     | bb      |

| 3     | c       | 3     | cc      |

| 4     | d       | NULL  | NULL    |

| 7     | y       | 7     | yy      |

| 8     | u       | NULL  | NULL    |

+-------+---------+-------+---------+--+






**right join

select * from a right join b on a.id=b.id;






**

select * from a full outer join b on a.id=b.id;

+-------+---------+-------+---------+--+

| a.id  | a.name  | b.id  | b.name  |

+-------+---------+-------+---------+--+

| 1     | a       | NULL  | NULL    |

| 2     | b       | 2     | bb      |

| 3     | c       | 3     | cc      |

| 4     | d       | NULL  | NULL    |

| 7     | y       | 7     | yy      |

| 8     | u       | NULL  | NULL    |

| NULL  | NULL    | 9     | pp      |

+-------+---------+-------+---------+--+



**

select * from a left semi join b on a.id = b.id;

+-------+---------+--+

| a.id  | a.name  |

+-------+---------+--+

| 2     | b       |

| 3     | c       |

| 7     | y       |

+-------+---------+--+



-------------

多重插入:


from student

insert into table student_p partition(part='a')

select * where Sno<95011;

insert into table student_p partition(part='a')

select * where Sno<95011;


---------------

级联求和

create table t_access_times(username string,month string,salary int)

row format delimited fields terminated by ',';


load data local inpath '/home/hadoop/t_access_times.dat' into table t_access_times;


A,2015-01,5

A,2015-01,15

B,2015-01,5

A,2015-01,8

B,2015-01,25

A,2015-01,5

A,2015-02,4

A,2015-02,6

B,2015-02,10

B,2015-02,5

要得出的结果

+-------------+----------+-----------+-------------+

| 用户        | 月份     | 月份统计  | 累计统计    |

+-------------+----------+-----------+-------------+

| A           | 2015-01  | 33        | 33          |

| A           | 2015-02  | 10        | 43          |

| B           | 2015-01  | 30        | 30          |

| B           | 2015-02  | 15        | 45          |

+-------------+----------+-----------+-------------+


1、第一步,先求个用户的月总金额

select username,month,sum(salary) as salary from t_access_times group by username,month


+-----------+----------+---------+--+

| username  |  month   | salary  |

+-----------+----------+---------+--+

| A         | 2015-01  | 33      |

| A         | 2015-02  | 10      |

| B         | 2015-01  | 30      |

| B         | 2015-02  | 15      |

+-----------+----------+---------+--+


2、第二步,将月总金额表 自己连接 自己连接

select A.username,A.month,max(A.salary) as salary,sum(B.salary) as accumulate

from

(select username,month,sum(salary) as salary from t_access_times group by username,month) A

inner join

(select username,month,sum(salary) as salary from t_access_times group by username,month) B

on

A.username=B.username

+-------------+----------+-----------+-------------+----------+-----------+--+

| a.username  | a.month  | a.salary  | b.username  | b.month  | b.salary  |

+-------------+----------+-----------+-------------+----------+-----------+--+

| A           | 2015-01  | 33        | A           | 2015-01  | 33        |

| A           | 2015-01  | 33        | A           | 2015-02  | 10        |

| A           | 2015-02  | 10        | A           | 2015-01  | 33        |

| A           | 2015-02  | 10        | A           | 2015-02  | 10        |

| B           | 2015-01  | 30        | B           | 2015-01  | 30        |

| B           | 2015-01  | 30        | B           | 2015-02  | 15        |

| B           | 2015-02  | 15        | B           | 2015-01  | 30        |

| B           | 2015-02  | 15        | B           | 2015-02  | 15        |

+-------------+----------+-----------+-------------+----------+-----------+--+


3、第三步,从上一步的结果中

进行分组查询,分组的字段是a.username a.month

求月累计值:  将b.month <= a.month的所有b.salary求和即可

select A.username,A.month,max(A.salary) as salary,sum(B.salary) as accumulate

from

(select username,month,sum(salary) as salary from t_access_times group by username,month) A

inner join

(select username,month,sum(salary) as salary from t_access_times group by username,month) B

on

A.username=B.username

where B.month <= A.month

group by A.username,A.month

order by A.username,A.month;