1.操作库
基本语法

CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
   [COMMENT database_comment]
   [LOCATION hdfs_path]
   [WITH DBPROPERTIES (property_name=property_value, ...)];

说明:

  • IF NOT EXISTS:如果不存在则创建
  • COMMENT:注释
  • LOCATION:数据库存放目录
  • WITH DBPROPERTIES:拓展信息,key/value
    例子:
-- Create a database with default settings.
CREATE DATABASE database_name;
-- Create the database only when it does not already exist.
CREATE DATABASE IF NOT EXISTS database_name;
-- Create a database and attach a comment (the name comes before COMMENT).
CREATE DATABASE database_name COMMENT 'zhushi';
-- Create a database with extra key/value properties.
CREATE DATABASE testdb_otherinfo
WITH DBPROPERTIES ('creator' = 'zhangcheng', 'date' = '2015-11-30');

(2)显示数据库

-- List the databases defined in the metastore.
SHOW DATABASES;
(3) 切换数据库
-- Make database_name the current database for the statements that follow.
USE database_name;

(4) 删除数据库(如果数据库存在就删除)

-- Drop the database only if it exists. The default mode is RESTRICT, so this
-- fails while the database still contains tables.
DROP DATABASE IF EXISTS traffic;
-- CASCADE drops the contained tables as well. IF EXISTS keeps this statement
-- from erroring when the database has already been removed.
DROP DATABASE IF EXISTS traffic CASCADE;

2.操作表
(1) 创建表
a. 基本语法
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment], …)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], …)]
[[ROW FORMAT row_format]
[STORED AS file_format]]
[LOCATION hdfs_path]
[AS select_statement];

说明:

  • EXTERNAL:外部表
  • IF NOT EXISTS:表不存在创建
  • db_name:表所属数据库
  • COMMENT col_comment:列注释
  • COMMENT table_comment:表注释
  • PARTITIONED BY:分区字段
  • ROW FORMAT row_format:行的数据格式
  • STORED AS file_format:文件存储格式
  • STORED AS file_format
  • LOCATION hdfs_path:存放路径
  • AS select_statement:用查询语句的结果集建表并填充数据
    建表支持的数据类型
    基本数据类型
    tinyint / smallint / int / bigint
    float / double
    boolean
    string
    复杂数据类型
    Array/Map/Struct
    没有 datetime 类型(日期时间可用 timestamp;date 类型自 Hive 0.12 起支持)
    例子:
-- Example: comma-delimited text table with per-column and table-level comments.
CREATE TABLE IF NOT EXISTS testdb.pepole (
    id    int    COMMENT 'user id',
    name  string COMMENT 'user name',
    phone string COMMENT 'user phone'
)
COMMENT 'pepele info'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS textfile;

b.语法:
CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];

说明:

  • IF NOT EXISTS:表不存在创建
  • db_name:表所属数据库
  • existing_table_or_view_name:已存在的表或者视图(新表复制其结构,不复制数据)
  • LOCATION hdfs_path:存放路径
    例子:
-- Clone only the schema of testdb.pepole into a new, empty table.
CREATE TABLE IF NOT EXISTS testdb.pepole_like
LIKE testdb.pepole;

-- Create a table from a query result (CTAS), keeping the id and name columns.
CREATE TABLE IF NOT EXISTS testdb.pepole_select AS
SELECT
    id,
    name
FROM testdb.pepole;

c.创建表时指定的partition的分区空间
功能:在Hive Select查询中一般会扫描整个表内容,会消耗很多时间做没必要的工作。有时候只需要扫描表中关心的一部分数据,因此建表时引入了partition概念。如果需要创建有分区的表,需要在create表的时候使用可选子句partitioned by,详见表创建的语法结构。分区表指的是在创建表时通过partitioned by指定了分区字段的表。
创建

-- Tab-delimited employee table partitioned by month and day.
-- Each partition column needs a name and a type separated by whitespace;
-- the names match the month/day filter used in the example query on this table.
CREATE TABLE IF NOT EXISTS testdb.emp_partition (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      double,
    comm     double,
    deptno   int
)
PARTITIONED BY (month string, day string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS textfile;

(2) 查看有哪些表

-- List the tables in the current database.
SHOW TABLES;
-- List the tables in a named database.
SHOW TABLES IN database_name;

(3) 查看表结构

-- Show the column names and types of a table (DESC is shorthand for DESCRIBE).
DESCRIBE table_name;

(4) 查看表详细属性

-- Show the detailed table properties of table test in formatted layout.
DESCRIBE FORMATTED test;

(5)查询数据

-- Read the rows for one month/day pair; month and day are intended to be the
-- table's partition columns, so the filter limits which data is scanned.
SELECT *
FROM emp_partition
WHERE month = '201512'
  AND day = '02';

(6)修改表
a.修改表名称

-- Rename table person to student.
ALTER TABLE person RENAME TO student;

b.修改表分区

hive>alter table hive.logs add partition (dt='2018-9-1',country='beijng'); 
hive>alter table hive.logs drop partition (dt='2018-9-1',country='beijng');

c.添加列

hive>alter table hive.logs add columns(id int,name string);

d.删除和替换列

hive>alter table hive.logs replace columns (id int,name string);

(7) 向管理表装载数据
a.装载数据

hive>load data [local] inpath '/data/hive' [overwrite] into table tbname;

说明:含local关键字:加载本地目录数据到warehouse下;如果不含local关键字,加载数据目录默认为hdfs路径。
b.通过查询语句向表中插入数据:在原始数据的基础上,进行业务分析后生成的表数据

hive>insert overwrite table new_table select [column字段] from 原始数据表 where 条件;

hive> from 原始数据表 别名 insert overwrite table new_table select [column字段] where 条件;

c.创建表时,也可以执行插入数据

hive>create table new_table as select id,name,age from student where age>23;

d.导出数据

hive>insert overwrite directory '/data/stocks/2009-6-23' select * from stocks where ymd ='2009-6-23';

(8)查询
a.对array查询

hive>select name,subordinates[1] from hive.employees where name='John Doe';

b.对map查询

hive>select name,deductions['State Taxes'] from hive.employees where name = 'John Doe';

c.对struct查询

hive>select name,address.state from hive.employees where name ='John Doe';

d.支持运算符
e.内置函数,比如sum(),count(),avg()等。
f.limit

hive>select upper(name),salary,deductions['Federal Taxes'],round(salary*(1-deductions['Federal Taxes'])) from hive.employees limit 6;

g.列别名

hive>select upper(name),salary,deductions['Federal Taxes'] as Taxes,round(salary*(1-deductions['Federal Taxes'])) as sal from hive.employees limit 6;

h.嵌套

hive>from (select upper(name) as name,salary,deductions['Federal Taxes'] as Taxes,round(salary*(1-deductions['Federal Taxes'])) as sal from hive.employees) e select e.name,e.Taxes,e.sal where e.sal >70000;

(9) 分组查询

hive>select year(ymd),avg(price_close) from stocks where exchange1='NASDAQ' and symbol = 'AAPL' group by year(ymd);

说明:分组属性列在mysql中一定要出现在select子句之后;但在hive中不需要。
(10) 删除表

DROP TABLE [IF EXISTS] table_name ;

(11)清空表

TRUNCATE TABLE table_name [PARTITION partition_spec];

EXTERNAL:不管删除内部表(管理表,又称托管表)还是外部表,都会删除元数据;删除外部表不会删除数据文件,删除内部表则会同时删除数据文件。共用数据时使用外部表(创建表时加上 LOCATION hdfs_path 参数)。

-- External employee table stored as tab-delimited text.
-- No LOCATION clause is given, so Hive uses its default location for the table.
CREATE EXTERNAL TABLE IF NOT EXISTS testdb.emp_ext (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      double,
    comm     double,
    deptno   int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS textfile;

调优方案:建议将 set hive.exec.mode.local.auto=true; 加入到 Hive CLI 的初始化文件($HOME/.hiverc)中,使小数据量的查询自动以本地模式运行。