bin/hive -e “select id from student;”
bin/hive -f hivef.sql
执行文件中的 sql 语句并将结果写入文件中
bin/hive -f hivef.sql > hive_result.txt
TINYINT SMALINT INT BIGINT
BOOLEAN
FLOAT DOUBLE
STRING
TIMESTAMP
BINARY
集合类型
STRUCT 通过 . 方式访问
MAP 通过['key'] 访问
ARRAY 通过[1]访问
songsong,bingbing_lili,xiao song:18_xiaoxiao song:19,hui long guan_beijing
yangyang,caicai_susu,xiao yang:18_xiaoxiao yang:19,chao yang_beijing
create table test(
name string,
friends array,
children map,
address structstreet:string,city:string
)
row format delimited fields terminated by “,”
collection items terminated by “_”
map keys terminated by “:”
lines terminated by “\n”;
row format delimited fields terminated by ‘,’ – 列分隔符
collection items terminated by ‘_’ --MAP STRUCT 和 ARRAY 的分隔符(数据分割符号) map keys terminated by ‘:’ – MAP 中的 key 与 value 的分隔符
lines terminated by ‘\n’; – 行分隔符
测试 load data local inpath ‘/opt/module/hive/datas/test.txt’ into table test;
select friends[0] as f1,children[‘xiao song’] as xiaosong,address.street as st from test;
(1)任何整数类型都可以隐式地转换为一个范围更广的类型,如 TINYINT 可以转换成
INT,INT 可以转换成 BIGINT。
(2)所有整数类型、FLOAT 和 STRING 类型都可以隐式地转换成 DOUBLE。
(3)TINYINT、SMALLINT、INT 都可以转换为 FLOAT。
(4)BOOLEAN 类型不可以转换为任何其它的类型。
例如 CAST(‘1’ AS INT)将把字符串’1’ 转换成整数 1;如果强制类型转换失败,如执行 CAST(‘X’ AS INT),表达式返回空值 NULL。
create database db_hive2 location ‘/db_hive2.db’;
create table test_clazz(
id int,
name string)
row format delimited fields terminated by ","
lines terminated by "\n"
location "/home/atguigu/clazz.txt";
create external table 删除表时,不会删除数据,默认不加 是管理表,会删除数据
alter table student2 set tblproperties('EXTERNAL'='TRUE'); 修改表类型
load data [local] inpath ‘数据的 path’ [overwrite] into table
student [partition (partcol1=val1,…)];
insert overwrite table student_par
select id, name from student where month=‘201709’;
create table if not exists student3 as select id, name from student;
Sort By:对于大规模的数据集 order by 的效率非常低。在很多情况下,并不需要全局排 序,此时可以使用 sort by
分区内排序
当 distribute by 和 sorts by 字段相同时,可以使用 cluster by 方式。
增加分区
alter table dept_partition add partition(day=‘20200404’);
alter table dept_partition add partition(day=‘20200405’) partition(day=‘20200406’);