https://www.elastic.co/pt/downloads/hadoop
/user/hive/jars/elasticsearch-hadoop-6.6.0.jar;
- ADD JAR hdfs://uat01/user/hive/jars/elasticsearch-hadoop-6.6.0.jar;
- ADD JAR hdfs://uat01/user/hive/jars/commons-httpclient-3.1.jar;
--注意,部分不加载“commons-httpclient-3.1.jar”可能会报错error in initSerDe: java.lang.NoClassDefFoundError org/apache/commons/httpclient,可以直接把包放在hive的lib下一样效果
- PUT user_info
- {
- "mappings": {
- "user":{
- "properties": {
- "id":{
- "type": "long"
- },
- "name":{
- "type": "text"
- },
- "birthday":{
- "type": "text"
- },
- "email":{
- "type": "text"
- }
- }
- }
- }
- }
- drop table user_info_es;
- create external table user_info_es(
- id string,
- name string,
- birthday string,
- gender string,
- email string)
- STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
- TBLPROPERTIES(
- 'es.resource' = 'user_info/user', -- 索引名及type
- 'es.nodes'='192.168.139.101:9200', -- es地址
- 'es.mapping.id'='id' , -- 声明主键
- 'es.write.operation'='upsert', -- 声明更新
- 'es.index.refresh_interval' = '-1',
- 'es.index.number_of_replicas' = '0',
- 'es.batch.write.retry.count' = '6',
- 'es.batch.write.retry.wait' = '60s',
- 'es.mapping.name' = 'id:id,name:name,birthday:birthday,gender:gender,email:email' -- 字段映射
- );
- insert overwrite table user_info_es
- select id,name,birthday, gender,email from user_info_es_temp limit 100;
步骤1~5同上
- drop table user_info_es;
- create external table user_info_es(
- jsonString string
- )
- STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
- TBLPROPERTIES(
- 'es.resource' = 'user_info/user', -- 索引名及type
- 'es.nodes'='192.168.139.101:9200', -- es地址
- 'es.mapping.id'='id' , -- 声明主键
- 'es.write.operation'='upsert', -- 声明更新
- 'es.index.refresh_interval' = '-1',
- 'es.index.number_of_replicas' = '0',
- 'es.batch.write.retry.count' = '6',
- 'es.batch.write.retry.wait' = '600s',
- 'es.input.json' = 'yes' -- 必加 自动识别JSON
- );
- -- concat会把value为空的数据,concat置为空
- -- 最后一个字段必须不含空值,或者使用concat_ws代替,否则会多个','逗号,导致JSON格式出现问题
- select concat_ws('', '{', id, birthday, gender, email, name, '}') jsonString
- from (select concat('\"id\":\"', id, '\",') id,
- concat('\"birthday\":\"', birthday, '\",') birthday,
- concat('\"gender\":\"', gender, '\",') gender,
- concat('\"email\":\"', email, '\",') email,
- concat('\"name\":\"', name, '\"') name
- from user_info_es_temp
- ) t;
- insert overwrite table user_info_es
- -- concat会把value为空的数据,concat置为空
- -- 最后一个字段必须不含空值,或者使用concat_ws代替,否则会多个','逗号,导致JSON格式出现问题
- select concat_ws('', '{', id, birthday, gender, email, name, '}') jsonString
- from (select concat('\"id\":\"', id, '\",') id,
- concat('\"birthday\":\"', birthday, '\",') birthday,
- concat('\"gender\":\"', gender, '\",') gender,
- concat('\"email\":\"', email, '\",') email,
- concat('\"name\":\"', name, '\"') name
- from user_info_es_temp
- ) t;
11、此操作对于update数据时可能存在风险,如原数据有值,需要更新为null,则此方式无法更新