UDF用于对单个输入记录进行处理并返回单个输出值。例如,可以编写一个UDF来实现字符串的反转。
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
public class ReverseStringUDF extends UDF {
public Text evaluate(Text input) {
if (input == null) {
return null;
}
return new Text(new StringBuilder(input.toString()).reverse().toString());
}
}
ADD JAR /path/to/your/hive-udfs.jar;
CREATE TEMPORARY FUNCTION reverse_string AS 'com.example.hive.udf.ReverseStringUDF';
SELECT reverse_string(column_name) FROM your_table;
UDAF用于对一组输入记录进行处理并返回一个单一值。例如,实现一个计算平均值的UDAF。
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
public class AverageUDAF extends UDAF {
public static class AverageEvaluator implements UDAFEvaluator {
private long count;
private double sum;
public AverageEvaluator() {
init();
}
public void init() {
count = 0;
sum = 0;
}
public boolean iterate(Double value) {
if (value != null) {
count++;
sum += value;
}
return true;
}
public Double terminatePartial() {
return (count == 0) ? null : (sum / count);
}
public boolean merge(Double other) {
if (other != null) {
sum += other;
count++;
}
return true;
}
public Double terminate() {
return (count == 0) ? null : (sum / count);
}
}
}
ADD JAR /path/to/your/hive-udafs.jar;
CREATE TEMPORARY FUNCTION average_udaf AS 'com.example.hive.udaf.AverageUDAF';
SELECT average_udaf(column_name) FROM your_table;
UDTF用于将单个输入记录生成多个输出记录。例如,实现一个将逗号分隔的字符串拆分为多行的UDTF。
import org.apache.hadoop.hive.ql.exec.UDTF;
import org.apache.hadoop.io.Text;
public class ExplodeUDTF extends UDTF {
public void process(Object[] args) {
String input = args[0].toString();
String[] parts = input.split(",");
for (String part : parts) {
forward(new Object[]{part});
}
}
public void close() {
}
}
ADD JAR /path/to/your/hive-udtfs.jar;
CREATE TEMPORARY FUNCTION explode_udtf AS 'com.example.hive.udtf.ExplodeUDTF';
SELECT explode_udtf(column_name) FROM your_table;
Hive支持多种Join操作,如Inner Join、Left Join、Right Join、Full Outer Join。以下是一个简单的Join示例:
SELECT a.*, b.*
FROM table_a a
JOIN table_b b ON a.id = b.id;
Hive提供了丰富的内置函数和聚合函数,以下是一些常见的内置函数和聚合函数示例:
SELECT CONCAT('Hello', ' ', 'World'), SUBSTR('Hello World', 1, 5), LENGTH('Hello World') FROM your_table;
SELECT CURRENT_DATE, YEAR('2024-06-04'), MONTH('2024-06-04') FROM your_table;
SELECT ROUND(3.14159, 2), CEIL(3.14159), FLOOR(3.14159) FROM your_table;
SUM、AVG、COUNT:
聚合函数
SUM、AVG、COUNT:
GROUP BY:
SELECT category, SUM(sales) FROM sales_table GROUP BY category;
