Spark on Hive: the syntax is Spark SQL; in practice you write Java code combined with SQL in IDEA.
Hive on Spark: only Hadoop's MapReduce is swapped out; Spark becomes the compute engine.
RDD => DataFrame => DataSet:
JSON files: when Spark reads a JSON file, it parses the JSON automatically and attaches column names and types; the result also stays compatible with RDD operators (see the conversion sketch after the first example below).
import org.apache.spark.SparkConf;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SQL_Test {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("SparkSQL").setMaster("local[*]");
        SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate();
        DataFrameReader read = spark.read();
        // The JSON is parsed automatically; column names and types are inferred
        Dataset<Row> userJSON = read.json("input/user.json");
        // Print the inferred schema
        userJSON.printSchema();
        userJSON.show(); // collects and prints (the first 20 rows by default)
        spark.close();
    }
}
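Since DataFrame/Dataset stay compatible with RDD operators, here is a minimal sketch (not from the original notes) of converting the DataFrame above to a JavaRDD and back. It assumes a name column exists in input/user.json and would sit inside SQL_Test's main before spark.close(), with imports for org.apache.spark.api.java.JavaRDD and org.apache.spark.sql.Encoders added.

        // Sketch (assumption): DataFrame <-> RDD interop
        JavaRDD<Row> rowRDD = userJSON.toJavaRDD();                                   // DataFrame -> RDD of Row
        JavaRDD<String> names = rowRDD.map(row -> row.<String>getAs("name"));         // plain RDD map operator
        Dataset<String> nameDS = spark.createDataset(names.rdd(), Encoders.STRING()); // RDD -> typed Dataset
        nameDS.show();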
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.api.java.UDF2;
import org.apache.spark.sql.types.DataTypes;

public class SQL_UDF {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("SparkSQL").setMaster("local[*]");
        SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate();
        DataFrameReader read = spark.read();
        // The JSON is parsed automatically; column names and types are inferred
        Dataset<Row> userJSON = read.json("input/user.json");
        userJSON.createOrReplaceTempView("t1");
        // Register the UDF: takes (name, age) and appends a title depending on age
        spark.udf().register("myudf", new UDF2<String, Long, String>() {
            @Override
            public String call(String name, Long age) throws Exception {
                if (age >= 18) {
                    return name + "大侠";   // "大侠" ≈ "hero"
                } else {
                    return name + "小虾米"; // "小虾米" ≈ "small fry"
                }
            }
        }, DataTypes.StringType); // last argument is the UDF's return type
        spark.sql("select myudf(name,age) from t1").show();
        spark.close();
    }
}
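For reference, the same UDF could also be registered with a Java lambda instead of the anonymous UDF2 class; this is a sketch under the same assumptions (view t1, columns name and age), with a cast so Spark can pick the right register overload.

        // Sketch: lambda form of the same UDF
        spark.udf().register("myudf",
                (UDF2<String, Long, String>) (name, age) ->
                        age >= 18 ? name + "大侠" : name + "小虾米",
                DataTypes.StringType);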
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;

import static org.apache.spark.sql.functions.udaf;

public class SQL_UDAF {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("SparkSQL").setMaster("local[*]");
        SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate();
        DataFrameReader read = spark.read();
        // The JSON is parsed automatically; column names and types are inferred
        Dataset<Row> userJSON = read.json("input/user.json");
        userJSON.createOrReplaceTempView("t1");
        // Register the typed Aggregator as a UDAF; Encoders.LONG() describes the input column type
        spark.udf().register("ageAVG", udaf(new AgeAvg(), Encoders.LONG()));
        spark.sql("select name,ageAVG(age) from t1 group by name").show();
        spark.close();
    }
}
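The AgeAvg class referenced above is not included in these notes; the following is a minimal sketch of what it might look like as a typed Aggregator that averages ages, with a hypothetical AvgBuffer bean (nested static class) holding the running sum and count.

import java.io.Serializable;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.expressions.Aggregator;

// Sketch of AgeAvg: IN = Long (age), BUF = AvgBuffer, OUT = Double (average)
public class AgeAvg extends Aggregator<Long, AgeAvg.AvgBuffer, Double> {

    // Hypothetical buffer bean (must be a public bean with getters/setters for Encoders.bean)
    public static class AvgBuffer implements Serializable {
        private long sum;
        private long count;
        public AvgBuffer() {}
        public AvgBuffer(long sum, long count) { this.sum = sum; this.count = count; }
        public long getSum() { return sum; }
        public void setSum(long sum) { this.sum = sum; }
        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }

    @Override
    public AvgBuffer zero() {
        return new AvgBuffer(0L, 0L);
    }

    @Override
    public AvgBuffer reduce(AvgBuffer buffer, Long age) {
        buffer.setSum(buffer.getSum() + age);
        buffer.setCount(buffer.getCount() + 1);
        return buffer;
    }

    @Override
    public AvgBuffer merge(AvgBuffer b1, AvgBuffer b2) {
        b1.setSum(b1.getSum() + b2.getSum());
        b1.setCount(b1.getCount() + b2.getCount());
        return b1;
    }

    @Override
    public Double finish(AvgBuffer buffer) {
        return buffer.getCount() == 0 ? 0.0 : (double) buffer.getSum() / buffer.getCount();
    }

    @Override
    public Encoder<AvgBuffer> bufferEncoder() {
        return Encoders.bean(AvgBuffer.class);
    }

    @Override
    public Encoder<Double> outputEncoder() {
        return Encoders.DOUBLE();
    }
}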
Connecting Spark SQL to Hive (set HADOOP_USER_NAME to a user with access to the Hive warehouse before building the SparkSession):
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

/**
 * Reads a Hive table through Spark SQL.
 *
 * @Author 浪拍岸
 * @Create 19/10/2023 3:35 PM
 * @Version 1.0
 */
public class HiveTest {
    public static void main(String[] args) {
        System.setProperty("HADOOP_USER_NAME", "atguigu");
        SparkConf sparkConf = new SparkConf().setAppName("SparkSQL").setMaster("local[*]");
        // enableHiveSupport() lets Spark use the Hive metastore (hive-site.xml must be on the classpath)
        SparkSession spark = SparkSession.builder().enableHiveSupport().config(sparkConf).getOrCreate();
        // spark.sql("show tables").show();
        spark.sql("select * from stu where id = 1").createOrReplaceTempView("t1");
        spark.sql("select * from t1").show();
        spark.close();
    }
}