[hadoop@hadoop102 software]$ tar -zxvf spark-3.0.0-bin-hadoop3.2.tgz -C /opt/module
cd /opt/module/spark-3.0.0-bin-hadoop3.2/conf
[hadoop@hadoop102 conf]$ cp spark-env.sh.template spark-env.sh
[hadoop@hadoop102 conf]$ vim spark-env.sh
内容:
export JAVA_HOME=/opt/module/jdk1.8.0_291
YARN_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop
[hadoop@hadoop102 ~]$ cd /opt/module/spark-3.0.0-bin-hadoop3.2/
[hadoop@hadoop102 spark-3.0.0-bin-hadoop3.2]$ bin/spark-submit \
> --class org.apache.spark.examples.SparkPi \
> --master yarn \
> --deploy-mode cluster \
> ./examples/jars/spark-examples_2.12-3.0.0.jar \
> 10
到YARN WEB页面查看任务提交情况
[hadoop@hadoop102 conf]$ cd /opt/module/spark-3.0.0-bin-hadoop3.2/conf/
[hadoop@hadoop102 conf]$ vim hive-site.xml
内容:
<configuration>
<property>
<name>hive.metastore.warehouse.dirname>
<value>/warehousevalue>
property>
<property>
<name>hive.metastore.localname>
<value>falsevalue>
property>
<property>
<name>hive.metastore.urisname>
<value>thrift://hadoop102:9083value>
property>
configuration>
cd /opt/module/apache-hive-3.1.2-bin/conf
vim hive-site.xml
[hadoop@hadoop102 spark-3.0.0-bin-hadoop3.2]$ bin/spark-sql --master yarn
spark-sql> show databases;
spark-sql> select count(1)
> from dw_ods.ods_activity_info_full
> where dt='2023-12-07';
[hadoop@hadoop102 apache-hive-3.1.2-bin]$ bin/hive
hive> show databases;
hive> select count(1)
> from dw_ods.ods_activity_info_full
> where dt='2023-12-07';
优势在哪里??