主机名:cmcc01为例
操作系统:centos7
| 安装部署软件 | 版本 | 部署方式 |
| --- | --- | --- |
| centos | 7 | |
| zookeeper | zookeeper-3.4.10 | 伪分布式 |
| hadoop | hadoop-3.1.3 | 伪分布式 |
| hive | hive-3.1.3-bin | 伪分布式 |
| clickhouse | 21.11.10.1-2 | 单节点多实例 |
| dolphinscheduler | 3.0.0 | 单节点 |
| kettle | pdi-ce-9.3.0.0 | 单节点 |
| sqoop | sqoop-1.4.7 | 单节点 |
| seatunnel | seatunnel-incubating-2.1.2 | 单节点 |
| spark | spark-2.4.8-bin-hadoop2.7 | 单节点 |
seatunnel:https://seatunnel.incubator.apache.org/download
spark:https://archive.apache.org/dist/spark/
解压:
- # 解压
- tar zxf apache-seatunnel-incubating-2.1.2-bin.tar.gz -C /opt/software/
- tar zxf spark-2.4.8-bin-hadoop2.7.tgz -C /opt/software/
-
- vim ~/.bash_profile
-
- # spark
- export SPARK_HOME=/opt/software/spark-2.4.8-bin-hadoop2.7
- export PATH=$PATH:${SPARK_HOME}/bin
-
- # seatunnel
- export SEATUNNEL_HOME=/opt/software/apache-seatunnel-incubating-2.1.2
- export PATH=$PATH:${SEATUNNEL_HOME}/bin
- # 使环境变量生效
- source ~/.bash_profile
- cd /opt/software/spark-2.4.8-bin-hadoop2.7/conf
-
- # 复制配置文件
- cp spark-env.sh.template spark-env.sh
-
- vim spark-env.sh
-
- # 添加以下内容
- export HADOOP_CONF_DIR=/opt/software/hadoop-3.1.3/etc/hadoop
- export YARN_CONF_DIR=/opt/software/hadoop-3.1.3/etc/hadoop
- export HADOOP_OPTS="-Djava.library.path=/opt/software/hadoop-3.1.3/lib/native"
-
- # 修改hive配置文件
- vim /opt/software/hive-3.1.3-bin/conf/hive-site.xml
-
- 添加元数据库配置
- <property>
-     <name>hive.metastore.uris</name>
-     <value>thrift://cmcc01:9083</value>
- </property>
- # 创建hive配置文件软连接
- ln -s /opt/software/hive-3.1.3-bin/conf/hive-site.xml /opt/software/spark-2.4.8-bin-hadoop2.7/conf
-
- # 复制mysql 驱动包到spark的jar目录下
- cp /opt/package/mysql-connector-java-8.0.20.jar /opt/software/spark-2.4.8-bin-hadoop2.7/jars
- # 启动metastore
- nohup hive --service metastore > ${HIVE_HOME}/logs/metastore.log 2>&1 &
- # 添加启动命令到启动脚本
- vim /opt/software/start_hiveserver2.sh
- # 添加以下内容
-
- #!/bin/bash
-
- # 启动hiveserver2
- nohup ${HIVE_HOME}/bin/hiveserver2 > ${HIVE_HOME}/logs/hiveserver2.log 2>&1 &
-
- # 启动metastore
- nohup hive --service metastore > ${HIVE_HOME}/logs/metastore.log 2>&1 &
-
- # beeline -u jdbc:hive2://cmcc01:10000/default -n root

- vim /opt/software/apache-seatunnel-incubating-2.1.2/config/hive-console.conf
- # 添加以下内容
- env {
- spark.app.name = "SeaTunnel"
- spark.executor.instances = 1
- spark.executor.cores = 1
- spark.num.executors=1
- spark.executor.memory = "1g"
- execution.parallelism = 1
-
- }
-
- source {
- hive {
- pre_sql = "select id, name,age from stg.student01"
- result_table_name = "student01_log"
- }
-
- }
-
- transform {
-
- }
-
- sink {
- Console{}
- }
准备hive数据集
- CREATE TABLE `stg.student01`
- (
- `id` int,
- `name` string,
- `age` int
- )
- row format delimited fields terminated by ","
- STORED AS textfile;
-
- INSERT INTO `stg`.`student01` VALUES (1, '张三', 20),(2, '李四', 21),(3, '王五', 22);
start-seatunnel-spark.sh --master yarn --deploy-mode client --config /opt/software/apache-seatunnel-incubating-2.1.2/config/hive-console.conf
测试成功截图
