In essence, Local mode starts a single JVM process (containing multiple threads) to execute Tasks.
Local role distribution:
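In this single JVM, the Driver takes on the Master role (resource management), while the worker threads inside the same process act as the executors that run the Tasks: local uses one thread, local[K] uses K threads, and local[*] uses one thread per CPU core.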
1. Extract the installation package
tar -zxvf spark-3.2.0-bin-hadoop3.2.tgz -C /export/server/
cd /export/server/
mv spark-3.2.0-bin-hadoop3.2/ spark-3.2.0
2. Configure environment variables
vim /etc/profile
#SPARK_HOME
export SPARK_HOME=/export/server/spark-3.2.0
#Python interpreter from the pyspark virtual environment
export PYSPARK_PYTHON=/export/server/anaconda3/envs/pyspark/bin/python3.8
#assumes HADOOP_HOME already points at an existing Hadoop installation
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
source /etc/profile
#the same variables also need to be configured in ~/.bashrc
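A quick way to confirm the variables took effect is to read them back from a fresh shell; a minimal check, run with the configured python3.8:

#Print the variables this setup relies on
import os
for name in ("SPARK_HOME", "PYSPARK_PYTHON", "HADOOP_CONF_DIR"):
    print(name, "=", os.environ.get(name))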
3. Start the PySpark shell
cd /export/server/spark-3.2.0/bin
./pyspark
#run with one worker thread per CPU core
./pyspark --master local[*]
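Once the shell is up, the automatically created SparkContext (sc) can confirm which master it is running against; a quick sanity check inside the shell:

#sc is created automatically by the pyspark shell
print(sc.master)              # e.g. local[*]
print(sc.defaultParallelism)  # number of local worker threads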
4. Test with sample data
sc.parallelize([1,2,3,4,5]).map(lambda x: 2 * x + 1).collect()
#Output
[3, 5, 7, 9, 11]
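As a slightly larger smoke test, a classic word count also runs entirely inside this one JVM; the sample lines below are made up for illustration:

#Word count over an in-memory list (sample data, for illustration only)
lines = ["hello spark", "hello local mode", "spark runs tasks as threads"]
counts = (sc.parallelize(lines)
            .flatMap(lambda line: line.split(" "))
            .map(lambda word: (word, 1))
            .reduceByKey(lambda a, b: a + b)
            .collect())
print(counts)  # e.g. [('hello', 2), ('spark', 2), ...]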
5. Test the Scala environment
./spark-shell
sc.parallelize(Array(1,2,3,4,5)).map(x => 2*x + 1).collect()
#Output
res0: Array[Int] = Array(3, 5, 7, 9, 11)
6. Submit code with spark-submit
./spark-submit --master local[*] /export/server/spark-3.2.0/examples/src/main/python/pi.py 10
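Besides the bundled pi.py, any self-contained PySpark script can be submitted the same way. A minimal sketch, assuming a hypothetical file /tmp/double.py (the path and contents are made up for illustration):

# /tmp/double.py -- hypothetical minimal job for spark-submit
from pyspark import SparkConf, SparkContext

if __name__ == "__main__":
    conf = SparkConf().setAppName("double-test")
    sc = SparkContext(conf=conf)
    # Same computation as the shell test above
    result = sc.parallelize(range(1, 6)).map(lambda x: 2 * x + 1).collect()
    print(result)  # expected: [3, 5, 7, 9, 11]
    sc.stop()

Submit it with:
./spark-submit --master local[*] /tmp/double.py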