在pom.xml文件中添加所需插件
插入内容如下:
```xml
<build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
        <!-- scala-maven-plugin：编译 Scala 源代码 -->
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>3.2.2</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                    <configuration>
                        <args>
                            <arg>-dependencyfile</arg>
                            <arg>${project.build.directory}/.scala_dependencies</arg>
                        </args>
                    </configuration>
                </execution>
            </executions>
        </plugin>
        <!-- maven-shade-plugin：打包时合并依赖，并剔除签名文件，避免 SecurityException -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <transformers>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <mainClass></mainClass>
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
```
等待加载
// 3.读取数据文件，RDD 可以简单地理解为是一个集合，集合中存放的元素是 String 类型
val data: RDD[String] = sparkContext.textFile(args(0))
// 7.把结果数据保存到 HDFS 上
result.saveAsTextFile(args(1))
// 修改以上这 2 行代码（Scala 的注释使用 //，不是 #）
打包成功标志: 显示BUILD SUCCESS,可以看到target目录下的2个jar包
$ start-all.sh
$ hadoop fs -rm /spark/test/words.txt
#先将解压的两个jar包复制出来
$ bin/spark-submit \
--master spark://master:7077 \
--executor-memory 1g \
--total-executor-cores 1 \
--class cn.itcast.WordCount \
/opt/software/spark_chapter02-1.0-SNAPSHOT.jar \
/spark/test/words.txt \
/spark/test/out

注意：spark-submit 必须通过 --class 指定主类的全限定名（请根据自己工程中 WordCount 类所在的包名修改），否则提交会因找不到主类而失败。