<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.wakedata</groupId>
    <artifactId>code</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <spark.version>3.4.1</spark.version>
        <scala.version>2.12.14</scala.version>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>${spark.version}</version>
        </dependency>
    </dependencies>

    <build>

        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>

        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>cn.itcast.rpc.Master</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>

package sparkCore

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Classic Spark word count. Steps:
 *   1. Create a SparkContext
 *   2. Create an RDD
 *   3. Apply RDD transformations
 *   4. Trigger an action
 *   5. Release resources
 */
object wordcount_01 {

  def main(args: Array[String]): Unit = {

    // "local" runs the driver and a single executor thread in-process;
    // the app name is what shows up in the Spark UI.
    val conf: SparkConf = new SparkConf().setAppName("WordCount").setMaster("local")
    // Create the SparkContext; it is the entry point for building RDDs.
    val sc: SparkContext = new SparkContext(conf)

    // Read the input file; each RDD element is one line of text.
    // NOTE(review): relative path — resolved against the working directory
    // of whoever launches the job; confirm ./data/words.txt exists there.
    val lines: RDD[String] = sc.textFile("./data/words.txt")

    // Split each line on spaces and flatten into a single RDD of words.
    val words: RDD[String] = lines.flatMap(_.split(" "))

    // Pair each word with 1 so occurrences can be summed per key.
    // FIX: in the original this explanatory sentence was missing its "//"
    // marker, making the file fail to compile; it is restored as a comment.
    val wordsAndOne: RDD[(String, Int)] = words.map((_, 1))

    // Aggregate per key; reduceByKey combines locally on each partition
    // before shuffling, unlike groupByKey.
    val reduced: RDD[(String, Int)] = wordsAndOne.reduceByKey(_ + _)

    // Sort by count, descending (second element of the tuple).
    val sorted: RDD[(String, Int)] = reduced.sortBy(_._2, ascending = false)

    // Action: triggers the whole lineage. foreach runs on the executors;
    // with master "local" that means this console.
    sorted.foreach(line => print(line))

    // Release resources held by the context.
    sc.stop()
  }
}
运行结果:
