• Spark(1)-wordCount入门


    1. 创建Maven项目

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>com.wakedata</groupId>
        <artifactId>code</artifactId>
        <version>1.0-SNAPSHOT</version>
        <properties>
            <maven.compiler.source>8</maven.compiler.source>
            <maven.compiler.target>8</maven.compiler.target>
            <encoding>UTF-8</encoding>
            <spark.version>3.4.1</spark.version>
            <scala.version>2.12.14</scala.version>
        </properties>
        <dependencies>
            <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-library</artifactId>
                <version>${scala.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.12</artifactId>
                <version>${spark.version}</version>
            </dependency>
        </dependencies>
        <build>
            <sourceDirectory>src/main/scala</sourceDirectory>
            <testSourceDirectory>src/test/scala</testSourceDirectory>
            <plugins>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                            <configuration>
                                <args>
                                    <arg>-dependencyfile</arg>
                                    <arg>${project.build.directory}/.scala_dependencies</arg>
                                </args>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <version>2.4.3</version>
                    <executions>
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
                            <configuration>
                                <filters>
                                    <filter>
                                        <artifact>*:*</artifact>
                                        <excludes>
                                            <exclude>META-INF/*.SF</exclude>
                                            <exclude>META-INF/*.DSA</exclude>
                                            <exclude>META-INF/*.RSA</exclude>
                                        </excludes>
                                    </filter>
                                </filters>
                                <transformers>
                                    <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                        <resource>reference.conf</resource>
                                    </transformer>
                                    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                        <mainClass>cn.itcast.rpc.Master</mainClass>
                                    </transformer>
                                </transformers>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>

    2.目录结构

    3. 代码实现

    package sparkCore

    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}

    /**
     * Minimal Spark word-count example.
     *
     * Steps:
     *  1. Create the SparkContext.
     *  2. Create an RDD from a text file.
     *  3. Apply RDD transformations (split, pair, reduce, sort).
     *  4. Trigger an action to materialise and print the result.
     *  5. Release resources.
     */
    object wordcount_01 {

      /**
       * Counts the words of a text file and prints `(word, count)` pairs,
       * one per line, ordered by descending count.
       *
       * @param args optional; `args(0)` is the input path. When absent the
       *             original default `./data/words.txt` is used.
       */
      def main(args: Array[String]): Unit = {
        val inputPath: String = args.headOption.getOrElse("./data/words.txt")

        val conf: SparkConf = new SparkConf().setAppName("WordCount").setMaster("local")
        // The SparkContext is the entry point used to create RDDs.
        val sc: SparkContext = new SparkContext(conf)

        try {
          // Each element of the RDD is one line of the input file.
          val lines: RDD[String] = sc.textFile(inputPath)

          // Split every line into words and flatten; drop the empty tokens that
          // blank lines or consecutive spaces would otherwise count as a word.
          val words: RDD[String] = lines.flatMap(_.split(" ")).filter(_.nonEmpty)

          // Pair every word with the count 1.
          val wordsAndOne: RDD[(String, Int)] = words.map((_, 1))

          // reduceByKey combines locally within each partition before the shuffle.
          val reduced: RDD[(String, Int)] = wordsAndOne.reduceByKey(_ + _)

          // Order by count, highest first.
          val sorted: RDD[(String, Int)] = reduced.sortBy(_._2, ascending = false)

          // Collect to the driver before printing: RDD.foreach runs on the
          // executors, so its output is neither ordered nor guaranteed to reach
          // the driver's console. Suitable here because the result is small.
          sorted.collect().foreach(println)
        } finally {
          // Always release the context, even when a stage fails.
          sc.stop()
        }
      }
    }

    运行结果: 

  • 相关阅读:
    2023 届校招薪资爆料汇总
    SQL底层执行原理
    SpringBoot实现过滤器
    C语言测试题:用冒泡法对输入的10个字符由小到大排序 ,要求数组做为函数参数。
    第三十七篇 Vue中封装Swiper组件
    Elastic:使用 Grafana 监视 Elasticsearch
    React Native Webview 中input type=file accept=“image/*“ 无法调起相机问题排查及解决
    杭州-区块链前瞻性论坛邀请函​
    Verilog写状态机的三种描述方式之三段式
    面试官:关于网络IO模型的原理如何理解,说说你的分析
  • 原文地址:https://blog.csdn.net/weixin_37901366/article/details/136420578