• 一零一九、岗位数据分析(Spark)


    目录

    建表语句

    原始数据

    数据分析

    完整代码


    分析岗位数据如下要求:

    分析不同学历的平均薪资(每个学历的平均估值(最高薪资平均值、最低薪资平均值求平均

    分析不同岗位的平均薪资(求每个学历的平均估值(最高薪资平均值、最低薪资平均值求平均

    分析各公司提供的岗位

    建表语句

    1. DROP TABLE IF EXISTS `job`;
    2. CREATE TABLE `job` (
    3. `address` varchar(255) DEFAULT NULL,
    4. `company` varchar(255) DEFAULT NULL,
    5. `edu` varchar(255) DEFAULT NULL,
    6. `jobName` varchar(255) DEFAULT NULL,
    7. `salary` varchar(255) DEFAULT NULL,
    8. `size` varchar(255) DEFAULT NULL
    9. ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

    原始数据

    数据分析

    1、获取原始数据

    1. val source: RDD[Row] = sprak.read
    2. .format("jdbc")
    3. .option("url", "jdbc:mysql://localhost:3306/crawler")
    4. .option("dbtable", "job")
    5. .option("user", "root")
    6. .option("password", "123456")
    7. .load()
    8. .rdd
    9. source.foreach(println)

     可见,题目要求为最低工资平均值 与 最高工资平均值,先对数据格式进行处理一下

     2、格式化数据

    1. val data: RDD[(String, String, String, String, Float, Float, String)] = source.map(item => {
    2. val salary: Any = item(4)
    3. val min: String = salary.toString.split("-")(0)
    4. val minsalary: String =
    5. if (min.contains("面议")) min.substring(4)
    6. else min
    7. val max: Int = salary.toString.split("-")(1).length - 1
    8. val maxsalary: String = salary.toString.split("-")(1).substring(0, max)
    9. (item(0).toString, item(1).toString, item(2).toString, item(3).toString, minsalary.toFloat, maxsalary.toFloat, item(5).toString)
    10. })
    11. data.foreach(println)

     处理完数据后如图,并将其转为小数,方便后续运算

    3、转为DF,创建表

    1. import sprak.implicits._
    2. val dataDF: DataFrame = data.toDF("address", "company", "edu", "jobName", "minsalary", "maxsalary", "size")
    3. dataDF.createOrReplaceTempView("salary")

    4、分析不同学历的平均薪资每个学历的平均估值(最高薪资平均值、最低薪资平均值求平均))

    1. val sql1 =
    2. """
    3. |select s.edu,(s.avg_minsalary + s.avg_maxsalary)/2 as avgSalary from
    4. |(select edu,avg(minsalary) as avg_minsalary,avg(maxsalary) as avg_maxsalary from salary group by edu) s
    5. |""".stripMargin
    6. sprak.sql(sql1).show()

    5、分析不同岗位的平均薪资(求每个学历的平均估值(最高薪资平均值、最低薪资平均值求平均))

    1. val sql2 =
    2. """
    3. |select s.jobName,(s.avg_minsalary+s.avg_maxsalary)/2 as avgSalary from
    4. |(select jobName,avg(minsalary) as avg_minsalary,avg(maxsalary) as avg_maxsalary from salary group by jobName) s
    5. |""".stripMargin
    6. sprak.sql(sql2).show()

     6、分析各公司提供的岗位

    1. val sql3 =
    2. """
    3. |select company,count(jobName) as jobNum from salary group by company
    4. |""".stripMargin //分析不同岗位的平均薪资
    5. sprak.sql(sql3).show()

    完整代码

    1. package example.spark.test
    2. import org.apache.spark.rdd.RDD
    3. import org.apache.spark.sql.{DataFrame, Row, SparkSession}
    4. object WordCount {
    5. def main(args: Array[String]): Unit = {
    6. val sprak: SparkSession = SparkSession.builder().master("local[6]").appName("job")
    7. .config("spark.sql.warehouse.dir", "E:/")
    8. .getOrCreate()
    9. sprak.sparkContext.setLogLevel("ERROR")
    10. val source: RDD[Row] = sprak.read
    11. .format("jdbc")
    12. .option("url", "jdbc:mysql://localhost:3306/crawler")
    13. .option("dbtable", "job")
    14. .option("user", "root")
    15. .option("password", "123456")
    16. .load()
    17. .rdd
    18. source.foreach(println)
    19. println("------------------------------------------------------------------------------------")
    20. // val data: RDD[(String, String, String, String, Float, Float, String)] = source.map(item => {
    21. // var salary = item(4)
    22. // val minsalary: String = if (salary.toString.split("-")(0).contains("面议"))
    23. // salary.toString.split("-")(0).substring(4)
    24. // else salary.toString.split("-")(0)
    25. // val maxsalary: String = salary.toString.split("-")(1).substring(0, salary.toString.split("-")(1).length - 1)
    26. // (item(0).toString, item(1).toString, item(2).toString, item(3).toString, minsalary.toFloat, maxsalary.toFloat, item(5).toString)
    27. // })
    28. val data: RDD[(String, String, String, String, Float, Float, String)] = source.map(item => {
    29. val salary: Any = item(4)
    30. val min: String = salary.toString.split("-")(0)
    31. val minsalary: String =
    32. if (min.contains("面议")) min.substring(4)
    33. else min
    34. val max: Int = salary.toString.split("-")(1).length - 1
    35. val maxsalary: String = salary.toString.split("-")(1).substring(0, max)
    36. (item(0).toString, item(1).toString, item(2).toString, item(3).toString, minsalary.toFloat, maxsalary.toFloat, item(5).toString)
    37. })
    38. data.foreach(println)
    39. println("------------------------------------------------------------------------------------")
    40. import sprak.implicits._
    41. val dataDF: DataFrame = data.toDF("address", "company", "edu", "jobName", "minsalary", "maxsalary", "size")
    42. dataDF.createOrReplaceTempView("salary")
    43. val sql1 =
    44. """
    45. |select s.edu,(s.avg_minsalary + s.avg_maxsalary)/2 as avgSalary from
    46. |(select edu,avg(minsalary) as avg_minsalary,avg(maxsalary) as avg_maxsalary from salary group by edu) s
    47. |""".stripMargin
    48. sprak.sql(sql1).show()
    49. println("------------------------------------------------------------------------------------")
    50. val sql2 =
    51. """
    52. |select s.jobName,(s.avg_minsalary+s.avg_maxsalary)/2 as avgSalary from
    53. |(select jobName,avg(minsalary) as avg_minsalary,avg(maxsalary) as avg_maxsalary from salary group by jobName) s
    54. |""".stripMargin
    55. sprak.sql(sql2).show()
    56. println("------------------------------------------------------------------------------------")
    57. val sql3 =
    58. """
    59. |select company,count(jobName) as jobNum from salary group by company
    60. |""".stripMargin //分析不同岗位的平均薪资
    61. sprak.sql(sql3).show()
    62. }
    63. }

  • 相关阅读:
    meta视口标签
    Linux之(9)shell基础概念(1)
    3DConvCaps:3DUNet与卷积胶囊编码器用于医学图像分割
    【考研】操作系统——同步互斥问题(P、V操作)1
    LoadingCache
    【Linux基础】Linux的基本指令使用(超详细解析,小白必看系列)
    如何使用CSS实现一个带有动画效果的折叠面板(Accordion)?
    如何在opensea批量发布NFT(Rinkeby测试网)
    springcloud feign配置及调用
    软考高级之系统架构师之计算机基础
  • 原文地址:https://blog.csdn.net/m0_54925305/article/details/126039041