一、爬虫爬取的招聘网站数据
二、在MySQL中创建空表
- SET FOREIGN_KEY_CHECKS=0;
-
- -- ----------------------------
- -- Table structure for jd_jobs
- -- ----------------------------
- DROP TABLE IF EXISTS `jd_jobs`;
- CREATE TABLE `jd_jobs` (
- `job_name` text,
- `job_date` text,
- `minSale` text,
- `maxSale` text,
- `job_city` text,
- `job_area` text,
- `company_nature` text,
- `company_size_min` text,
- `company_size_max` text,
- `company_Industry` text
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
三、通过Spark导入招聘数据到MySQL中
以下是完整代码:
-
- import java.util.Properties
-
-
-
- import com.niit.util.{LoggerLevel, SparkUnit}
-
-
-
- object SparkSQL_MySQL extends LoggerLevel {
-
- def main(args: Array[String]): Unit = {
-
- /**
- * 读取数据,写入MySQL
- *
- * 实现:
- * 1. 读取文件
- * 2. 转为table操作对象
- * 3. 写SQL查询
- * 4. 保存查询SQL的对象
- **/
-
-
-
- val ss = SparkUnit.getLocalSparkSession("OffLine")
-
-
-
- // 读取两个文件
-
- val path = "D:\\temp\\"
-
- val jd_jobs = ss.read
-
- .option("header", false)
-
- .option("encoding","gbk")
-
- .option("delimiter", ",")
-
- .csv(path + "51_jobs_data.csv").toDF("job_name", "job_date", "minSale","maxSale","job_city","job_area","company_nature","company_size_min","company_size_max","company_Industry")
-
- jd_jobs.show()
-
- // SQL式编程需要转为table结构
-
- jd_jobs.createTempView("jd_jobs")
-
-
-
- // select
-
- val jobs = ss.sql(
-
- """
- | SELECT job_name, job_date, minSale, maxSale,job_city,job_area,
- | company_nature,company_size_min,company_size_max,company_Industry
- | from jd_jobs
- |""".stripMargin)
-
-
-
- // JDBC
-
- val url = "jdbc:mysql://localhost:3306/bigdata?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&serverTimezone=UTC"
-
- val tb = "jd_jobs"
-
- val properties = new Properties()
-
- properties.setProperty("user", "root")
-
- properties.setProperty("password", "123456")
-
- properties.setProperty("driverClass", "com.mysql.jdbc.Driver")
-
-
-
- // save
-
- jobs.write.mode("overwrite").jdbc(url, tb, properties)
-
-
-
- // tips :除了查询SQL返回对象,然后保存对象
-
- // 还可以直接create table as select ... 一步到位
-
- }
-
- }
四、导入MySQL