• Java 爬虫 jvppeteer


    开源地址:https://gitee.com/fanyong920/jvppeteer

    maven依赖

    <!-- java爬虫 -->
    <dependency>
        <groupId>io.github.fanyong920</groupId>
        <artifactId>jvppeteer</artifactId>
        <version>1.1.3</version>
    </dependency>

    首次先下载Chrome启动器:

    // 自动下载Chromium;已下载过则不会重复下载
    BrowserFetcher.downloadIfNotExist(null);

    如果失败按命令提示手动下载安装即可。

    常用示例代码:

    package com.java.jvppeteer.controller;

    import com.java.jvppeteer.service.CommonService;
    import com.ruiyun.jvppeteer.core.Puppeteer;
    import com.ruiyun.jvppeteer.core.browser.Browser;
    import com.ruiyun.jvppeteer.core.page.Page;
    import com.ruiyun.jvppeteer.options.*;
    import java.util.ArrayList;
    import java.util.List;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.http.HttpStatus;
    import org.springframework.http.ResponseEntity;
    import org.springframework.web.bind.annotation.GetMapping;
    import org.springframework.web.bind.annotation.PostMapping;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.RestController;
    15. @RestController
    16. @RequestMapping(value = "/common")
    17. public class CommonController {
    18. @PostMapping("/hello")
    19. public ResponseEntity<?> sayHello() {
    20. String value = "hello world!";
    21. return new ResponseEntity<>(value, HttpStatus.OK);
    22. }
    23. /**
    24. * 启动浏览器
    25. */
    26. @GetMapping("/start")
    27. public void start() {
    28. try {
    29. //设置基本的启动配置,这里选择了‘有头’模式启动
    30. ArrayList<String> argList = new ArrayList<>();
    31. //自动下载,第一次下载后不会再下载
    32. //BrowserFetcher.downloadIfNotExist(null);
    33. LaunchOptions options = new LaunchOptionsBuilder().withArgs(argList).withHeadless(false).build();
    34. argList.add("--no-sandbox");
    35. argList.add("--disable-setuid-sandbox");
    36. Puppeteer.launch(options);
    37. } catch (Exception e) {
    38. throw new RuntimeException(e);
    39. }
    40. }
    41. /**
    42. * 页面跳转
    43. * @param
    44. * @return void
    45. */
    46. @GetMapping("/go")
    47. public void go() {
    48. try {
    49. //自动下载,第一次下载后不会再下载
    50. //BrowserFetcher.downloadIfNotExist(null);
    51. ArrayList<String> argList = new ArrayList<>();
    52. // withHeadless 是否开启无头模式,无头模式不会显示浏览器
    53. LaunchOptions options = new LaunchOptionsBuilder().withArgs(argList).withHeadless(false).build();
    54. argList.add("--no-sandbox");
    55. argList.add("--disable-setuid-sandbox");
    56. Browser browser = Puppeteer.launch(options);
    57. Page page = browser.newPage();
    58. page.goTo("https://www.baidu.com/");
    59. } catch(Exception e){
    60. e.printStackTrace();
    61. }
    62. }
    63. /**
    64. * 生成pdf
    65. * @param
    66. * @return void
    67. */
    68. @GetMapping("/pdf")
    69. public void pdf() {
    70. try {
    71. ArrayList<String> arrayList = new ArrayList<>();
    72. //生成pdf必须在无头模式下才能生效
    73. LaunchOptions options = new LaunchOptionsBuilder().withArgs(arrayList).withHeadless(true).build();
    74. arrayList.add("--no-sandbox");
    75. arrayList.add("--disable-setuid-sandbox");
    76. Browser browser = Puppeteer.launch(options);
    77. Page page = browser.newPage();
    78. page.goTo("https://gitee.com/fanyong920/jvppeteer");
    79. PDFOptions pdfOptions = new PDFOptions();
    80. pdfOptions.setPath("/Users/mac/BOOTCAMP/jayce/jvppeteer/test.pdf");
    81. page.pdf(pdfOptions);
    82. page.close();
    83. } catch(Exception e){
    84. e.printStackTrace();
    85. }
    86. }
    87. /**
    88. * 性能分析
    89. * @param
    90. * @return void
    91. */
    92. @GetMapping("/tracing")
    93. public void tracing() {
    94. try {
    95. ArrayList<String> argList = new ArrayList<>();
    96. LaunchOptions options = new LaunchOptionsBuilder().withArgs(argList).withHeadless(true).build();
    97. argList.add("--no-sandbox");
    98. argList.add("--disable-setuid-sandbox");
    99. Browser browser = Puppeteer.launch(options);
    100. Page page = browser.newPage();
    101. //开启追踪
    102. page.tracing().start("/Users/mac/BOOTCAMP/jayce/jvppeteer/trace.json");
    103. page.goTo("https://www.baidu.com/?tn=98012088_10_dg&ch=3");
    104. page.tracing().stop();
    105. } catch(Exception e){
    106. e.printStackTrace();
    107. }
    108. }
    109. /**
    110. * 页面截图
    111. */
    112. @GetMapping("/screenshot")
    113. public void screenshot() {
    114. try {
    115. ArrayList<String> arrayList = new ArrayList<>();
    116. LaunchOptions options = new LaunchOptionsBuilder().withArgs(arrayList).withHeadless(true).build();
    117. arrayList.add("--no-sandbox");
    118. arrayList.add("--disable-setuid-sandbox");
    119. Browser browser = Puppeteer.launch(options);
    120. Page page = browser.newPage();
    121. page.goTo("https://www.baidu.com/?tn=98012088_10_dg&ch=3");
    122. ScreenshotOptions screenshotOptions = new ScreenshotOptions();
    123. //设置截图范围
    124. Clip clip = new Clip(1.0,1.56,400,400);
    125. screenshotOptions.setClip(clip);
    126. //设置存放的路径
    127. screenshotOptions.setPath("/Users/mac/BOOTCAMP/jayce/jvppeteer/test.png");
    128. page.screenshot(screenshotOptions);
    129. } catch(Exception e){
    130. e.printStackTrace();
    131. }
    132. }
    133. }

  • 相关阅读:
    20-SpringCloudAlibaba-2
    数聚携手永达汽车集团强势入选爱分析《商业智能实践案例》
    React - 路由 NavLink 使用 与 NavLink 组件封装使用(路由高亮)
    Arduino UNO + DS1302简单获取时间并串口打印
    Tomcat安装与配置
    AI大语言模型时代构建全新数据平台
    [office] excel成绩表格数据排名次的教程 #职场发展#知识分享#媒体
    微积分在神经网络中的本质
    目标检测介绍以及自动驾驶场景应用
    elementplus中el-checkout绑定0或者1时未渲染选中
  • 原文地址:https://blog.csdn.net/qq_32647655/article/details/136190106