• 下载xlsx中的URL到指定目录


      爱情是灯，友情是影子，当灯灭了，你会发现你的周围都是影子。朋友，是在最后可以给你力量的人。

    1. import org.apache.poi.ss.usermodel.*;
    2. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
    3. import java.io.*;
    4. import java.net.HttpURLConnection;
    5. import java.net.URL;
    6. import java.util.ArrayList;
    7. import java.util.List;
    8. import java.util.concurrent.ExecutorService;
    9. import java.util.concurrent.Executors;
    10. public class XLSXDownloader {
    11. public static void main(String[] args) {
    12. String xlsxFilePath = "C:\\Users\\Administrator\\Desktop\\123.xlsx"; // 指定XLSX文件路径
    13. String downloadDirectory = "F:\\123\\"; // 指定下载目录
    14. int maxThreads = 5; // 最大线程数
    15. long startTime = System.currentTimeMillis(); // 记录开始时间
    16. List failedRows = new ArrayList<>(); // 用于存储下载失败的行号
    17. List failedUrls = new ArrayList<>(); // 用于存储下载失败的URL
    18. try {
    19. FileInputStream fileInputStream = new FileInputStream(xlsxFilePath);
    20. Workbook workbook = new XSSFWorkbook(fileInputStream);
    21. Sheet sheet = workbook.getSheetAt(0); // 假设数据在第一个工作表中
    22. int totalRows = sheet.getPhysicalNumberOfRows(); // 总表行数
    23. ExecutorService executorService = Executors.newFixedThreadPool(maxThreads);
    24. for (int currentRow = 0; currentRow < totalRows; currentRow++) {
    25. Row row = sheet.getRow(currentRow);
    26. for (Cell cell : row) {
    27. if (cell.getCellType() == CellType.STRING) {
    28. String cellValue = cell.getStringCellValue();
    29. // 检查单元格内容是否包含 "http://" 或 "https://"
    30. if (cellValue.contains("http://") || cellValue.contains("https://")) {
    31. final int finalCurrentRow = currentRow;
    32. executorService.submit(() -> {
    33. try {
    34. downloadFile(cellValue, downloadDirectory, finalCurrentRow, failedUrls);
    35. } catch (IOException e) {
    36. System.err.println("下载失败:" + cellValue);
    37. failedRows.add(finalCurrentRow); // 记录下载失败的行号
    38. failedUrls.add(cellValue); // 记录下载失败的URL
    39. }
    40. });
    41. }
    42. }
    43. }
    44. }
    45. executorService.shutdown();
    46. while (!executorService.isTerminated()) {
    47. // 等待所有线程完成
    48. }
    49. if (!failedRows.isEmpty()) {
    50. generateErrorTable(failedRows, failedUrls, downloadDirectory);
    51. }
    52. long endTime = System.currentTimeMillis(); // 记录结束时间
    53. long totalTime = endTime - startTime; // 计算总耗时时间(毫秒)
    54. long totalTimeInMinutes = totalTime / (60 * 1000); // 转换成分钟
    55. System.out.println("下载完成。总耗时时间:" + totalTimeInMinutes + " 分钟");
    56. } catch (IOException e) {
    57. e.printStackTrace();
    58. }
    59. }
    60. private static void downloadFile(String urlString, String downloadDirectory, int currentRow, List failedUrls) throws IOException {
    61. URL url = new URL(urlString);
    62. HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    63. connection.setRequestMethod("GET");
    64. int responseCode = connection.getResponseCode();
    65. if (responseCode == HttpURLConnection.HTTP_OK) {
    66. String fileName = urlString.substring(urlString.lastIndexOf('/') + 1);
    67. String subDirectory = urlString.substring(urlString.indexOf(".com/") + 5, urlString.lastIndexOf('/') + 1);
    68. String filePath = downloadDirectory + File.separator + subDirectory + fileName;
    69. File file = new File(filePath);
    70. file.getParentFile().mkdirs(); // 创建目录层级
    71. // 输出当前线程下载的是总表的第几行和总表的进度
    72. System.out.println("线程正在下载总表的第 " + (currentRow + 1) + " 行,总进度 " + (currentRow + 1) + "/" + failedUrls.size());
    73. try (InputStream inputStream = connection.getInputStream();
    74. FileOutputStream outputStream = new FileOutputStream(filePath)) {
    75. byte[] buffer = new byte[1024];
    76. int bytesRead;
    77. while ((bytesRead = inputStream.read(buffer)) != -1) {
    78. outputStream.write(buffer, 0, bytesRead);
    79. }
    80. }
    81. } else {
    82. throw new IOException("下载失败,HTTP响应码:" + responseCode);
    83. }
    84. }
    85. private static void generateErrorTable(List failedRows, List failedUrls, String downloadDirectory) throws IOException {
    86. String errorTableFilePath = downloadDirectory + File.separator + "error_table.xlsx";
    87. Workbook workbook = new XSSFWorkbook();
    88. Sheet sheet = workbook.createSheet("Errors");
    89. int rowNum = 0;
    90. for (int i = 0; i < failedRows.size(); i++) {
    91. int row = failedRows.get(i);
    92. String url = failedUrls.get(i);
    93. Row newRow = sheet.createRow(rowNum++);
    94. Cell cell1 = newRow.createCell(0);
    95. cell1.setCellValue("下载失败的行:" + (row + 1));
    96. Cell cell2 = newRow.createCell(1);
    97. cell2.setCellValue("下载失败的URL:" + url);
    98. }
    99. try (FileOutputStream outputStream = new FileOutputStream(errorTableFilePath)) {
    100. workbook.write(outputStream);
    101. }
    102. }
    103. }

  • 相关阅读:
    【Linux】进程优先级PRI
    4.2 Serializable Concept
    MCU测试科普|如何进行MCU芯片测试,具体流程是什么?
    蛋白纯化-实验设计
    【用unity实现100个游戏之12】unity制作一个俯视角2DRPG《类星露谷物语》资源收集游戏demo
    【SQL server】数据库入门基本操作教学
    pytorch从零实现resnet
    为什么私有化的知识库平台对企业来说更靠谱呢?
    JavaWeb面试
    Nginx快速上手
  • 原文地址:https://blog.csdn.net/s_sos0/article/details/132843898