实在是受不了内外网导出导入jar包了,心一横,写了一个按26个字母顺序扒Maven中央仓库所有jar的代码。
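<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>

    <groupId>org.bullgod</groupId>
    <artifactId>MavenRepoBaLaLa</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.16.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-collections4</artifactId>
            <version>4.4</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.11.0</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.1</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>2.0.5</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-reload4j</artifactId>
            <version>2.0.5</version>
        </dependency>
    </dependencies>
</project>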
- package org.bullgod;
-
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-
- public class MavenBalabalaThread {
-
-
- public static void main(String[] args) throws IOException {
-
-
- ExecutorService es = Executors.newFixedThreadPool(30 );
-
- char cc = 'a';
- for (int i = 0; i < 26; i++) {
- char dd = (char) (cc + i);//强制类型转化
- String ccc = String.valueOf(dd);
- es.submit(new Task(ccc));
- }
- // 关闭线程池:
- es.shutdown();
- }
- }
-
- class Task implements Runnable {
-
- /**
- * 爬取根目录
- */
- //private static final String ROOT = "https://repo.maven.apache.org/maven2/";
- private static final String ROOT = "https://repo1.maven.org/maven2/";
- /**
- * 硬盘存取根目录
- */
- private static final String DiskROOT = "D:\\maven2\\";
- /**
- * maven-metadata.xml文件名
- */
- private static final String MAVEN_METADATA_XML_FILENAME = "maven-metadata.xml";
-
- /**
- * 全部顶层索引文件
- */
- private static final String indexfilename = "maven2Indexall.txt";
-
- String firstAlpaca = "all"; //all 全部爬取,失败概率大,建议分字母 a,b,c...爬取
-
- public Task(String args) throws IOException {
-
- firstAlpaca = args;
- }
-
- /**
- * 查询子url
- *
- * @param url 当前url
- * @param sleepMillis 睡眠毫秒数
- */
- private static void findSubUrl(String url, int sleepMillis) {
- try {
- Thread.sleep(sleepMillis);
- Document doc = null;
- boolean needreconnect = true;
- while (needreconnect) {
- try {
- doc = Jsoup.connect(url).userAgent("Mozilla").timeout(5000).get();
- } catch (SocketTimeoutException te) {
- //链接超时,等待重连,10秒
- Thread.sleep(10 * 1000);
- //System.out.println("链接超时,等待重连,10秒");
- System.out.println("链接超时,等待10秒重连");
- needreconnect = true;
- continue;
- }
- needreconnect = false;
- }
-
- Elements links = doc.select("#contents a");
- for (Element link : links) {
- String pathorfilename = link.attr("href");
- if (pathorfilename.equals("../")) {
- //上级目录,不处理
- continue;
- }
- //创建文件夹
- //获得绝对URL
- String absUrl = link.absUrl("href");
- System.out.println(absUrl);
- System.out.println("{}" + absUrl);
-
- //获得保存文件路径
- int urllen = ROOT.length();
- String pathName = absUrl.substring(urllen);
- java.util.Date day = new Date();
- SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- String nowtime = sdf.format(day);
- System.out.println("[" + nowtime + "]: " + pathName);
- System.out.println("[{}]: " + nowtime + "{}" + pathName);
-
- //判断是目录还是文件
- int ret = pathorfilename.indexOf("/");
- if (ret == -1) {
- String saveFile = DiskROOT + pathName;
- File f1 = null;
- //是文件,不是目录
- //储存网络文件到硬盘
- while (true) {
- try {
- f1 = new File(saveFile);
- if (!f1.exists()) {
- //文件不存在才下载
- URL httpurl = new URL(absUrl);
- BufferedInputStream bis = new BufferedInputStream(httpurl.openStream());
- FileOutputStream fis = new FileOutputStream(saveFile);
- byte[] buffer = new byte[1024];
- int count = 0;
- while ((count = bis.read(buffer, 0, 1024)) != -1) {
- fis.write(buffer, 0, count);
- }
- fis.close();
- bis.close();
- break;
- }
- } catch (IOException e) {
- System.out.println("下载文件失败:{}" + saveFile);
- if (f1.exists()) {
- f1.delete();
- }
- Thread.sleep(10 * 1000);
- //System.out.println("链接超时,等待重连,10秒");
- System.out.println("文件下载失败,等待10秒重新下载");
- //重新下载
- continue;
- }
- }
- } else {
- //目录
- //创建硬盘目录
- String filePath = DiskROOT + pathName;
- File f2 = new File(filePath);
- if (!f2.exists()) {
- boolean flag2 = f2.mkdir();
- if (!flag2) {
- //System.out.println( "文件夹创建失败:"+filePath);
- System.out.println("创建文件失败:{}" + filePath);
- }
- }
- //递归处理
- findSubUrl(absUrl, sleepMillis);
- }
- }
- } catch (IOException | InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- private static void searchdir(String rooturl, String dir, int sleepMillis) {
- String filePath = DiskROOT + dir;
- File f = new File(filePath);
- if (!f.exists()) {
- boolean flag2 = f.mkdir();
- if (!flag2) {
- System.out.println("文件夹创建失败:{}" + filePath);
- }
- }
- String suburl = rooturl + dir;
- findSubUrl(suburl, sleepMillis);
- }
-
- @Override
- public void run() {
-
- System.out.println("Beging crawler:beging with {}" + firstAlpaca);
- int sleepMillis = 100;
- String rooturl = ROOT;
- // findSubUrl(rooturl, sleepMillis); //直接爬取全部
-
- File file = new File(DiskROOT + indexfilename);
- try {
- BufferedReader br = new BufferedReader(new FileReader(file));
- String st;
- while ((st = br.readLine()) != null) {
- System.out.println(st);
- String dir = st.trim();
- if (firstAlpaca.equals("all") || firstAlpaca.equals("ALL")) {
- searchdir(rooturl, dir, sleepMillis);
- } else {
- int index = dir.toLowerCase().indexOf(firstAlpaca);
- if (index == 0) {
- //首字母合格
- searchdir(rooturl, dir, sleepMillis);
- }
- }
- }
- } catch (FileNotFoundException e) {
- System.out.println("找不到文件:{}" + indexfilename);
- } catch (IOException ie) {
- System.out.println("使用文件失败:{}" + indexfilename);
- }
- System.out.println("End crawler");
- }
- }
-