<!-- WebMagic core crawler library; the version comes from the webmagic.core.version property. -->
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-core</artifactId>
    <version>${webmagic.core.version}</version>
    <exclusions>
        <!-- Drop the transitive slf4j-api; presumably the project supplies its own SLF4J version (confirm). -->
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </exclusion>
    </exclusions>
</dependency>
- Request request = new Request();
- request.setMethod("GET");
- request.setUrl("http://***");
- request.addHeader("client_signature", content);
- Spider.create(new MPageProcessor())
- .addRequest(request)
- .addPipeline(new MPipeline(start))
- .thread(1)
- .start();
当然,还有更多针对 HTML 内容的解析方式,详见其它文章。
- public class MPageProcessor implements PageProcessor {
- private Site site = Site.me().setSleepTime(3000).setTimeOut(120000);
- @Override
- public void process(Page page) {
- String text = page.getRawText();
- // System.out.println(text);
- JSONObject result = JSONUtil.parseObj(text);
- page.putField("data", result.getJSONObject("data"));
- }
-
- @Override
- public Site getSite() {
- return site;
- }
- }
- public class MPipeline implements Pipeline {
-
- private Date start;
- public MPipeline(Date start){
- this.start = start;
- }
-
- @Override
- public void process(ResultItems resultItems, Task task) {
- JSONObject data = resultItems.get("data");
- int total = data.getJSONObject("queryBean").getInt("total");
- System.out.println(total);
- Date end = new Date();
- System.out.println(DateUtil.format(end, "yyyy-MM-dd HH:mm:ss.SSSSSS"));
- System.out.println("总耗时:"+DateUtil.formatBetween(start, end, BetweenFormatter.Level.MILLISECOND));
- }
- }