• 使用 IDEA 整合 Elasticsearch(ES)实现京东(JD)商品爬虫


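    pom

        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-web</artifactId>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.62</version>
            </dependency>

            <dependency>
                <groupId>org.jsoup</groupId>
                <artifactId>jsoup</artifactId>
                <version>1.11.3</version>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>
        </dependencies>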

    text

    package qywwc;
    
    import com.alibaba.fastjson.JSON;
    import net.minidev.json.JSONArray;
    import org.apache.lucene.search.join.QueryBitSetProducer;
    import org.apache.lucene.util.QueryBuilder;
    import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
    import org.elasticsearch.action.bulk.BulkRequest;
    import org.elasticsearch.action.bulk.BulkResponse;
    import org.elasticsearch.action.delete.DeleteRequest;
    import org.elasticsearch.action.delete.DeleteResponse;
    import org.elasticsearch.action.get.GetRequest;
    import org.elasticsearch.action.get.GetResponse;
    import org.elasticsearch.action.index.IndexRequest;
    import org.elasticsearch.action.index.IndexResponse;
    import org.elasticsearch.action.search.SearchRequest;
    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.action.support.master.AcknowledgedResponse;
    import org.elasticsearch.action.update.UpdateRequest;
    import org.elasticsearch.action.update.UpdateResponse;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.client.indices.CreateIndexRequest;
    import org.elasticsearch.client.indices.CreateIndexResponse;
    import org.elasticsearch.client.indices.GetIndexRequest;
    
    import org.elasticsearch.common.xcontent.XContentType;
    import org.elasticsearch.index.query.MatchQueryBuilder;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.index.query.TermQueryBuilder;
    import org.elasticsearch.search.SearchHit;
    import org.elasticsearch.search.builder.SearchSourceBuilder;
    import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
    import org.junit.jupiter.api.Test;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.boot.test.context.SpringBootTest;
    import qywwc.entity.User;
    
    import javax.naming.directory.SearchControls;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    
    /**
     * Exercises basic index and document operations of the Elasticsearch
     * {@link RestHighLevelClient}: index create/exists/delete, document
     * add/get/exists/update/delete, bulk insert, and a paged term search.
     *
     * <p>Every test issues a real request through the injected client against the
     * index named by {@link #INDEX_NAME} and prints the outcome to standard output;
     * none of them asserts on the result.
     */
    @SpringBootTest
    class QyWwcApplicationTests {
        /** Name of the index every test in this class operates on. */
        private static final String INDEX_NAME = "qy151-index";

        @Autowired
        private RestHighLevelClient client;

        /**
         * Runs a term query for "你" on the {@code name} field, returning the first page
         * (one hit) with highlighting requested on {@code name}, then prints the total
         * hit count and the source of each returned hit.
         *
         * @throws Exception if the search request fails
         */
        @Test
        public void TextSearch() throws Exception {
            // Build the search conditions.
            SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
            sourceBuilder.query(QueryBuilders.termQuery("name", "你"));

            // Paging: start at the first hit and return a single hit.
            sourceBuilder.from(0);
            sourceBuilder.size(1);
            // Sorting is optional; a sort on a field such as "age" could be added here.

            // Highlighting.
            // NOTE(review): both tags are empty, so a highlighted fragment is
            // indistinguishable from the plain text; presumably the intended HTML
            // tags were lost - confirm and restore them. The highlighted fragments
            // are also never read below.
            HighlightBuilder highlightBuilder = new HighlightBuilder();
            highlightBuilder.field("name");
            highlightBuilder.preTags("");
            highlightBuilder.postTags("");
            sourceBuilder.highlighter(highlightBuilder);

            SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
            searchRequest.source(sourceBuilder);

            SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
            System.out.println("总条数:" + response.getHits().getTotalHits().value);
            for (SearchHit hit : response.getHits().getHits()) {
                System.out.println(hit.getSourceAsString());
            }
        }

        /**
         * Bulk-indexes five users, using each user's id as the document id, and prints
         * whether any item of the bulk request failed.
         *
         * @throws Exception if the bulk request fails
         */
        @Test
        void testBuck() throws Exception {
            // The element type must be declared: iterating a raw List as User does not compile.
            List<User> users = new ArrayList<>();
            users.add(new User("2", "你", "上海1", 151));
            users.add(new User("3", "你1", "上海2", 152));
            users.add(new User("4", "你2", "上海3", 153));
            users.add(new User("5", "你3", "上海4", 154));
            users.add(new User("6", "你4", "上海5", 5));

            // The index is set once on the bulk request; the per-document requests
            // below do not name an index themselves.
            BulkRequest bulkRequest = new BulkRequest(INDEX_NAME);
            for (User user : users) {
                IndexRequest indexRequest = new IndexRequest();
                indexRequest.id(user.getId());
                indexRequest.source(JSON.toJSONString(user), XContentType.JSON);
                bulkRequest.add(indexRequest);
            }

            BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
            System.out.println(bulkResponse.hasFailures());
        }

        /**
         * Partially updates document "1" with a user whose name is set, and prints the
         * result reported by the client.
         *
         * @throws Exception if the update request fails
         */
        @Test
        void textUpdate() throws Exception {
            // NOTE(review): only the name is set; how the remaining (unset) User fields
            // are serialized depends on fastjson defaults and the field types - verify
            // against the User class that no unintended values are written.
            User user = new User();
            user.setName("王文超");

            UpdateRequest updateRequest = new UpdateRequest(INDEX_NAME, "1");
            updateRequest.doc(JSON.toJSONString(user), XContentType.JSON);

            UpdateResponse updateResponse = client.update(updateRequest, RequestOptions.DEFAULT);
            System.out.println(updateResponse.getResult());
        }

        /**
         * Deletes document "1" and prints the result reported by the client.
         *
         * @throws Exception if the delete request fails
         */
        @Test
        void deleteDoc() throws Exception {
            DeleteRequest deleteRequest = new DeleteRequest(INDEX_NAME);
            deleteRequest.id("1");

            DeleteResponse deleteResponse = client.delete(deleteRequest, RequestOptions.DEFAULT);
            System.out.println(deleteResponse.getResult());
        }

        /**
         * Prints whether document "1" exists in the index.
         *
         * @throws Exception if the request fails
         */
        @Test
        void textExit() throws Exception {
            GetRequest getRequest = new GetRequest(INDEX_NAME);
            getRequest.id("1");

            boolean exists = client.exists(getRequest, RequestOptions.DEFAULT);
            System.out.println(exists);
        }

        /**
         * Fetches document "1" and prints the value of its {@code name} field, or a
         * short message when the document does not exist.
         *
         * @throws Exception if the get request fails
         */
        @Test
        void GetIndex() throws Exception {
            GetRequest getRequest = new GetRequest(INDEX_NAME);
            getRequest.id("1");

            GetResponse response = client.get(getRequest, RequestOptions.DEFAULT);
            // Guard against a missing document: there is no source map to read from then.
            if (!response.isExists()) {
                System.out.println("document not found");
                return;
            }
            System.out.println(response.getSourceAsMap().get("name"));
        }

        /**
         * Indexes one user, serialized to JSON, as document "1" and prints the result
         * reported by the client.
         *
         * @throws Exception if the index request fails
         */
        @Test
        void add() throws Exception {
            IndexRequest indexRequest = new IndexRequest(INDEX_NAME);
            indexRequest.id("1"); // explicit document id
            indexRequest.source(
                    JSON.toJSONString(new User(null, "张三", "北京", 15)), XContentType.JSON);

            IndexResponse indexResponse = client.index(indexRequest, RequestOptions.DEFAULT);
            System.out.println(indexResponse.getResult());
        }

        /**
         * Prints whether the index exists.
         *
         * @throws Exception if the request fails
         */
        @Test
        void textExits() throws Exception {
            GetIndexRequest getIndexRequest = new GetIndexRequest(INDEX_NAME);
            boolean exists = client.indices().exists(getIndexRequest, RequestOptions.DEFAULT);
            System.out.println(exists);
        }

        /**
         * Deletes the index and prints whether the deletion was acknowledged.
         *
         * @throws Exception if the request fails
         */
        @Test
        void textDeleteIndex() throws Exception {
            DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(INDEX_NAME);
            AcknowledgedResponse response =
                    client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
            System.out.println(response.isAcknowledged());
        }

        /**
         * Creates the index and prints whether the creation was acknowledged.
         *
         * @throws Exception if the request fails
         */
        @Test
        void contextLoads() throws Exception {
            // The request object carries the description of the index to create.
            CreateIndexRequest createIndexRequest = new CreateIndexRequest(INDEX_NAME);
            CreateIndexResponse response =
                    client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
            System.out.println(response.isAcknowledged());
        }
    }
    
    // HtmlParseUtils:京东搜索结果页面的爬虫工具类
    /**
     * Fetches a JD.com search result page and extracts the listed products from it.
     */
    public class HtmlParseUtils {
        /** Timeout, in milliseconds, allowed for fetching the search page. */
        private static final int FETCH_TIMEOUT_MILLIS = 30000;

        /**
         * Searches JD.com for {@code keyword} and returns one {@code Prodect} per
         * {@code <li>} element found inside the element with id {@code J_goodsList}.
         *
         * <p>For each list item the text of the first {@code p-name} element, the text
         * of the first {@code p-price} element, and the {@code data-lazy-img} attribute
         * of the first {@code <img>} are passed to the {@code Prodect} constructor, in
         * that order. A value missing from the page is passed as an empty string.
         *
         * @param keyword the raw, not yet URL-encoded search text; must not be null
         * @return the parsed products, or an empty list when the page contains no
         *     {@code J_goodsList} element
         * @throws IllegalArgumentException if {@code keyword} is null
         * @throws Exception if the page cannot be fetched or parsed
         */
        public static List<Prodect> parseJd(String keyword) throws Exception {
            if (keyword == null) {
                throw new IllegalArgumentException("keyword must not be null");
            }
            // Encode the keyword so non-ASCII text, spaces, '&' or '#' cannot corrupt the query string.
            String encodedKeyword = java.net.URLEncoder.encode(keyword, "UTF-8");
            String path = "https://search.jd.com/Search?keyword=" + encodedKeyword;

            // The Document represents the whole fetched page.
            Document document = Jsoup.parse(new URL(path), FETCH_TIMEOUT_MILLIS);

            List<Prodect> products = new ArrayList<>();
            Element goodsList = document.getElementById("J_goodsList");
            if (goodsList == null) {
                // No result container on the page: report "no products" instead of
                // failing with a NullPointerException.
                return products;
            }

            for (Element item : goodsList.getElementsByTag("li")) {
                String price = item.getElementsByClass("p-price").eq(0).text();
                String name = item.getElementsByClass("p-name").eq(0).text();
                String image = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
                products.add(new Prodect(name, price, image));
            }
            return products;
        }
    }
    
  • 相关阅读:
    入侵检测——WebCrack
    【yolo系列:yolov7改进wise-iou】
    Flutter整体框架
    基于物联网的自动灌溉系统的设计与实现
    Mysql MMM
    low power-upf-vcsnlp(五)
    vue:实现锚点双向滚动/文章章节联动滚动效果
    js获取对象的属性值
    文章标题编号
    core-site.xml,yarn-site.xml,hdfs-site.xml,mapred-site.xml配置
  • 原文地址:https://blog.csdn.net/weixin_65942614/article/details/126377060