• Elasticsearch搜索功能的实现(五)-- 实战


    实战环境

    Elasticsearch 8.5.0 + Kibana 8.5.0 + Spring Boot 3.0.2 + Spring Data Elasticsearch 5.0.2 + JDK 17

    一、集成 spring data elasticsearch

    1 添加依赖

    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    

    2 配置es连接

    /**
     * Spring configuration that supplies the connection settings used to build the
     * Elasticsearch client.
     */
    @Configuration
    public class ElasticsearchConfig extends ElasticsearchConfiguration {

        /**
         * Builds the client configuration: a single node at {@code 127.0.0.1:9200},
         * authenticated with HTTP basic auth as user {@code elastic}.
         *
         * @return the client configuration
         */
        @Override
        public ClientConfiguration clientConfiguration() {
            // Collect endpoint and credentials first, then materialize the configuration.
            final var settings = ClientConfiguration.builder()
                    .connectedTo("127.0.0.1:9200")
                    .withBasicAuth("elastic", "********");
            return settings.build();
        }
    }
    

    3 配置打印DSL语句

    # Logging configuration
    logging:
      level:
        # Elasticsearch client logger, set to trace so the executed DSL statements are printed
        org.springframework.data.elasticsearch.client.WIRE : trace
    

    二、index及mapping 文件编写

    @Data
    @Document(indexName = "news") //索引名
    @Setting(shards = 1,replicas = 0,refreshInterval = "1s") //shards 分片数 replicas 副本数
    @Schema(name = "News",description = "新闻对象")
    public class News implements Serializable {
    
        @Id  //索引主键
        @NotBlank(message = "新闻ID不能为空")
        @Schema(type = "integer",description = "新闻ID",example = "1")
        private Integer id;
    
        @NotBlank(message = "新闻标题不能为空")
        @Schema(type = "String",description = "新闻标题")
        @MultiField(mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart"),
                otherFields = {@InnerField(type = FieldType.Keyword, suffix = "keyword") }) //混合类型字段 指定 建立索引时分词器与搜索时入参分词器
        private String title;
    
        @Schema(type = "LocalDate",description = "发布时间")
        @Field(type = FieldType.Date,format = DateFormat.date)
        private LocalDate pubDate;
    
        @Schema(type = "String",description = "来源")
        @Field(type = FieldType.Keyword)
        private String source;
    
        @Schema(type = "String",description = "行业类型代码",example = "1,2,3")
        @Field(type = FieldType.Text,analyzer = "ik_max_word",searchAnalyzer = "ik_smart")
        private String industry;
    
        @Schema(type = "String",description = "预警类型")
        @Field(type = FieldType.Keyword)
        private String type;
    
        @Schema(type = "String",description = "涉及公司")
        @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
        private String companies;
    
        @Schema(type = "String",description = "新闻内容")
        @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
        private String content;
    
    }
    

    三、DAO层编写

    /**
     * Spring Data Elasticsearch repository for {@link News} documents with {@code Integer} ids.
     */
    @Repository
    public interface NewsRepository extends ElasticsearchRepository<News,Integer> {
    
        /**
         * Finds news by type, one page at a time. The query is derived by Spring Data
         * from the method name.
         *
         * @param type     value to look up in the {@code type} property
         * @param pageable paging and sorting information
         * @return the requested page of matching news
         */
        Page<News> findByType(String type, Pageable pageable);
    }
    

    四、简单功能实现

    4.1 简单功能写法

        /**
         * Saves a news document by delegating to {@code newsRepository.save}.
         *
         * @param news the news document to save
         */
        @Override
        public void saveNews(News news) {
            newsRepository.save(news);
        }
    
        /**
         * Deletes a news document by delegating to {@code newsRepository.deleteById}.
         *
         * @param newsId id of the news document to delete
         */
        @Override
        public void delete(Integer newsId) {
            newsRepository.deleteById(newsId);
        }
    
        /**
         * Deletes the index that {@link News} is mapped to.
         */
        @Override
        public void deleteIndex() {
            operations.indexOps(News.class).delete();
        }
    
        /**
         * Creates the index for {@link News}, including its mapping, unless it already exists.
         *
         * <p>Elasticsearch rejects the creation of an index that already exists, so the
         * call is guarded by an existence check. This makes the method safe to call
         * repeatedly, e.g. when the index has already been created at application start-up.
         */
        @Override
        public void createIndex() {
            final var indexOps = operations.indexOps(News.class);
            if (!indexOps.exists()) {
                indexOps.createWithMapping();
            }
        }
    
        /**
         * Returns the first page (page 0, 10 entries per page) of news with the given type,
         * sorted by publication date in descending order.
         *
         * @param type the type to look up
         * @return total number of matches together with the content of the first page
         */
        @Override
        public PageResult findByType(String type) {
            // Newest publication date first
            final Pageable firstPage = PageRequest.of(0, 10, Sort.by(Sort.Direction.DESC, "pubDate"));
            final Page<News> page = newsRepository.findByType(type, firstPage);
            return new PageResult(page.getTotalElements(), page.getContent());
        }
    

    实现效果图片:
    image

    实际执行的DSL语句:
    image

    注意：当指定了排序条件（且排序条件中不包含 _score）时，Elasticsearch 默认不再计算相关性得分，返回结果中的 _score 为 null；如需保留得分，可将 track_scores 设置为 true。

    4.2 搜索功能的实现

        /**
         * Searches news with optional filters and an optional keyword.
         *
         * <p>Filters are always applied through {@code setFilter}. When a keyword is given,
         * {@code setKeyWordAndHighlightField} adds the keyword query and the highlight
         * configuration; otherwise the plain bool query is used. Results are sorted by
         * publication date and then by score, both descending. Highlighted fragments of
         * {@code title} and {@code content}, when present, replace the original field
         * values of the returned documents.
         *
         * @param search search criteria (paging, filters, keyword)
         * @return total hit count together with the documents of the requested page
         */
        @Override
        public PageResult searchNews(NewsPageSearch search) {

            // Builder for the native (DSL) query
            final NativeQueryBuilder nativeQueryBuilder = new NativeQueryBuilder();

            // Sort by publication date first, then by score
            final Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"), new Order(Sort.Direction.DESC, "_score"));

            // NOTE(review): PageRequest page numbers are zero-based; confirm getCurPage() is zero-based as well
            final Pageable pageable = PageRequest.of(search.getCurPage(), search.getPageSize(), sort);

            final BoolQuery.Builder boolBuilder = new BoolQuery.Builder();
            // Filter conditions
            setFilter(search, boolBuilder);

            // Keyword search
            if (StringUtils.isNotBlank(search.getKeyword())) {
                setKeyWordAndHighlightField(search, nativeQueryBuilder, boolBuilder);
            } else {
                nativeQueryBuilder.withQuery(q -> q.bool(boolBuilder.build()));
            }

            nativeQueryBuilder.withPageable(pageable);

            final SearchHits<News> searchHits = operations.search(nativeQueryBuilder.build(), News.class);

            // Copy highlighted fragments back into the returned documents
            final List<News> newsList = searchHits.getSearchHits().stream()
                    .map(hit -> {
                        final News news = hit.getContent();
                        final List<String> titleFragments = hit.getHighlightFields().get("title");
                        final List<String> contentFragments = hit.getHighlightFields().get("content");
                        if (!CollectionUtils.isEmpty(titleFragments)) {
                            news.setTitle(titleFragments.get(0));
                        }
                        if (!CollectionUtils.isEmpty(contentFragments)) {
                            news.setContent(contentFragments.get(0));
                        }
                        return news;
                    })
                    .collect(Collectors.toList());

            return new PageResult(searchHits.getTotalHits(), newsList);

        }
    
        /**
         * Adds the filter clauses to the bool query: industry type, source, alert type
         * and publication date range.
         *
         * <p>Industry, source and type are comma-separated lists; a document passes a
         * filter when it matches any of the listed values. The date range is applied when
         * a start date, an end date, or both are given; a missing bound is left open.
         *
         * @param search      search criteria supplying the filter values
         * @param boolBuilder bool query builder the filter clauses are added to
         */
        private void setFilter(NewsPageSearch search, BoolQuery.Builder boolBuilder) {
            // NOTE(review): "industry" is mapped as analyzed text in News, so these term
            // queries match against analyzer tokens — confirm this is intended.
            addTermsFilter(boolBuilder, "industry", search.getIndustry());
            addTermsFilter(boolBuilder, "source", search.getSource());
            addTermsFilter(boolBuilder, "type", search.getType());

            // Date range: each bound is only added when it is actually provided, so a
            // lone end date is honoured and a missing end date is never sent as null.
            // NOTE(review): assumes getEndDate() returns a String like getStartDate() — confirm.
            final boolean hasStartDate = StringUtils.isNotBlank(search.getStartDate());
            final boolean hasEndDate = StringUtils.isNotBlank(search.getEndDate());
            if (hasStartDate || hasEndDate) {
                boolBuilder.filter(f -> f.range(r -> {
                    r.field("pubDate");
                    if (hasStartDate) {
                        r.gte(JsonData.of(search.getStartDate()));
                    }
                    if (hasEndDate) {
                        r.lte(JsonData.of(search.getEndDate()));
                    }
                    return r;
                }));
            }
        }

        /**
         * Adds a filter that matches documents whose {@code field} equals any of the
         * comma-separated values. Values are trimmed and empty entries are skipped;
         * nothing is added when no usable value remains.
         */
        private void addTermsFilter(BoolQuery.Builder boolBuilder, String field, String commaSeparatedValues) {
            if (!StringUtils.isNotBlank(commaSeparatedValues)) {
                return;
            }
            final List<Query> termQueries = Arrays.stream(commaSeparatedValues.split(","))
                    .map(String::trim)
                    .filter(value -> !value.isEmpty())
                    .map(value -> new Query.Builder().term(t -> t.field(field).value(value)).build())
                    .collect(Collectors.toList());
            if (termQueries.isEmpty()) {
                return;
            }
            boolBuilder.filter(f -> f.bool(b -> b.should(termQueries)));
        }
    
        /**
         * Adds the keyword query and the highlight configuration to the native query.
         *
         * <p>The keyword must match at least one of {@code title}, {@code content} or
         * {@code companies}. The bool query is wrapped in a function score query that adds
         * weight 100 for a title match, 20 for a content match and 10 for a companies
         * match (score mode and boost mode are both {@code sum}, minimum score 1.0), so
         * title matches rank highest. {@code title} and {@code content} are highlighted.
         *
         * @param search             search criteria supplying the keyword
         * @param nativeQueryBuilder native query builder receiving the query and highlight settings
         * @param boolBuilder        bool query builder that already holds the filter clauses
         */
        private void setKeyWordAndHighlightField(NewsPageSearch search, NativeQueryBuilder nativeQueryBuilder, BoolQuery.Builder boolBuilder) {
            final String keyword = search.getKeyword();
            // Query condition
            boolBuilder.must(b -> b.multiMatch(m -> m.fields("title","content","companies").query(keyword)));

            // Highlighting. Empty pre/post tags would return the text without any markup,
            // so explicit tags are set; adjust them to whatever the front end expects.
            final HighlightFieldParameters highlightParameters = HighlightFieldParameters.builder()
                    .withPreTags("<em>")
                    .withPostTags("</em>")
                    .withRequireFieldMatch(true) // only tag fields that matched the query
                    .withNumberOfFragments(0) // return the whole field instead of fragments
                    .build();
            final Highlight highlight = new Highlight(List.of(
                    new HighlightField("title", highlightParameters),
                    new HighlightField("content", highlightParameters)));

            nativeQueryBuilder.withQuery(
                            f -> f.functionScore(
                                    fs -> fs.query(q -> q.bool(boolBuilder.build()))
                                            .functions(weightedMatch("title", keyword, 100.0),
                                                    weightedMatch("content", keyword, 20.0),
                                                    weightedMatch("companies", keyword, 10.0))
                                            .scoreMode(FunctionScoreMode.Sum)
                                            .boostMode(FunctionBoostMode.Sum)
                                            .minScore(1.0)))
                    .withHighlightQuery(new HighlightQuery(highlight, News.class));

        }

        /**
         * Builds a function score clause that contributes {@code weight} for documents
         * whose {@code field} matches {@code keyword}.
         */
        private FunctionScore weightedMatch(String field, String keyword, double weight) {
            return FunctionScore.of(fn -> fn
                    .filter(fq -> fq.match(mt -> mt.field(field).query(keyword)))
                    .weight(weight));
        }
    

    实现效果

    加权前效果:
    image

    加权后效果:
    image

    DSL 语句:

    {
    	"from": 0,
    	"size": 6,
    	"sort": [{
    		"pubDate": {
    			"mode": "min",
    			"order": "desc"
    		}
    	}, {
    		"_score": {
    			"order": "desc"
    		}
    	}],
    	"highlight": {
    		"fields": {
    			"title": {
    				"number_of_fragments": 0,
    				"post_tags": [""],
    				"pre_tags": [""]
    			},
    			"content": {
    				"number_of_fragments": 0,
    				"post_tags": [""],
    				"pre_tags": [""]
    			}
    		}
    	},
    	"query": {
    		"function_score": {
    			"boost_mode": "sum",
    			"functions": [{
    				"filter": {
    					"match": {
    						"title": {
    							"query": "立足优势稳住外贸基本盘"
    						}
    					}
    				},
    				"weight": 100.0
    			}, {
    				"filter": {
    					"match": {
    						"content": {
    							"query": "立足优势稳住外贸基本盘"
    						}
    					}
    				},
    				"weight": 20.0
    			}, {
    				"filter": {
    					"match": {
    						"companies": {
    							"query": "立足优势稳住外贸基本盘"
    						}
    					}
    				},
    				"weight": 10.0
    			}],
    			"min_score": 1.0,
    			"query": {
    				"bool": {
    					"filter": [{
    						"bool": {
    							"should": [{
    								"term": {
    									"industry": {
    										"value": "1"
    									}
    								}
    							}, {
    								"term": {
    									"industry": {
    										"value": "2"
    									}
    								}
    							}, {
    								"term": {
    									"industry": {
    										"value": "3"
    									}
    								}
    							}]
    						}
    					}, {
    						"bool": {
    							"should": [{
    								"term": {
    									"source": {
    										"value": "新华社"
    									}
    								}
    							}, {
    								"term": {
    									"source": {
    										"value": "中国经济网"
    									}
    								}
    							}]
    						}
    					}, {
    						"bool": {
    							"should": [{
    								"term": {
    									"type": {
    										"value": "经济简报"
    									}
    								}
    							}, {
    								"term": {
    									"type": {
    										"value": "外贸简报"
    									}
    								}
    							}]
    						}
    					}, {
    						"range": {
    							"pubDate": {
    								"gte": "2023-03-29",
    								"lte": "2023-03-30"
    							}
    						}
    					}],
    					"must": [{
    						"multi_match": {
    							"fields": ["title", "content", "companies"],
    							"query": "立足优势稳住外贸基本盘"
    						}
    					}]
    				}
    			},
    			"score_mode": "sum"
    		}
    	},
    	"track_scores": false,
    	"version": true
    }
    

    4.3 接口测试

    image

  • 相关阅读:
    算法刷题:经典TopK问题整理
    mybatis学习记录(四)-----MyBatis核心配置文件详解
    BERT: 面向语言理解的深度双向Transformer预训练
    VMware Workstation 12 安装windows_server_2016
    leetcode698. 划分为k个相等的子集
    【全民编程】《软件编程-讲课视频》【零基础入门到实战应用】
    如何实现通过Leaflet加载dwg格式的CAD图
    列表和标签企业报告版的完整报告解决方案
    Glide - Android的图像加载和缓存库,专注于平滑滚动
    Java线程池
  • 原文地址:https://www.cnblogs.com/gdwkong/p/17331639.html