• Elasticsearch 带中文分词的全文检索(分页+高亮返回)


    一.全文搜索介绍

    Full text queries 全文搜索主要有以下几种类型:

    1.1 匹配查询(match query)

    1. QueryBuilder qb = matchQuery(
    2. "name", //field 字段
    3. "kimchy elasticsearch" // text
    4. );

    DSL 查询语句:

    1. GET /_search
    2. {
    3. "query": {
    4. "match" : {
    5. "message" : "this is a test"
    6. }
    7. }
    8. }

    1.2 多字段查询(multi_match query)

    multi_match 是 match 查询的多字段版本,可以用同一段查询文本同时对多个字段进行匹配查询

    1. QueryBuilder qb = multiMatchQuery(
    2. "kimchy elasticsearch", //text
    3. "user", "message" //fields 多个字段
    4. );

    DSL查询语句:

    1. GET /_search
    2. {
    3. "query": {
    4. "multi_match" : {
    5. "query": "this is a test",
    6. "fields": [ "subject", "message" ]
    7. }
    8. }
    9. }

    1.3 常用术语查询(common_terms query)

    可以让不常见(低频)的词语在查询中获得更高的权重:查询会按词频把词语分为高频词和低频词(阈值由 cutoff_frequency 控制),优先匹配低频词,从而在不使用停用词表的情况下降低高频词对相关性的影响

    1. QueryBuilder qb = commonTermsQuery(
    2. "name", //field 字段
    3. "kimchy"); // value

    DSL查询语句:

    1. GET /_search
    2. {
    3. "query": {
    4. "common": {
    5. "body": {
    6. "query": "this is bonsai cool",
    7. "cutoff_frequency": 0.001
    8. }
    9. }
    10. }
    11. }

    1.4 查询语句查询(query_string query)

           与 Lucene 查询语法结合得更加紧密的一种查询,允许你在一个查询语句中使用多个特殊条件关键字(如:AND、OR、NOT)对多个字段进行查询,这种查询建议仅由熟悉该语法的专家用户使用。

    QueryBuilder qb = queryStringQuery("+kimchy -elasticsearch");    //text

    DSL查询语句:

    1. GET /_search
    2. {
    3. "query": {
    4. "query_string" : {
    5. "default_field" : "content",
    6. "query" : "this AND that OR thus"
    7. }
    8. }
    9. }

            以上四种是全文搜索可以用到的查询方式,但是一般使用多字段查询(multi_match query)比较多,这里重点写下第二种方式的使用。

    二.使用multi_match query的方式实现全文多字段的匹配查询

    2.1 检索服务

    实现一个关键字分词匹配多个字段,分页查询,命中字段高亮显示

    1. private SearchDto getResult(ShipQueryDto shipQueryDto, String indexName, Class clazz) throws IOException, IllegalAccessException {
    2. SearchRequest searchRequest = new SearchRequest();
    3. searchRequest.indices(indexName);
    4. SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    5. /* 高亮查询 */
    6. HighlightBuilder highlightBuilder = new HighlightBuilder();
    7. highlightBuilder.numOfFragments(0); /*长度*/
    8. highlightBuilder.preTags("");
    9. highlightBuilder.postTags("");
    10. highlightBuilder.highlighterType("plain");
    11. for (String name : EsSmartIndexHelper.classMapMap.get(clazz).keySet()) {
    12. highlightBuilder.field(name).requireFieldMatch(false);
    13. }
    14. sourceBuilder.highlighter(highlightBuilder);
    15. BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    16. if(StringUtils.isNotEmpty(shipQueryDto.getKeys())){
    17. boolQueryBuilder.must(QueryBuilders.multiMatchQuery(shipQueryDto.getKeys()).fields(EsSmartIndexHelper.classMapMap.get(clazz)).type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
    18. // .minimumShouldMatch("70%")
    19. //使用最细粒度分词搜索
    20. .analyzer("ik_max_word").operator(Operator.OR)
    21. );
    22. }
    23. sourceBuilder.query(boolQueryBuilder);
    24. // 分页
    25. Integer from = (shipQueryDto.getPageNum()-1) * shipQueryDto.getPageSize();
    26. sourceBuilder.from(from);
    27. sourceBuilder.size(shipQueryDto.getPageSize());
    28. sourceBuilder.trackTotalHits(true);
    29. searchRequest.source(sourceBuilder);
    30. log.error("查询的DSL语句: " + searchRequest.source().toString());
    31. SearchResponse searchRes = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    32. log.error("返回原始数据 : " + searchRes);
    33. SearchHit[] hits = searchRes.getHits().getHits();
    34. List searchShipCbgkDtos = new ArrayList<>();
    35. for (SearchHit hit : hits) {
    36. String json = hit.getSourceAsString();
    37. T shipDto = JSONObject.parseObject(json, clazz);
    38. //获取高亮字段
    39. Map highlightFields = hit.getHighlightFields();
    40. if(CollectionUtil.isNotEmpty(highlightFields)){
    41. //获取class子类的字段
    42. Field[] fields =clazz.getDeclaredFields();
    43. //获取class继承父类的字段
    44. Field[] fields1 = clazz.getSuperclass().getDeclaredFields();
    45. //字段高亮处理
    46. for (Field field : fields1) {
    47. field.setAccessible(true);
    48. if (highlightFields.containsKey(field.getName())){
    49. HighlightField highlightField = highlightFields.get(field.getName());
    50. Text[] fragments = highlightField.fragments();
    51. StringBuilder text = new StringBuilder();
    52. for (Text fragment : fragments) {
    53. text.append(fragment.toString());
    54. }
    55. field.set(shipDto, text.toString());
    56. }
    57. }
    58. for (Field field : fields) {
    59. field.setAccessible(true);
    60. if (highlightFields.containsKey(field.getName())){
    61. HighlightField highlightField = highlightFields.get(field.getName());
    62. Text[] fragments = highlightField.fragments();
    63. StringBuilder text = new StringBuilder();
    64. for (Text fragment : fragments) {
    65. text.append(fragment.toString());
    66. }
    67. field.set(shipDto, text.toString());
    68. }
    69. }
    70. }
    71. searchShipCbgkDtos.add(shipDto);
    72. }
    73. SearchDto searchDto = new SearchDto<>();
    74. searchDto.setTotal(searchRes.getHits().getTotalHits().value);
    75. searchDto.setSearchShips(searchShipCbgkDtos);
    76. return searchDto;
    77. }
    78. @Override
    79. public SearchDto searchShip(ShipQueryDto shipQueryDto) throws IOException, IllegalAccessException {
    80. return getResult(shipQueryDto, EsIndex.INDEX_SEAT_SEARCH_SHIP_CBGK.getStatus(), SearchShipCbgkDto.class);
    81. }
    1. import lombok.Data;
    2. import java.util.List;
    3. /**
    4. * 搜索返回实体
    5. * @param
    6. */
    7. @Data
    8. public class SearchDto {
    9. /** 该库数量 */
    10. private Long total;
    11. /** 该库返回列表 */
    12. private List searchShips;
    13. }
    1. import java.util.HashMap;
    2. import java.util.Map;
    3. /**
    4. * 全文搜索匹配的字段和权重
    5. */
    6. public class EsSmartIndexHelper {
    7. public static Map shipCbgkfields = new HashMap();
    8. public static HashMapextends BaseSearchDto>, Map> classMapMap = new HashMapextends BaseSearchDto>, Map>();
    9. static {
    10. //船舶库
    11. classMapMap.put(SearchShipCbgkDto.class, shipCbgkfields);
    12. shipCbgkfields.put("shipName", 2.5f);
    13. // "shipId",
    14. shipCbgkfields.put("shipRegistryPort", 1.8f);
    15. // "shipOwnerId",
    16. shipCbgkfields.put("shipOwnerName", 1.5f);
    17. shipCbgkfields.put("shipOwnerSex", 1f);
    18. shipCbgkfields.put("shipOwnerTel", 1f);
    19. shipCbgkfields.put("shipOwnerIdNumber", 1.1f);
    20. shipCbgkfields.put("deptId", 1f);
    21. shipCbgkfields.put("createTime", 1f);
    22. shipCbgkfields.put("bdsTerminalNo", 1.3f);
    23. shipCbgkfields.put("mmsi", 1.3f);
    24. }
    25. }

    2.2 检索的DSL语句

    请求示例: GET /index/queryShip?keys=琼海口渔&pageNum=1&pageSize=10 ,该请求生成的 DSL 语句如下:
    1. GET index_test_search_ship/_search
    2. {
    3. "from": 0,
    4. "size": 10,
    5. "query": {
    6. "bool": {
    7. "must": [{
    8. "multi_match": {
    9. "query": "琼海口渔",
    10. "fields": ["bdsTerminalNo^1.3", "createTime^1.0", "deptId^1.0", "mmsi^1.3", "shipName^2.5", "shipOwnerIdNumber^1.1", "shipOwnerName^1.5", "shipOwnerSex^1.0", "shipOwnerTel^1.0", "shipRegistryPort^1.8"],
    11. "type": "cross_fields",
    12. "operator": "OR",
    13. "analyzer": "ik_max_word",
    14. "slop": 0,
    15. "prefix_length": 0,
    16. "max_expansions": 50,
    17. "zero_terms_query": "NONE",
    18. "auto_generate_synonyms_phrase_query": true,
    19. "fuzzy_transpositions": true,
    20. "boost": 1.0
    21. }
    22. }],
    23. "adjust_pure_negative": true,
    24. "boost": 1.0
    25. }
    26. },
    27. "track_total_hits": 2147483647,
    28. "highlight": {
    29. "pre_tags": [""],
    30. "post_tags": [""],
    31. "number_of_fragments": 0,
    32. "type": "plain",
    33. "require_field_match": false,
    34. "fields": {
    35. "shipOwnerName": {},
    36. "shipOwnerTel": {},
    37. "createTime": {},
    38. "mmsi": {},
    39. "bdsTerminalNo": {},
    40. "deptId": {},
    41. "shipName": {},
    42. "shipOwnerSex": {},
    43. "shipOwnerIdNumber": {},
    44. "shipRegistryPort": {}
    45. }
    46. }
    47. }

    2.3 返回的原始JSON数据

    1. {
    2. "took" : 4,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 20,
    13. "relation" : "eq"
    14. },
    15. "max_score" : 7.7624564,
    16. "hits" : [
    17. {
    18. "_index" : "index_test_search_ship",
    19. "_type" : "_doc",
    20. "_id" : "bpMhcYQB4gQEvltnaqX-",
    21. "_score" : 7.7624564,
    22. "_source" : {
    23. "shipId" : "01",
    24. "shipName" : "琼海口渔",
    25. "shipOwnerName" : "李宁",
    26. "shipOwnerTel" : "15173934187",
    27. "shipOwnerIdNumber" : "430525199408136134",
    28. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    29. },
    30. "highlight" : {
    31. "shipName" : [
    32. "琼海口渔"
    33. ]
    34. }
    35. },
    36. {
    37. "_index" : "index_test_search_ship",
    38. "_type" : "_doc",
    39. "_id" : "b5MhcYQB4gQEvltnbaUM",
    40. "_score" : 7.7624564,
    41. "_source" : {
    42. "shipId" : "01",
    43. "shipName" : "琼海口渔",
    44. "shipOwnerName" : "李宁",
    45. "shipOwnerTel" : "15173934187",
    46. "shipOwnerIdNumber" : "430525199408136134",
    47. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    48. },
    49. "highlight" : {
    50. "shipName" : [
    51. "琼海口渔"
    52. ]
    53. }
    54. },
    55. {
    56. "_index" : "index_test_search_ship",
    57. "_type" : "_doc",
    58. "_id" : "U5PBb4QB4gQEvltnIKV-",
    59. "_score" : 7.0790462,
    60. "_source" : {
    61. "shipId" : "01",
    62. "shipName" : "013234琼海口渔",
    63. "shipOwnerName" : "李宁",
    64. "shipOwnerTel" : "15173934187",
    65. "shipOwnerIdNumber" : "430525199408136134",
    66. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    67. },
    68. "highlight" : {
    69. "shipName" : [
    70. "013234琼海口渔"
    71. ]
    72. }
    73. },
    74. {
    75. "_index" : "index_test_search_ship",
    76. "_type" : "_doc",
    77. "_id" : "VJPEb4QB4gQEvltnm6Uz",
    78. "_score" : 7.0790462,
    79. "_source" : {
    80. "shipId" : "01",
    81. "shipName" : "013913琼海口渔",
    82. "shipOwnerName" : "李宁",
    83. "shipOwnerTel" : "15173934187",
    84. "shipOwnerIdNumber" : "430525199408136134",
    85. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    86. },
    87. "highlight" : {
    88. "shipName" : [
    89. "013913琼海口渔"
    90. ]
    91. }
    92. },
    93. {
    94. "_index" : "index_test_search_ship",
    95. "_type" : "_doc",
    96. "_id" : "bZMhcYQB4gQEvltnQKVb",
    97. "_score" : 7.0790462,
    98. "_source" : {
    99. "shipId" : "01",
    100. "shipName" : "琼海口渔013",
    101. "shipOwnerName" : "",
    102. "shipOwnerTel" : "15173934187",
    103. "shipOwnerIdNumber" : "430525199408136134",
    104. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    105. },
    106. "highlight" : {
    107. "shipName" : [
    108. "琼海口渔013"
    109. ]
    110. }
    111. },
    112. {
    113. "_index" : "index_test_search_ship",
    114. "_type" : "_doc",
    115. "_id" : "a5MccYQB4gQEvltnY6Ur",
    116. "_score" : 7.0790462,
    117. "_source" : {
    118. "shipId" : "01",
    119. "shipName" : "琼海口渔013",
    120. "shipOwnerName" : "李宁",
    121. "shipOwnerTel" : "15173934187",
    122. "shipOwnerIdNumber" : "430525199408136134",
    123. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    124. },
    125. "highlight" : {
    126. "shipName" : [
    127. "琼海口渔013"
    128. ]
    129. }
    130. },
    131. {
    132. "_index" : "index_test_search_ship",
    133. "_type" : "_doc",
    134. "_id" : "bJMccYQB4gQEvltnZaV1",
    135. "_score" : 7.0790462,
    136. "_source" : {
    137. "shipId" : "01",
    138. "shipName" : "琼海口渔013",
    139. "shipOwnerName" : "李宁",
    140. "shipOwnerTel" : "15173934187",
    141. "shipOwnerIdNumber" : "430525199408136134",
    142. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    143. },
    144. "highlight" : {
    145. "shipName" : [
    146. "琼海口渔013"
    147. ]
    148. }
    149. },
    150. {
    151. "_index" : "index_test_search_ship",
    152. "_type" : "_doc",
    153. "_id" : "VZPbb4QB4gQEvltnraU6",
    154. "_score" : 6.506234,
    155. "_source" : {
    156. "shipId" : "01",
    157. "shipName" : "013913琼海口渔",
    158. "shipOwnerName" : "013913琼海口渔",
    159. "shipOwnerTel" : "15173934187",
    160. "shipOwnerIdNumber" : "430525199408136134",
    161. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    162. },
    163. "highlight" : {
    164. "shipOwnerName" : [
    165. "013913琼海口渔"
    166. ],
    167. "shipName" : [
    168. "013913琼海口渔"
    169. ]
    170. }
    171. },
    172. {
    173. "_index" : "index_test_search_ship",
    174. "_type" : "_doc",
    175. "_id" : "apMWcYQB4gQEvltnT6Vt",
    176. "_score" : 6.019184,
    177. "_source" : {
    178. "shipId" : "01",
    179. "shipName" : "琼海口渔013 李宁",
    180. "shipOwnerName" : "12341",
    181. "shipOwnerTel" : "15173934187",
    182. "shipOwnerIdNumber" : "430525199408136134",
    183. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    184. },
    185. "highlight" : {
    186. "shipName" : [
    187. "琼海口渔013 李宁"
    188. ]
    189. }
    190. },
    191. {
    192. "_index" : "index_test_search_ship",
    193. "_type" : "_doc",
    194. "_id" : "cpNRcYQB4gQEvltnQ6Xw",
    195. "_score" : 6.019184,
    196. "_source" : {
    197. "shipId" : "01",
    198. "shipName" : "琼海口渔013 李宁",
    199. "shipOwnerName" : "李宁",
    200. "shipOwnerTel" : "15173934187",
    201. "shipOwnerIdNumber" : "430525199408136134",
    202. "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
    203. },
    204. "highlight" : {
    205. "shipName" : [
    206. "琼海口渔013 李宁"
    207. ]
    208. }
    209. }
    210. ]
    211. }
    212. }

    2.4 接收格式化后返回的接口数据

    1. {
    2. "code": "SUCCESS",
    3. "businessCode": "0",
    4. "message": "操作成功",
    5. "data": {
    6. "total": 20,
    7. "searchShips": [
    8. {
    9. "shipId": "01",
    10. "shipName": "琼海口渔",
    11. "shipRegistryPort": null,
    12. "shipOwnerId": null,
    13. "shipOwnerName": "李宁",
    14. "shipOwnerSex": null,
    15. "shipOwnerTel": "15173934187",
    16. "shipOwnerIdNumber": "430525199408136134",
    17. "deptId": null,
    18. "createTime": null,
    19. "bdsTerminalNo": null,
    20. "mmsi": null
    21. },
    22. {
    23. "shipId": "01",
    24. "shipName": "琼海口渔",
    25. "shipRegistryPort": null,
    26. "shipOwnerId": null,
    27. "shipOwnerName": "李宁",
    28. "shipOwnerSex": null,
    29. "shipOwnerTel": "15173934187",
    30. "shipOwnerIdNumber": "430525199408136134",
    31. "deptId": null,
    32. "createTime": null,
    33. "bdsTerminalNo": null,
    34. "mmsi": null
    35. },
    36. {
    37. "shipId": "01",
    38. "shipName": "013234琼海口渔",
    39. "shipRegistryPort": null,
    40. "shipOwnerId": null,
    41. "shipOwnerName": "李宁",
    42. "shipOwnerSex": null,
    43. "shipOwnerTel": "15173934187",
    44. "shipOwnerIdNumber": "430525199408136134",
    45. "deptId": null,
    46. "createTime": null,
    47. "bdsTerminalNo": null,
    48. "mmsi": null
    49. },
    50. {
    51. "shipId": "01",
    52. "shipName": "013913琼海口渔",
    53. "shipRegistryPort": null,
    54. "shipOwnerId": null,
    55. "shipOwnerName": "李宁",
    56. "shipOwnerSex": null,
    57. "shipOwnerTel": "15173934187",
    58. "shipOwnerIdNumber": "430525199408136134",
    59. "deptId": null,
    60. "createTime": null,
    61. "bdsTerminalNo": null,
    62. "mmsi": null
    63. },
    64. {
    65. "shipId": "01",
    66. "shipName": "琼海口渔013",
    67. "shipRegistryPort": null,
    68. "shipOwnerId": null,
    69. "shipOwnerName": "",
    70. "shipOwnerSex": null,
    71. "shipOwnerTel": "15173934187",
    72. "shipOwnerIdNumber": "430525199408136134",
    73. "deptId": null,
    74. "createTime": null,
    75. "bdsTerminalNo": null,
    76. "mmsi": null
    77. },
    78. {
    79. "shipId": "01",
    80. "shipName": "琼海口渔013",
    81. "shipRegistryPort": null,
    82. "shipOwnerId": null,
    83. "shipOwnerName": "李宁",
    84. "shipOwnerSex": null,
    85. "shipOwnerTel": "15173934187",
    86. "shipOwnerIdNumber": "430525199408136134",
    87. "deptId": null,
    88. "createTime": null,
    89. "bdsTerminalNo": null,
    90. "mmsi": null
    91. },
    92. {
    93. "shipId": "01",
    94. "shipName": "琼海口渔013",
    95. "shipRegistryPort": null,
    96. "shipOwnerId": null,
    97. "shipOwnerName": "李宁",
    98. "shipOwnerSex": null,
    99. "shipOwnerTel": "15173934187",
    100. "shipOwnerIdNumber": "430525199408136134",
    101. "deptId": null,
    102. "createTime": null,
    103. "bdsTerminalNo": null,
    104. "mmsi": null
    105. },
    106. {
    107. "shipId": "01",
    108. "shipName": "013913琼海口渔",
    109. "shipRegistryPort": null,
    110. "shipOwnerId": null,
    111. "shipOwnerName": "013913琼海口渔",
    112. "shipOwnerSex": null,
    113. "shipOwnerTel": "15173934187",
    114. "shipOwnerIdNumber": "430525199408136134",
    115. "deptId": null,
    116. "createTime": null,
    117. "bdsTerminalNo": null,
    118. "mmsi": null
    119. },
    120. {
    121. "shipId": "01",
    122. "shipName": "琼海口渔013 李宁",
    123. "shipRegistryPort": null,
    124. "shipOwnerId": null,
    125. "shipOwnerName": "12341",
    126. "shipOwnerSex": null,
    127. "shipOwnerTel": "15173934187",
    128. "shipOwnerIdNumber": "430525199408136134",
    129. "deptId": null,
    130. "createTime": null,
    131. "bdsTerminalNo": null,
    132. "mmsi": null
    133. },
    134. {
    135. "shipId": "01",
    136. "shipName": "琼海口渔013 李宁",
    137. "shipRegistryPort": null,
    138. "shipOwnerId": null,
    139. "shipOwnerName": "李宁",
    140. "shipOwnerSex": null,
    141. "shipOwnerTel": "15173934187",
    142. "shipOwnerIdNumber": "430525199408136134",
    143. "deptId": null,
    144. "createTime": null,
    145. "bdsTerminalNo": null,
    146. "mmsi": null
    147. }
    148. ]
    149. }
    150. }

    可以看出,返回结果中凡是命中关键词任一分词的字段,其值都已被替换为高亮处理后的文本。注意:示例中的 preTags/postTags 为空字符串,所以结果里看不到高亮标签;实际使用时需要设置成对的标签(例如 HTML 标签)才能在页面上显示高亮效果。

  • 相关阅读:
    Golang 中 map[string]string 如何在 TOML 文件中配置
    docker安装Prometheus+Grafana监控系统(SpringBoot集成监控)
    LVS集群
    软件设计师_操作系统基本原理_学习笔记
    python-SQLite更新的操作没有保存
    3个云渲染平台的价格体系,哪个最合适(四)
    Maven创建项目【带有目录结构】
    itk中图像2d-3d配准整理
    婴儿摇铃玩具亚马逊审查要求做CPC认证标准要求
    Spring源码学习笔记13——总结篇, 从IOC到AOP
  • 原文地址:https://blog.csdn.net/linhaiyun_ytdx/article/details/127955266