创建索引
PUT /nba_20220101
{
  "mappings": {
    "properties": {
      "age": {
        "type": "integer"
      },
      "birthDay": {
        "type": "date"
      },
      "birthDayStr": {
        "type": "keyword"
      },
      "code": {
        "type": "text"
      },
      "country": {
        "type": "keyword"
      },
      "countryEn": {
        "type": "keyword"
      },
      "displayAffiliation": {
        "type": "text"
      },
      "displayName": {
        "type": "text",
        "analyzer": "ik_max_word_pinyin",
        "fields": {
          "suggest": {
            "type": "completion",
            "analyzer": "ik_smart_pinyin",
            "preserve_separators": true,
            "preserve_position_increments": true,
            "max_input_length": 50
          }
        }
      },
      "displayNameEn": {
        "type": "text"
      },
      "draft": {
        "type": "long"
      },
      "heightValue": {
        "type": "float"
      },
      "jerseyNo": {
        "type": "keyword"
      },
      "playYear": {
        "type": "long"
      },
      "playerId": {
        "type": "keyword"
      },
      "position": {
        "type": "text"
      },
      "schoolType": {
        "type": "text"
      },
      "teamCity": {
        "type": "text"
      },
      "teamCityEn": {
        "type": "text"
      },
      "teamConference": {
        "type": "keyword"
      },
      "teamConferenceEn": {
        "type": "keyword"
      },
      "teamName": {
        "type": "keyword"
      },
      "teamNameEn": {
        "type": "keyword"
      },
      "weight": {
        "type": "text"
      }
    }
  },
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "default": {
            "type": "ik_max_word"
          },
          "ik_smart_pinyin": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": ["my_pinyin"]
          },
          "ik_max_word_pinyin": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": ["my_pinyin"]
          }
        },
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
自定义 ik_smart_pinyin ik_max_word_pinyin 分词器
- 分别基于 my_pinyin + ik_max_word 和 my_pinyin + ik_smart 创建自定义分词器
- my_pinyin 定义拼音分词过滤器
- displayName 字段使用 ik_max_word_pinyin,支持拼音、汉字双检索,并添加 suggest 检索补全
pinyin 插件可选参数说明
| 配置参数 | 说明 | 示例 |
|---|---|---|
| keep_first_letter | 保存第一个字母,默认值:true | 例如: 刘德华 > ldh |
| keep_separate_first_letter | 启用此选项后,将单独保留第一个字母, 默认值: false | 例如:刘德华 > l, d, h,(注意:由于词频太高,查询结果可能太模糊) |
| limit_first_letter_length | 设置 first_letter 结果的最大长度,默认值:16 | |
| keep_full_pinyin | 当启用此选项时,默认值:true | 例如:刘德华> [ liu, de, hua] |
| keep_joined_full_pinyin | 当启用此选项时,默认值:false | 例如:刘德华> [ liudehua] |
| keep_none_chinese | 结果保留非中文字母或数字,默认值:true | 例如:刘德华AT2016-> ldhat2016, 注意:keep_none_chinese应先启用keep_none_chinese_in_first_letter |
| keep_none_chinese_in_first_letter | 首字母保留非中文字母,默认值:true | 例如:刘德华AT2016-> ldhat2016 |
| keep_none_chinese_together | 保持非中文字母在一起,默认:true | 例如:DJ音乐家-> DJ, yin, yue, jia;当设置为 false 时,例如:DJ音乐家-> D, J, yin, yue, jia |
| keep_none_chinese_in_joined_full_pinyin | 保留非中文字母加入全拼音,默认:false | 例如:刘德华2016-> liudehua2016 |
| none_chinese_pinyin_tokenize | 如果非中文字母是拼音,则将其拆分为单独的拼音词,默认:true | 例如:liudehuaalibaba13zhuanghan-> liu, de, hua, a, li, ba, ba, 13, zhuang, han (注意:keep_none_chinese 和 keep_none_chinese_together 应首先启用) |
| keep_original | 启用此选项时,也将保留原始输入 ,默认值:false | |
| lowercase | 小写非汉字,默认:true | |
| trim_whitespace | 默认值:true | |
| remove_duplicated_term | 启用此选项时,将删除重复的术语以保存索引,默认值:false | 例如:de的> de (注意:位置相关查询可能会受到影响 ) |
| ignore_pinyin_offset | 6.0以后严格限制offset,不允许重叠token,有了这个参数,overlapping token会忽略offset ,默认值:true | 注意,所有position相关的query或者highlight都会出错,应该使用multi fields,不同的设置不同查询目的。如果需要偏移量,请将其设置为 false。 |
导入数据
POST /nba_20220101/_doc/566
{
  "playerId": "1627826",
  "code": "ivica_zubac",
  "displayName": "伊维察 祖巴茨哥哥",
  "displayNameEn": "Ivica Zubac",
  "displayAffiliation": "Croatia",
  "country": "克罗地亚",
  "countryEn": "Croatia",
  "birthDay": 858661200000,
  "birthDayStr": "1997-03-18",
  "age": 22,
  "heightValue": 2.16,
  "weight": "108.9 公斤",
  "position": "中锋",
  "jerseyNo": "40",
  "draft": 2016,
  "playYear": 3,
  "schoolType": "",
  "teamName": "快船",
  "teamNameEn": "Clippers",
  "teamCity": "洛杉矶",
  "teamCityEn": "LA",
  "teamConference": "西部",
  "teamConferenceEn": "Western"
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
校验汉字分词是否有效
GET /nba_20220101/_search
{
  "query": {
    "match": {
      "displayName": "伊维察"
    }
  }
}

校验拼音分词是否有效
GET /nba_20220101/_search
{
  "query": {
    "match": {
      "displayName": "yi wei"
    }
  }
}

校验汉字补全是否生效
POST /nba_20220101/_search
{
  "suggest": {
    "my-suggestion": {
      "text": "伊维",
      "completion": {
        "field": "displayName.suggest"
      }
    }
  }
}

校验拼音补全是否生效
POST /nba_20220101/_search
{
  "suggest": {
    "my-suggestion": {
      "text": "yi wei",
      "completion": {
        "field": "displayName.suggest"
      }
    }
  }
}

注意:当拼音分词与中文分词的结果不一致时,可以参考下方配置:

"analysis": {
  "analyzer": {
    "ik_smart_pinyin": {
      "type": "custom",
      "tokenizer": "ik_smart",
      "filter": ["my_pinyin"]
    },
    "ik_max_word_pinyin": {
      "type": "custom",
      "tokenizer": "ik_max_word",
      "filter": ["my_pinyin"]
    }
  },
  "filter": {
    "my_pinyin": {
      "type": "pinyin",
      "keep_first_letter": false,
      "keep_separate_first_letter": false,
      "limit_first_letter_length": 16,
      "keep_full_pinyin": false,
      "keep_joined_full_pinyin": true,
      "keep_original": true,
      "keep_none_chinese": true,
      "keep_none_chinese_together": true,
      "keep_none_chinese_in_joined_full_pinyin": true,
      "none_chinese_pinyin_tokenize": false,
      "lowercase": true,
      "remove_duplicated_term": true
    }
  }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31