什么是分词?
分词就是把一段文本拆分成一个个单词(词项),这个过程在es中称为analysis。es内置的分词器默认只适合对英文语句分词,对中文支持不好:每个汉字都会被拆分成一个独立的词。
全局分析
POST /_analyze
{
"analyzer": "standard",
"text": "text文本"
}
演示(将text替换为 "My name is Peter Parker,I am a Super Hero. I don't like the Criminals."):
结果:
{
"tokens":[
{
"token":"my",
"start_offset":0,
"end_offset":2,
"type":"<ALPHANUM>",
"position":0
},
{
"token":"name",
"start_offset":3,
"end_offset":7,
"type":"<ALPHANUM>",
"position":1
},
{
"token":"is",
"start_offset":8,
"end_offset":10,
"type":"<ALPHANUM>",
"position":2
},
{
"token":"peter",
"start_offset":11,
"end_offset":16,
"type":"<ALPHANUM>",
"position":3
},
{
"token":"parker",
"start_offset":17,
"end_offset":23,
"type":"<ALPHANUM>",
"position":4
},
{
"token":"i",
"start_offset":24,
"end_offset":25,
"type":"<ALPHANUM>",
"position":5
},
{
"token":"am",
"start_offset":26,
"end_offset":28,
"type":"<ALPHANUM>",
"position":6
},
{
"token":"a",
"start_offset":29,
"end_offset":30,
"type":"<ALPHANUM>",
"position":7
},
{
"token":"super",
"start_offset":31,
"end_offset":36,
"type":"<ALPHANUM>",
"position":8
},
{
"token":"hero",
"start_offset":37,
"end_offset":41,
"type":"<ALPHANUM>",
"position":9
},
{
"token":"i",
"start_offset":43,
"end_offset":44,
"type":"<ALPHANUM>",
"position":10
},
{
"token":"don't",
"start_offset":45,
"end_offset":50,
"type":"<ALPHANUM>",
"position":11
},
{
"token":"like",
"start_offset":51,
"end_offset":55,
"type":"<ALPHANUM>",
"position":12
},
{
"token":"the",
"start_offset":56,
"end_offset":59,
"type":"<ALPHANUM>",
"position":13
},
{
"token":"criminals",
"start_offset":60,
"end_offset":69,
"type":"<ALPHANUM>",
"position":14
}
]
}
基于现有索引进行分析(指定field时使用该字段映射的分词器;若同时指定analyzer,则以analyzer为准)
POST /my_doc/_analyze
{
"analyzer": "standard",
"field": "name",
"text": "text文本"
}
演示(将text替换为 "My name is Peter Parker,I am a Super Hero. I don't like the Criminals."):
结果:
{
"tokens":[
{
"token":"my",
"start_offset":0,
"end_offset":2,
"type":"<ALPHANUM>",
"position":0
},
{
"token":"name",
"start_offset":3,
"end_offset":7,
"type":"<ALPHANUM>",
"position":1
},
{
"token":"is",
"start_offset":8,
"end_offset":10,
"type":"<ALPHANUM>",
"position":2
},
{
"token":"peter",
"start_offset":11,
"end_offset":16,
"type":"<ALPHANUM>",
"position":3
},
{
"token":"parker",
"start_offset":17,
"end_offset":23,
"type":"<ALPHANUM>",
"position":4
},
{
"token":"i",
"start_offset":24,
"end_offset":25,
"type":"<ALPHANUM>",
"position":5
},
{
"token":"am",
"start_offset":26,
"end_offset":28,
"type":"<ALPHANUM>",
"position":6
},
{
"token":"a",
"start_offset":29,
"end_offset":30,
"type":"<ALPHANUM>",
"position":7
},
{
"token":"super",
"start_offset":31,
"end_offset":36,
"type":"<ALPHANUM>",
"position":8
},
{
"token":"hero",
"start_offset":37,
"end_offset":41,
"type":"<ALPHANUM>",
"position":9
},
{
"token":"i",
"start_offset":43,
"end_offset":44,
"type":"<ALPHANUM>",
"position":10
},
{
"token":"don't",
"start_offset":45,
"end_offset":50,
"type":"<ALPHANUM>",
"position":11
},
{
"token":"like",
"start_offset":51,
"end_offset":55,
"type":"<ALPHANUM>",
"position":12
},
{
"token":"the",
"start_offset":56,
"end_offset":59,
"type":"<ALPHANUM>",
"position":13
},
{
"token":"criminals",
"start_offset":60,
"end_offset":69,
"type":"<ALPHANUM>",
"position":14
}
]
}
es内置分词器
{
"analyzer": "standard",
"text": "My name is Peter Parker,I am a Super Hero. I don't like the Criminals."
}