• ElasticSearch7.3学习(二十七)----聚合概念(bucket和metric)及其示例


    一、两个核心概念:bucket和metric

    1.1 bucket

    有如下数据

    city    name
    北京    张三
    北京    李四
    天津    王五
    天津    赵六
    天津    王麻子

    划分出来两个bucket,一个是北京bucket,一个是天津bucket

    北京bucket:包含了2个人,张三,李四

    天津bucket:包含了3个人,王五,赵六,王麻子

    1.2 metric

    metric,就是对一个bucket执行的某种聚合分析的操作,比如说求平均值,求最大值,求最小值

    比如下面的一个sql语句

    select count(*) from book group by studymodel

    bucket:group by studymodel --> 那些studymodel相同的数据,就会被划分到一个bucket中

    metric:count(*),对每个bucket中所有的数据,计算一个数量。其他常见的metric还有avg(),sum(),max(),min()等

    二、聚合示例

    2.1 数据准备

    首先创建book索引

    1. PUT /book/
    2. {
    3. "settings": {
    4. "number_of_shards": 1,
    5. "number_of_replicas": 0
    6. },
    7. "mappings": {
    8. "properties": {
    9. "name": {
    10. "type": "text",
    11. "analyzer": "ik_max_word",
    12. "search_analyzer": "ik_smart"
    13. },
    14. "description": {
    15. "type": "text",
    16. "analyzer": "ik_max_word",
    17. "search_analyzer": "ik_smart"
    18. },
    19. "studymodel": {
    20. "type": "keyword"
    21. },
    22. "price": {
    23. "type": "double"
    24. },
    25. "timestamp": {
    26. "type": "date",
    27. "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
    28. },
    29. "pic": {
    30. "type": "text",
    31. "index": false
    32. }
    33. }
    34. }
    35. }

    添加测试数据

    1. PUT /book/_doc/1
    2. {
    3. "name": "Bootstrap开发",
    4. "description": "Bootstrap是一个非常流行的开发框架。此开发框架可以帮助不擅长css页面开发的程序人员轻松的实现一个css,不受浏览器限制的精美界面css效果。",
    5. "studymodel": "201002",
    6. "price": 38.6,
    7. "timestamp": "2019-08-25 19:11:35",
    8. "pic": "group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg",
    9. "tags": [
    10. "bootstrap",
    11. "dev"
    12. ]
    13. }
    14. PUT /book/_doc/2
    15. {
    16. "name": "java编程思想",
    17. "description": "java语言是世界第一编程语言,在软件开发领域使用人数最多。",
    18. "studymodel": "201001",
    19. "price": 68.6,
    20. "timestamp": "2019-08-25 19:11:35",
    21. "pic": "group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg",
    22. "tags": [
    23. "java",
    24. "dev"
    25. ]
    26. }
    27. PUT /book/_doc/3
    28. {
    29. "name": "spring开发基础",
    30. "description": "spring 在java领域非常流行,java程序员都在用。",
    31. "studymodel": "201001",
    32. "price": 88.6,
    33. "timestamp": "2019-08-24 19:11:35",
    34. "pic": "group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg",
    35. "tags": [
    36. "spring",
    37. "java"
    38. ]
    39. }

    2.2 计算每个studymodel下的商品数量

    sql语句: select studymodel,count(*) from book group by studymodel

    "size": 0 的作用:只返回聚合结果,不返回查询命中的文档

    1. GET /book/_search
    2. {
    3. "size": 0,
    4. "query": {
    5. "match_all": {}
    6. },
    7. "aggs": {
    8. "group_by_model": {
    9. "terms": {
    10. "field": "studymodel"
    11. }
    12. }
    13. }
    14. }

    结果:

    1. {
    2. "took" : 2,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 3,
    13. "relation" : "eq"
    14. },
    15. "max_score" : null,
    16. "hits" : [ ]
    17. },
    18. "aggregations" : {
    19. "group_by_model" : {
    20. "doc_count_error_upper_bound" : 0,
    21. "sum_other_doc_count" : 0,
    22. "buckets" : [
    23. {
    24. "key" : "201001",
    25. "doc_count" : 2
    26. },
    27. {
    28. "key" : "201002",
    29. "doc_count" : 1
    30. }
    31. ]
    32. }
    33. }
    34. }

    2.3 计算每个tags下的商品数量

    tags字段是text类型,而text类型的字段默认不支持聚合,因此需要先为该字段设置"fielddata": true,不设置的话聚合时会报错

    1. PUT /book/_mapping/
    2. {
    3. "properties": {
    4. "tags": {
    5. "type": "text",
    6. "fielddata": true
    7. }
    8. }
    9. }

    查询

    1. GET /book/_search
    2. {
    3. "size": 0,
    4. "query": {
    5. "match_all": {}
    6. },
    7. "aggs": {
    8. "group_by_tags": {
    9. "terms": { "field": "tags" }
    10. }
    11. }
    12. }

    结果:

    1. {
    2. "took" : 2,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 3,
    13. "relation" : "eq"
    14. },
    15. "max_score" : null,
    16. "hits" : [ ]
    17. },
    18. "aggregations" : {
    19. "group_by_tags" : {
    20. "doc_count_error_upper_bound" : 0,
    21. "sum_other_doc_count" : 0,
    22. "buckets" : [
    23. {
    24. "key" : "dev",
    25. "doc_count" : 2
    26. },
    27. {
    28. "key" : "java",
    29. "doc_count" : 2
    30. },
    31. {
    32. "key" : "bootstrap",
    33. "doc_count" : 1
    34. },
    35. {
    36. "key" : "spring",
    37. "doc_count" : 1
    38. }
    39. ]
    40. }
    41. }
    42. }

    2.4 加上搜索条件,计算每个tags下的商品数量

    1. GET /book/_search
    2. {
    3. "size": 0,
    4. "query": {
    5. "match": {
    6. "description": "java程序员"
    7. }
    8. },
    9. "aggs": {
    10. "group_by_tags": {
    11. "terms": { "field": "tags" }
    12. }
    13. }
    14. }

    结果:

    1. {
    2. "took" : 70,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 2,
    13. "relation" : "eq"
    14. },
    15. "max_score" : null,
    16. "hits" : [ ]
    17. },
    18. "aggregations" : {
    19. "group_by_tags" : {
    20. "doc_count_error_upper_bound" : 0,
    21. "sum_other_doc_count" : 0,
    22. "buckets" : [
    23. {
    24. "key" : "java",
    25. "doc_count" : 2
    26. },
    27. {
    28. "key" : "dev",
    29. "doc_count" : 1
    30. },
    31. {
    32. "key" : "spring",
    33. "doc_count" : 1
    34. }
    35. ]
    36. }
    37. }
    38. }

    2.5 计算每个tag下的商品的平均价格

    子聚合

    1. GET /book/_search
    2. {
    3. "size": 0,
    4. "aggs": {
    5. "group_by_tags": {
    6. "terms": {
    7. "field": "tags"
    8. },
    9. "aggs": {
    10. "avg_price": {
    11. "avg": {
    12. "field": "price"
    13. }
    14. }
    15. }
    16. }
    17. }
    18. }

    结果:

    1. {
    2. "took" : 0,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 3,
    13. "relation" : "eq"
    14. },
    15. "max_score" : null,
    16. "hits" : [ ]
    17. },
    18. "aggregations" : {
    19. "group_by_tags" : {
    20. "doc_count_error_upper_bound" : 0,
    21. "sum_other_doc_count" : 0,
    22. "buckets" : [
    23. {
    24. "key" : "dev",
    25. "doc_count" : 2,
    26. "avg_price" : {
    27. "value" : 53.599999999999994
    28. }
    29. },
    30. {
    31. "key" : "java",
    32. "doc_count" : 2,
    33. "avg_price" : {
    34. "value" : 78.6
    35. }
    36. },
    37. {
    38. "key" : "bootstrap",
    39. "doc_count" : 1,
    40. "avg_price" : {
    41. "value" : 38.6
    42. }
    43. },
    44. {
    45. "key" : "spring",
    46. "doc_count" : 1,
    47. "avg_price" : {
    48. "value" : 88.6
    49. }
    50. }
    51. ]
    52. }
    53. }
    54. }

    2.6 计算每个tag下的商品的平均价格,按照平均价格降序排序

    小技巧,如果是查询全部,match_all可省略

    1. GET /book/_search
    2. {
    3. "size": 0,
    4. "aggs": {
    5. "group_by_tags": {
    6. "terms": {
    7. "field": "tags",
    8. "order": {
    9. "avg_price": "desc"
    10. }
    11. },
    12. "aggs": {
    13. "avg_price": {
    14. "avg": {
    15. "field": "price"
    16. }
    17. }
    18. }
    19. }
    20. }
    21. }

    结果:

    1. {
    2. "took" : 4,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 3,
    13. "relation" : "eq"
    14. },
    15. "max_score" : null,
    16. "hits" : [ ]
    17. },
    18. "aggregations" : {
    19. "group_by_tags" : {
    20. "doc_count_error_upper_bound" : 0,
    21. "sum_other_doc_count" : 0,
    22. "buckets" : [
    23. {
    24. "key" : "spring",
    25. "doc_count" : 1,
    26. "avg_price" : {
    27. "value" : 88.6
    28. }
    29. },
    30. {
    31. "key" : "java",
    32. "doc_count" : 2,
    33. "avg_price" : {
    34. "value" : 78.6
    35. }
    36. },
    37. {
    38. "key" : "dev",
    39. "doc_count" : 2,
    40. "avg_price" : {
    41. "value" : 53.599999999999994
    42. }
    43. },
    44. {
    45. "key" : "bootstrap",
    46. "doc_count" : 1,
    47. "avg_price" : {
    48. "value" : 38.6
    49. }
    50. }
    51. ]
    52. }
    53. }
    54. }

    2.7 按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格

    1. GET /book/_search
    2. {
    3. "size": 0,
    4. "aggs": {
    5. "group_by_price": {
    6. "range": {
    7. "field": "price",
    8. "ranges": [
    9. {
    10. "from": 0,
    11. "to": 40
    12. },
    13. {
    14. "from": 40,
    15. "to": 60
    16. },
    17. {
    18. "from": 60,
    19. "to": 80
    20. }
    21. ]
    22. },
    23. "aggs": {
    24. "group_by_tags": {
    25. "terms": {
    26. "field": "tags"
    27. },
    28. "aggs": {
    29. "average_price": {
    30. "avg": {
    31. "field": "price"
    32. }
    33. }
    34. }
    35. }
    36. }
    37. }
    38. }
    39. }

    结果:

    1. {
    2. "took" : 5,
    3. "timed_out" : false,
    4. "_shards" : {
    5. "total" : 1,
    6. "successful" : 1,
    7. "skipped" : 0,
    8. "failed" : 0
    9. },
    10. "hits" : {
    11. "total" : {
    12. "value" : 3,
    13. "relation" : "eq"
    14. },
    15. "max_score" : null,
    16. "hits" : [ ]
    17. },
    18. "aggregations" : {
    19. "group_by_price" : {
    20. "buckets" : [
    21. {
    22. "key" : "0.0-40.0",
    23. "from" : 0.0,
    24. "to" : 40.0,
    25. "doc_count" : 1,
    26. "group_by_tags" : {
    27. "doc_count_error_upper_bound" : 0,
    28. "sum_other_doc_count" : 0,
    29. "buckets" : [
    30. {
    31. "key" : "bootstrap",
    32. "doc_count" : 1,
    33. "average_price" : {
    34. "value" : 38.6
    35. }
    36. },
    37. {
    38. "key" : "dev",
    39. "doc_count" : 1,
    40. "average_price" : {
    41. "value" : 38.6
    42. }
    43. }
    44. ]
    45. }
    46. },
    47. {
    48. "key" : "40.0-60.0",
    49. "from" : 40.0,
    50. "to" : 60.0,
    51. "doc_count" : 0,
    52. "group_by_tags" : {
    53. "doc_count_error_upper_bound" : 0,
    54. "sum_other_doc_count" : 0,
    55. "buckets" : [ ]
    56. }
    57. },
    58. {
    59. "key" : "60.0-80.0",
    60. "from" : 60.0,
    61. "to" : 80.0,
    62. "doc_count" : 1,
    63. "group_by_tags" : {
    64. "doc_count_error_upper_bound" : 0,
    65. "sum_other_doc_count" : 0,
    66. "buckets" : [
    67. {
    68. "key" : "dev",
    69. "doc_count" : 1,
    70. "average_price" : {
    71. "value" : 68.6
    72. }
    73. },
    74. {
    75. "key" : "java",
    76. "doc_count" : 1,
    77. "average_price" : {
    78. "value" : 68.6
    79. }
    80. }
    81. ]
    82. }
    83. }
    84. ]
    85. }
    86. }
    87. }

  • 相关阅读:
    IDEA插件开发(2)--- 插件内容
    牛掰!“基础-中级-高级”Java程序员面试集结,看完献出我的膝盖
    手把手教你调用微信扫一扫!三分钟包会
    千万级大型API网关设计
    【食品化学与营养】第二章 水的化学与营养 笔记
    什么是特洛伊木马,它能造成什么损害?
    Apple Watch的精织斜纹表带现已上市
    Java---刷题01
    机器学习基础了解
    【React源码】(七)React 调度原理(scheduler)
  • 原文地址:https://blog.csdn.net/FaithWh/article/details/126912560