elasticsearch - 如何在 Elasticsearch 中按时间(月份)聚合统计文档中某个单词的出现次数?

我想通过聚合得到某个单词(例如 one)在文档中出现次数随月份变化的趋势。使用下面的方法,我可以得到按月份的文档数(doc_count)聚合结果,但找不到任何资料说明如何得到 jan、feb 和 mar 各月份中该单词的出现次数

# Index three sample documents into the "test" index; each one carries a
# free-text "description" and a "month" label (jan / feb / mar).
PUT test/_doc/1
{
  "description" : "one two three four",
  "month" : "jan"

}
PUT test/_doc/2
{
  "description" : "one one test test test",
  "month" : "feb"

}

PUT test/_doc/3
{
  "description" : "one one one test",
  "month" : "mar"

}

# Match documents whose description contains "one" and bucket them by month.
# "size": 0 suppresses the hits themselves; the terms aggregation reports a
# doc_count per month, i.e. the number of matching documents, not the number
# of times the word occurs inside them.
GET test/_search
{
  "size": 0,
  "query": {
    "match": {
      "description": {
        "query": "one"
      }
    }
  },
  "aggs": {
    "monthly_count": {
      "terms": {
        "field": "month.keyword"
      }
    }
  }
}

输出

{
  "took" : 706,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "monthly_count" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "feb",
          "doc_count" : 1
        },
        {
          "key" : "jan",
          "doc_count" : 1
        },
        {
          "key" : "mar",
          "doc_count" : 1
        }
      ]
    }
  }
}

期望得到的按月单词出现次数

"aggregations" : {
    "monthly_count" : {
      "buckets" : [
        {
          "key" : "feb",
          "word_count" : 2
        },
        {
          "key" : "jan",
          "word_count" : 1
        },
        {
          "key" : "mar",
          "word_count" : 3
        }
      ]
    }
  }

回答1

也许这个查询可以帮助你:

# Bucket documents by month, then sub-bucket each month by how many times the
# word 'one' occurs in a document's description. The key of each
# "count_word_one" bucket is that occurrence count.
GET test/_search
{
  "size": 0,
  "aggs": {
    "monthly_count": {
      "terms": {
        "field": "month.keyword"
      },
      "aggs": {
        "count_word_one": {
          "terms": {
            "script": {
              "source": """
              // A document may have no doc value for description.keyword
              // (no description at all, or a value the keyword mapping did
              // not index, e.g. one longer than ignore_above). Reading
              // .value would then throw and fail the whole aggregation, so
              // such documents are counted as 0 occurrences instead.
              if (doc['description.keyword'].size() == 0) {
                return 0;
              }
              def str = doc['description.keyword'].value;
              // Split on single spaces and count exact matches of 'one'.
              def array = str.splitOnToken(' ');
              int i = 0;
              for (item in array) {
                if (item == 'one') {
                  i++;
                }
              }
              return i;
              """
            },
            "size": 10
          }
        }
      }
    }
  }
}

响应:

"aggregations" : {
    "monthly_count" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "feb",
          "doc_count" : 1,
          "count_word_one" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "2",
                "doc_count" : 1
              }
            ]
          }
        },
        {
          "key" : "jan",
          "doc_count" : 1,
          "count_word_one" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "1",
                "doc_count" : 1
              }
            ]
          }
        },
        {
          "key" : "mar",
          "doc_count" : 1,
          "count_word_one" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "3",
                "doc_count" : 1
              }
            ]
          }
        }
      ]
    }
  }

相似文章

随机推荐

最新文章