es - elasticsearch - aggs - pipeline - normalize

甄成弘
2023-12-01

世界上并没有完美的程序,但是我们并不因此而沮丧,因为写程序就是一个不断追求完美的过程。

normalize

场景

  • 父管道聚合

作用

  • bucket值的规范化

配置项

  • buckets_path

    • 要规范化的bucket的路径
  • method

    • 规范化方法

    • rescale_0_1

      • (x - min) / (max - min)
    • rescale_0_100

      • 100 * (x - min) / (max - min)
    • percent_of_sum

      • x / sum
    • mean

      • (x - mean) / (max - min)
    • z-score(官方方法名是z-score,带连字符;写成zscore实测时会报invalid)

      • (x - mean) / stdev
      • stdev是标准差
    • softmax

      • e^x / sum_e_x
      • sum_e_x是所有bucket原始值x的e^x之和
  • format

    • 返回值的格式(DecimalFormat模式),格式化后的结果通过value_as_string返回
    • 如,00.00%,00.00

使用

索引

DELETE /normalize_test

PUT /normalize_test
{
  "mappings" : {
    "properties" : {
      "type" : {"type" : "integer"},
      "num"  : {"type" : "integer"},
      "date" : {"type" : "date"}
    }
  }
}

POST /normalize_test/_bulk
{"index" : {"_id" : 1}}
{"type" : 1, "num" : 400, "date" : "2001-01-10"}
{"index" : {"_id" : 2}}
{"type" : 2, "num" : 450, "date" : "2001-01-20"}
{"index" : {"_id" : 3}}
{"type" : 1, "num" : 580, "date" : "2001-02-10"}
{"index" : {"_id" : 4}}
{"type" : 2, "num" : 990, "date" : "2001-03-20"}
{"index" : {"_id" : 5}}
{"type" : 1, "num" : 660, "date" : "2001-04-21"}
{"index" : {"_id" : 6}}
{"type" : 1, "num" : 680, "date" : "2001-05-21"}

method : rescale_0_1

GET /normalize_test/_search
{
  "size" : 0,
  "aggs" : {
    "month_aggs" : {
      "date_histogram": {
        "field" : "date",
        "calendar_interval" : "month"
      },
      "aggs" : {
        "sum_aggs" : {
          "sum": {
            "field": "num"
          }
        },
        "normalize_aggs" : {
          "normalize" : {
            "buckets_path" : "sum_aggs",
            "method" : "rescale_0_1",
            "format" : "00.00"
          }
        }
      }
    }
  }
}
  • 返回结果
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "month_aggs" : {
      "buckets" : [
        {
          "key_as_string" : "2001-01-01T00:00:00.000Z",
          "key" : 978307200000,
          "doc_count" : 2,
          "sum_aggs" : {
            "value" : 850.0
          },
          "normalize_aggs" : {
            "value" : 0.6585365853658537,
            "value_as_string" : "00.66"
          }
        },
        {
          "key_as_string" : "2001-02-01T00:00:00.000Z",
          "key" : 980985600000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 580.0
          },
          "normalize_aggs" : {
            "value" : 0.0,
            "value_as_string" : "00.00"
          }
        },
        {
          "key_as_string" : "2001-03-01T00:00:00.000Z",
          "key" : 983404800000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 990.0
          },
          "normalize_aggs" : {
            "value" : 1.0,
            "value_as_string" : "01.00"
          }
        },
        {
          "key_as_string" : "2001-04-01T00:00:00.000Z",
          "key" : 986083200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 660.0
          },
          "normalize_aggs" : {
            "value" : 0.1951219512195122,
            "value_as_string" : "00.20"
          }
        },
        {
          "key_as_string" : "2001-05-01T00:00:00.000Z",
          "key" : 988675200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 680.0
          },
          "normalize_aggs" : {
            "value" : 0.24390243902439024,
            "value_as_string" : "00.24"
          }
        }
      ]
    }
  }
}

method : rescale_0_100

GET /normalize_test/_search
{
  "size" : 0,
  "aggs" : {
    "month_aggs" : {
      "date_histogram": {
        "field" : "date",
        "calendar_interval" : "month"
      },
      "aggs" : {
        "sum_aggs" : {
          "sum": {
            "field": "num"
          }
        },
        "normalize_aggs" : {
          "normalize" : {
            "buckets_path" : "sum_aggs",
            "method" : "rescale_0_100",
            "format" : "00"
          }
        }
      }
    }
  }
}
  • 返回结果
{
  "took" : 34,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "month_aggs" : {
      "buckets" : [
        {
          "key_as_string" : "2001-01-01T00:00:00.000Z",
          "key" : 978307200000,
          "doc_count" : 2,
          "sum_aggs" : {
            "value" : 850.0
          },
          "normalize_aggs" : {
            "value" : 65.85365853658537,
            "value_as_string" : "66"
          }
        },
        {
          "key_as_string" : "2001-02-01T00:00:00.000Z",
          "key" : 980985600000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 580.0
          },
          "normalize_aggs" : {
            "value" : 0.0,
            "value_as_string" : "00"
          }
        },
        {
          "key_as_string" : "2001-03-01T00:00:00.000Z",
          "key" : 983404800000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 990.0
          },
          "normalize_aggs" : {
            "value" : 100.0,
            "value_as_string" : "100"
          }
        },
        {
          "key_as_string" : "2001-04-01T00:00:00.000Z",
          "key" : 986083200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 660.0
          },
          "normalize_aggs" : {
            "value" : 19.51219512195122,
            "value_as_string" : "20"
          }
        },
        {
          "key_as_string" : "2001-05-01T00:00:00.000Z",
          "key" : 988675200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 680.0
          },
          "normalize_aggs" : {
            "value" : 24.390243902439025,
            "value_as_string" : "24"
          }
        }
      ]
    }
  }
}

method : percent_of_sum

GET /normalize_test/_search
{
  "size" : 0,
  "aggs" : {
    "month_aggs" : {
      "date_histogram": {
        "field" : "date",
        "calendar_interval" : "month"
      },
      "aggs" : {
        "sum_aggs" : {
          "sum": {
            "field": "num"
          }
        },
        "normalize_aggs" : {
          "normalize" : {
            "buckets_path" : "sum_aggs",
            "method" : "percent_of_sum",
            "format" : "00.00%"
          }
        }
      }
    }
  }
}
  • 返回结果
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "month_aggs" : {
      "buckets" : [
        {
          "key_as_string" : "2001-01-01T00:00:00.000Z",
          "key" : 978307200000,
          "doc_count" : 2,
          "sum_aggs" : {
            "value" : 850.0
          },
          "normalize_aggs" : {
            "value" : 0.22606382978723405,
            "value_as_string" : "22.61%"
          }
        },
        {
          "key_as_string" : "2001-02-01T00:00:00.000Z",
          "key" : 980985600000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 580.0
          },
          "normalize_aggs" : {
            "value" : 0.15425531914893617,
            "value_as_string" : "15.43%"
          }
        },
        {
          "key_as_string" : "2001-03-01T00:00:00.000Z",
          "key" : 983404800000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 990.0
          },
          "normalize_aggs" : {
            "value" : 0.2632978723404255,
            "value_as_string" : "26.33%"
          }
        },
        {
          "key_as_string" : "2001-04-01T00:00:00.000Z",
          "key" : 986083200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 660.0
          },
          "normalize_aggs" : {
            "value" : 0.17553191489361702,
            "value_as_string" : "17.55%"
          }
        },
        {
          "key_as_string" : "2001-05-01T00:00:00.000Z",
          "key" : 988675200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 680.0
          },
          "normalize_aggs" : {
            "value" : 0.18085106382978725,
            "value_as_string" : "18.09%"
          }
        }
      ]
    }
  }
}

method : mean

GET /normalize_test/_search
{
  "size" : 0,
  "aggs" : {
    "month_aggs" : {
      "date_histogram": {
        "field" : "date",
        "calendar_interval" : "month"
      },
      "aggs" : {
        "sum_aggs" : {
          "sum": {
            "field": "num"
          }
        },
        "normalize_aggs" : {
          "normalize" : {
            "buckets_path" : "sum_aggs",
            "method" : "mean",
            "format" : "0.00"
          }
        }
      }
    }
  }
}
  • 返回结果
{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "month_aggs" : {
      "buckets" : [
        {
          "key_as_string" : "2001-01-01T00:00:00.000Z",
          "key" : 978307200000,
          "doc_count" : 2,
          "sum_aggs" : {
            "value" : 850.0
          },
          "normalize_aggs" : {
            "value" : 0.23902439024390243,
            "value_as_string" : "0.24"
          }
        },
        {
          "key_as_string" : "2001-02-01T00:00:00.000Z",
          "key" : 980985600000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 580.0
          },
          "normalize_aggs" : {
            "value" : -0.4195121951219512,
            "value_as_string" : "-0.42"
          }
        },
        {
          "key_as_string" : "2001-03-01T00:00:00.000Z",
          "key" : 983404800000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 990.0
          },
          "normalize_aggs" : {
            "value" : 0.5804878048780487,
            "value_as_string" : "0.58"
          }
        },
        {
          "key_as_string" : "2001-04-01T00:00:00.000Z",
          "key" : 986083200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 660.0
          },
          "normalize_aggs" : {
            "value" : -0.22439024390243903,
            "value_as_string" : "-0.22"
          }
        },
        {
          "key_as_string" : "2001-05-01T00:00:00.000Z",
          "key" : 988675200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 680.0
          },
          "normalize_aggs" : {
            "value" : -0.17560975609756097,
            "value_as_string" : "-0.18"
          }
        }
      ]
    }
  }
}

method : softmax

  • 由于softmax要计算e^x,原始值过大时会超出double的范围,所以测试本方法时把索引中的num都去掉末尾的一个0(即除以10)
GET /normalize_test/_search
{
  "size" : 0,
  "aggs" : {
    "month_aggs" : {
      "date_histogram": {
        "field" : "date",
        "calendar_interval" : "month"
      },
      "aggs" : {
        "sum_aggs" : {
          "sum": {
            "field": "num"
          }
        },
        "normalize_aggs" : {
          "normalize" : {
            "buckets_path" : "sum_aggs",
            "method" : "softmax"
          }
        }
      }
    }
  }
}
  • 返回结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "month_aggs" : {
      "buckets" : [
        {
          "key_as_string" : "2001-01-01T00:00:00.000Z",
          "key" : 978307200000,
          "doc_count" : 2,
          "sum_aggs" : {
            "value" : 85.0
          },
          "normalize_aggs" : {
            "value" : 8.315280276640997E-7
          }
        },
        {
          "key_as_string" : "2001-02-01T00:00:00.000Z",
          "key" : 980985600000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 58.0
          },
          "normalize_aggs" : {
            "value" : 1.5628808897545835E-18
          }
        },
        {
          "key_as_string" : "2001-03-01T00:00:00.000Z",
          "key" : 983404800000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 99.0
          },
          "normalize_aggs" : {
            "value" : 0.9999991684719333
          }
        },
        {
          "key_as_string" : "2001-04-01T00:00:00.000Z",
          "key" : 986083200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 66.0
          },
          "normalize_aggs" : {
            "value" : 4.658882271108809E-15
          }
        },
        {
          "key_as_string" : "2001-05-01T00:00:00.000Z",
          "key" : 988675200000,
          "doc_count" : 1,
          "sum_aggs" : {
            "value" : 68.0
          },
          "normalize_aggs" : {
            "value" : 3.442474245953642E-14
          }
        }
      ]
    }
  }
}
 类似资料: