从quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz数据文件读取wikipedia数据,创建一个名称为deletion-tutorial的数据源
deletion-index.json内容如下,创建的segment为小时粒度
[root@bigdata001 apache-druid-0.22.1]# cat quickstart/tutorial/deletion-index.json
{
"type" : "index_parallel",
"spec" : {
"dataSchema" : {
"dataSource" : "deletion-tutorial",
"timestampSpec": {
"column": "time",
"format": "iso"
},
"dimensionsSpec" : {
"dimensions" : [
"channel",
"cityName",
"comment",
"countryIsoCode",
"countryName",
"isAnonymous",
"isMinor",
"isNew",
"isRobot",
"isUnpatrolled",
"metroCode",
"namespace",
"page",
"regionIsoCode",
"regionName",
"user",
{ "name": "added", "type": "long" },
{ "name": "deleted", "type": "long" },
{ "name": "delta", "type": "long" }
]
},
"metricsSpec" : [],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "hour",
"queryGranularity" : "none",
"intervals" : ["2015-09-12/2015-09-13"],
"rollup" : false
}
},
"ioConfig" : {
"type" : "index_parallel",
"inputSource" : {
"type" : "local",
"baseDir" : "quickstart/tutorial/",
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
},
"inputFormat" : {
"type" : "json"
},
"appendToExisting" : false
},
"tuningConfig" : {
"type" : "index_parallel",
"maxRowsPerSegment" : 5000000,
"maxRowsInMemory" : 25000
}
}
}
[root@bigdata001 apache-druid-0.22.1]#
在命令行执行task
[root@bigdata001 apache-druid-0.22.1]#
[root@bigdata001 apache-druid-0.22.1]# bin/post-index-task --file quickstart/tutorial/deletion-index.json --url http://bigdata003:9081
Beginning indexing data for deletion-tutorial
Task started: index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z
Task log: http://bigdata003:9081/druid/indexer/v1/task/index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z/log
Task status: http://bigdata003:9081/druid/indexer/v1/task/index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z/status
Task index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z still running...
Task index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z still running...
Task index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z still running...
Task index_parallel_deletion-tutorial_eeegkkll_2022-04-01T08:44:41.767Z still running...
Task finished with status: SUCCESS
Completed indexing data for deletion-tutorial. Now loading indexed data onto the cluster...
[root@bigdata001 apache-druid-0.22.1]#
查询数据源数据
dsql> select * from "deletion-tutorial" limit 1;
┌──────────────────────────┬───────┬───────────────┬──────────┬───────────────┬────────────────┬─────────────┬─────────┬───────┬─────────────┬─────────┬───────┬─────────┬───────────────┬───────────┬───────────┬──────────────────────┬───────────────┬────────────┬──────────────┐
│ __time │ added │ channel │ cityName │ comment │ countryIsoCode │ countryName │ deleted │ delta │ isAnonymous │ isMinor │ isNew │ isRobot │ isUnpatrolled │ metroCode │ namespace │ page │ regionIsoCode │ regionName │ user │
├──────────────────────────┼───────┼───────────────┼──────────┼───────────────┼────────────────┼─────────────┼─────────┼───────┼─────────────┼─────────┼───────┼─────────┼───────────────┼───────────┼───────────┼──────────────────────┼───────────────┼────────────┼──────────────┤
│ 2015-09-12T00:46:58.771Z │ 36 │ #en.wikipedia │ │ added project │ │ │ 0 │ 36 │ false │ false │ false │ false │ false │ │ Talk │ Talk:Oswald Tilghman │ │ │ GELongstreet │
└──────────────────────────┴───────┴───────────────┴──────────┴───────────────┴────────────────┴─────────────┴─────────┴───────┴─────────────┴─────────┴───────┴─────────┴───────────────┴───────────┴───────────┴──────────────────────┴───────────────┴────────────┴──────────────┘
Retrieved 1 row in 0.06s.
dsql>
dsql> select count(*) from "deletion-tutorial" limit 1;
┌────────┐
│ EXPR$0 │
├────────┤
│ 39244 │
└────────┘
Retrieved 1 row in 0.46s.
dsql>
永久删除一个segment需要两步:第一步将segment标记为unused(未使用),第二步提交kill任务,将已标记为unused的segment从元数据库和Deep storage中永久删除
下面的命令按时间区间(interval)将18和19小时的segment标记为unused
[root@bigdata001 apache-druid-0.22.1]# curl -X 'POST' -H 'Content-Type:application/json' -d '{"interval" : "2015-09-12T18:00:00.000Z/2015-09-12T20:00:00.000Z"}' http://bigdata003:9081/druid/coordinator/v1/datasources/deletion-tutorial/markUnused
{"numChangedSegments":2}[root@bigdata001 apache-druid-0.22.1]#
对quickstart/tutorial/deletion-disable-segments.json进行修改,将其中的segment ID改成我们自己数据源的segment ID
这里我们按segment ID将13和14小时的segment标记为unused,文件内容如下
然后同步到Druid集群所有服务器的quickstart/tutorial目录下
[root@bigdata001 apache-druid-0.22.1]# cat quickstart/tutorial/deletion-disable-segments.json
{
"segmentIds":
[
"deletion-tutorial_2015-09-12T13:00:00.000Z_2015-09-12T14:00:00.000Z_2022-04-01T08:44:41.783Z",
"deletion-tutorial_2015-09-12T14:00:00.000Z_2015-09-12T15:00:00.000Z_2022-04-01T08:44:41.783Z"
]
}
[root@bigdata001 apache-druid-0.22.1]#
在命令行调用markUnused接口,按segment ID将文件中列出的segment标记为unused
[root@bigdata001 apache-druid-0.22.1]# curl -X 'POST' -H 'Content-Type:application/json' -d @quickstart/tutorial/deletion-disable-segments.json http://bigdata003:9081/druid/coordinator/v1/datasources/deletion-tutorial/markUnused
{"numChangedSegments":2}[root@bigdata001 apache-druid-0.22.1]#
deletion-kill.json的内容如下
[root@bigdata001 apache-druid-0.22.1]#
[root@bigdata001 apache-druid-0.22.1]# cat quickstart/tutorial/deletion-kill.json
{
"type": "kill",
"dataSource": "deletion-tutorial",
"interval" : "2015-09-12/2015-09-13"
}
[root@bigdata001 apache-druid-0.22.1]#
提交kill任务到Overlord
[root@bigdata001 apache-druid-0.22.1]# curl -X 'POST' -H 'Content-Type:application/json' -d @quickstart/tutorial/deletion-kill.json http://bigdata003:9081/druid/indexer/v1/task
{"task":"kill_deletion-tutorial_iilainag_2015-09-12T00:00:00.000Z_2015-09-13T00:00:00.000Z_2022-04-01T09:42:04.383Z"}[root@bigdata001 apache-druid-0.22.1]#
kill任务执行完成后,该时间区间内已标记为unused的segment将从元数据库和Deep storage中永久删除