Configure Kafka ingestion on metrics collection cluster¶
Imported from Confluence
Content may be outdated. Verify before following any procedures. View original | Last updated: March 2022
Ensure that the Druid Kafka indexing service extension is loaded on the metrics collection cluster. See the Druid documentation on extensions for information on loading Druid extensions.
Supervisor spec for the OFW Druid cluster:
{
"type": "kafka",
"dataSchema": {
"dataSource": "druid-metrics",
"parser": {
"type": "string",
"parseSpec": {
"format": "json",
"dimensionsSpec": {
"dimensions": [],
"dimensionExclusions": [ "segment", "interval" ]
},
"timestampSpec": {
"column": "timestamp",
"format": "auto"
}
}
},
"metricsSpec": [
{"type":"count", "name":"count"},
{"type":"doubleSum", "name":"sum", "fieldName":"value"},
{"type":"doubleMin", "name":"min", "fieldName":"value"},
{"type":"doubleMax", "name":"max", "fieldName":"value"},
{"type":"approxHistogram", "name":"histogram", "fieldName":"value", "resolution":50}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "hour",
"queryGranularity": "none",
"rollup": true
}
},
"tuningConfig": {
"type": "kafka",
"maxRowsInMemory": 150000,
"maxRowsPerSegment": 5000000,
"intermediatePersistPeriod": "PT2H",
"resetOffsetAutomatically": true
},
"ioConfig": {
"topic": "druid-metrics",
"taskCount": 1,
"replicas": 2,
"useEarliestOffset": false,
"taskDuration": "PT2H",
"consumerProperties": {
"bootstrap.servers": "druid-kafka-0.service.consul:32092,druid-kafka-1.service.consul:32192,druid-kafka-2.service.consul:32292"
}
}
}
Supervisor spec for the FairBid Druid Virginia cluster:
{
"type": "kafka",
"dataSchema": {
"dataSource": "druid-metrics",
"parser": {
"type": "string",
"parseSpec": {
"format": "json",
"dimensionsSpec": {
"dimensions": [],
"dimensionExclusions": [
"segment",
"interval"
]
},
"timestampSpec": {
"column": "timestamp",
"format": "auto"
}
}
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "sum",
"fieldName": "value"
},
{
"type": "doubleMin",
"name": "min",
"fieldName": "value"
},
{
"type": "doubleMax",
"name": "max",
"fieldName": "value"
},
{
"type": "approxHistogram",
"name": "histogram",
"fieldName": "value",
"resolution": 50
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "hour",
"queryGranularity": "none",
"rollup": true
}
},
"tuningConfig": {
"type": "kafka",
"maxRowsInMemory": 150000,
"maxRowsPerSegment": 5000000,
"intermediatePersistPeriod": "PT1H",
"resetOffsetAutomatically": true
},
"ioConfig": {
"topic": "druid-metrics",
"taskCount": 3,
"replicas": 2,
"useEarliestOffset": false,
"taskDuration": "PT1H",
"consumerProperties": {
"bootstrap.servers": "druid-metrics-kafka-0.druid-metrics-kafka-brokers.druid.svc.cluster.local:9092,druid-metrics-kafka-1.druid-metrics-kafka-brokers.druid.svc.cluster.local:9092,druid-metrics-kafka-2.druid-metrics-kafka-brokers.druid.svc.cluster.local:9092"
}
}
}
Adjust the Kafka bootstrap address, metrics topic name, and other settings accordingly.
Apply the spec by running the following command from the directory to which you downloaded the spec:
curl -XPOST -H'Content-Type: application/json' -d@clarity-kafka-supervisor.json http://<overlord_address>:8090/druid/indexer/v1/supervisor
If your cluster has basic authentication enabled, include the credentials with the -u option:
curl -u admin:<PASSWORD> -XPOST -H'Content-Type: application/json' -d@clarity-kafka-supervisor.json http://<overlord_address>:8090/druid/indexer/v1/supervisor
Replace overlord_address with the IP address of the machine running the overlord process in your Imply cluster. This is typically the Master server in the Druid cluster.
References:
1. Apache Druid tutorial: Load streaming data from Apache Kafka