How to tell Druid to skip non-Avro records during Kafka ingestion

I have a schema registry consisting of multiple schemas. I encode a payload using one of the schema registry IDs and send it to Kafka. This data is then ingested into Druid. However, when I send non-Avro data to the same Kafka topic, my Druid ingestion fails, causing the further processing of incoming Kafka records to stop.

I'm using Druid version 0.23 and my ingestion spec looks like this:

{
  "type": "kafka",
  "spec": {
    "ioConfig": {
      "type": "kafka",
      "consumerProperties": {
        "bootstrap.servers": "localhost:19092"
      },
      "topic": "producerV4",
      "inputFormat": {
        "type": "avro_stream",
        "binaryAsString": false,
        "avroBytesDecoder": {
          "type": "schema_registry",
          "url": "http://localhost:7081"
        }
      },
      "useEarliestOffset": true,
      "taskCount": 1,
      "replicas": 1
    },
    "tuningConfig": {
      "type": "kafka",
      "maxRowsInMemory": 500000,
      "intermediatePersistPeriod": "PT10M",
      "maxPendingPersists": 0,
      "skipInvalidRows": true,
      "reportParseExceptions": false
    },
    "dataSchema": {
      "dataSource": "producerV4",
      "timestampSpec": {
        "column": "event_time",
        "format": "auto"
      },
      "dimensionsSpec": {
        "useSchemaDiscovery": true
      },
      "metricsSpec": [
        {
          "type": "count",
          "name": "count"
        }
      ],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "DAY",
        "queryGranularity": "NONE",
        "rollup": false
      }
    }
  }
}

Upvotes: 0

Views: 13

Answers (0)

Related Questions