I use an ArrayBlockingQueue to decouple Kafka consumers from sinks.
While running it, I hit the following error, which has been troubling me for several days; I can't figure out which part of the code is wrong:
11:44:10.794 [pool-2-thread-1] ERROR com.alibaba.kafka.source.KafkaConsumerRunner - [pool-2-thread-1] ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
java.util.ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
at org.apache.kafka.clients.consumer.KafkaConsumer.acquire(KafkaConsumer.java:1824)
at org.apache.kafka.clients.consumer.KafkaConsumer.acquireAndEnsureOpen(KafkaConsumer.java:1808)
at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1255)
at com.alibaba.kafka.source.KafkaConsumerRunner$1.call(KafkaConsumerRunner.java:75)
at com.alibaba.kafka.source.KafkaConsumerRunner$1.call(KafkaConsumerRunner.java:71)
at com.alibaba.kafka.sink.Sink.run(Sink.java:25)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Source Code:
Queues.java
public class Queues {

    public static volatile BlockingQueue[] queues;

    /**
     * Create Multiple Queues.
     * @param count    The number of queues created.
     * @param capacity The capacity of each queue.
     */
    public static void createQueues(final int count, final int capacity) {
        Queues.queues = new BlockingQueue[count];
        for (int i = 0; i < count; ++i) {
            Queues.queues[i] = new ArrayBlockingQueue(capacity, true);
        }
    }
}
Record.java
@Builder
@Getter
public class Record {
    private final String value;
    private final Callable<Boolean> ackCallback;
}
Sink.java
public class Sink implements Runnable {

    private final int queueId;

    public Sink(int queueId) {
        this.queueId = queueId;
    }

    @Override
    public void run() {
        while (true) {
            try {
                Record record = (Record) Queues.queues[this.queueId].take();
                // (1) Handler: Write to database
                Thread.sleep(10);
                // (2) ACK: notify kafka consumer to commit offset manually
                record.getAckCallback().call();
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
}
KafkaConsumerRunner.java
@Slf4j
public class KafkaConsumerRunner implements Runnable {

    private final String topic;
    private final KafkaConsumer<String, String> consumer;

    public KafkaConsumerRunner(String topic, Properties properties) {
        this.topic = topic;
        this.consumer = new KafkaConsumer<>(properties);
    }

    @Override
    public void run() {
        // offsets to commit
        Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = new HashMap<>();
        // Subscribe topic
        this.consumer.subscribe(Collections.singletonList(this.topic));
        // Consume Kafka Message
        while (true) {
            try {
                ConsumerRecords<String, String> consumerRecords = this.consumer.poll(10000L);
                for (TopicPartition topicPartition : consumerRecords.partitions()) {
                    for (ConsumerRecord<String, String> consumerRecord : consumerRecords.records(topicPartition)) {
                        // (1) Restore [partition -> offset] Map
                        offsetsToCommit.put(topicPartition, new OffsetAndMetadata(consumerRecord.offset()));
                        // (2) Put into queue
                        int queueId = topicPartition.partition() % Queues.queues.length;
                        Queues.queues[queueId].put(Record.builder()
                                .value(consumerRecord.value())
                                .ackCallback(this.getAckCallback(offsetsToCommit))
                                .build());
                    }
                }
            } catch (ConcurrentModificationException | InterruptedException e) {
                log.error("[{}] {}", Thread.currentThread().getName(), ExceptionUtils.getMessage(e), e);
                System.exit(1);
            }
        }
    }

    private Callable<Boolean> getAckCallback(Map<TopicPartition, OffsetAndMetadata> offsets) {
        return new AckCallback<Boolean>(this.consumer, new HashMap<>(offsets)) {
            @Override
            public Boolean call() throws Exception {
                try {
                    this.getConsumer().commitSync(this.getOffsets());
                    return true;
                } catch (Exception e) {
                    log.error(String.format("[%s] %s", Thread.currentThread().getName(), ExceptionUtils.getMessage(e)), e);
                    return false;
                }
            }
        };
    }

    @Getter
    @AllArgsConstructor
    abstract class AckCallback<T> implements Callable<T> {
        private final KafkaConsumer<String, String> consumer;
        private final Map<TopicPartition, OffsetAndMetadata> offsets;
    }
}
Application.java
public class Application {

    private static final String TOPIC = "YEWEI_TOPIC";
    private static final int QUEUE_COUNT = 1;
    private static final int QUEUE_CAPACITY = 4;

    private static void createQueues() {
        Queues.createQueues(QUEUE_COUNT, QUEUE_CAPACITY);
    }

    private static void startupSource() {
        if (null == System.getProperty("java.security.auth.login.config")) {
            System.setProperty("java.security.auth.login.config", "jaas.conf");
        }

        Properties properties = new Properties();
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "ConsumerGroup1");
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "cdh1:9092,cdh2:9092,cdh3:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
        properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 2);
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_PLAINTEXT");
        properties.put(SaslConfigs.SASL_MECHANISM, "PLAIN");

        ExecutorService executorService = Executors.newFixedThreadPool(QUEUE_COUNT);
        for (int queueId = 0; queueId < QUEUE_COUNT; ++queueId) {
            executorService.execute(new KafkaConsumerRunner(TOPIC, properties));
        }
    }

    private static void startupSinks() {
        ExecutorService executorService = Executors.newFixedThreadPool(QUEUE_COUNT);
        for (int queueId = 0; queueId < QUEUE_COUNT; ++queueId) {
            executorService.execute(new Sink(queueId));
        }
    }

    public static void main(String[] args) {
        Application.createQueues();
        Application.startupSource();
        Application.startupSinks();
    }
}
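Since the consumer is configured for SASL_PLAINTEXT with the PLAIN mechanism and java.security.auth.login.config points at jaas.conf, that file needs a KafkaClient login section. A minimal sketch of what it could look like (the username and password are placeholders, not values from this post):

KafkaClient {
  org.apache.kafka.common.security.plain.PlainLoginModule required
  username="<kafka-user>"
  password="<kafka-password>";
};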
I figured out the problem. The KafkaConsumer runs in its own thread, but it was also being called back from the Sink thread. KafkaConsumer methods such as poll and commitSync may only be called from a single thread; see org.apache.kafka.clients.consumer.KafkaConsumer#acquireAndEnsureOpen.
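For reference, the guard that throws this exception works roughly as in the sketch below: every public KafkaConsumer method first records the calling thread's id with a compare-and-set and fails fast if a different thread is already using the consumer. This is a simplified paraphrase of the client code, not the exact source:

import java.util.ConcurrentModificationException;
import java.util.concurrent.atomic.AtomicLong;

// Simplified paraphrase of the single-thread guard inside KafkaConsumer.
class SingleThreadGuard {
    private static final long NO_CURRENT_THREAD = -1L;
    private final AtomicLong currentThread = new AtomicLong(NO_CURRENT_THREAD);

    // Called on entry to poll(), commitSync(), etc.
    void acquire() {
        long threadId = Thread.currentThread().getId();
        // If another thread already owns the consumer, fail fast instead of corrupting state.
        if (threadId != currentThread.get()
                && !currentThread.compareAndSet(NO_CURRENT_THREAD, threadId)) {
            throw new ConcurrentModificationException(
                    "KafkaConsumer is not safe for multi-threaded access");
        }
    }
}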
The fix: the Sink callback no longer uses the consumer object directly. Instead, it puts the ACK message (the offsets to commit) onto a LinkedTransferQueue, and KafkaConsumerRunner drains that queue on every iteration of its poll loop and commits the ACKs itself:
@Slf4j
public class KafkaConsumerRunner implements Runnable {

    private final String topic;
    private final BlockingQueue<Map<TopicPartition, OffsetAndMetadata>> ackQueue;
    private final KafkaConsumer<String, String> consumer;

    public KafkaConsumerRunner(String topic, Properties properties) {
        this.topic = topic;
        this.ackQueue = new LinkedTransferQueue<>();
        this.consumer = new KafkaConsumer<>(properties);
    }

    @Override
    public void run() {
        // Subscribe topic
        this.consumer.subscribe(Collections.singletonList(this.topic));
        // Consume Kafka Message
        while (true) {
            // Commit any ACKs queued by the Sink thread, on the consumer thread
            while (!this.ackQueue.isEmpty()) {
                try {
                    Map<TopicPartition, OffsetAndMetadata> offsets = this.ackQueue.take();
                    this.consumer.commitSync(offsets);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
            ...
        }
    }

    private Callable<Boolean> getAckCallback(Map<TopicPartition, OffsetAndMetadata> offsets) {
        return new AckCallback<Boolean>(new HashMap<>(offsets)) {
            @Override
            public Boolean call() throws Exception {
                try {
                    // Hand over the snapshot taken when the record was enqueued,
                    // not the live map that the consumer thread keeps mutating.
                    ackQueue.put(this.getOffsets());
                    return true;
                } catch (Exception e) {
                    log.error(String.format("[%s] %s", Thread.currentThread().getName(), ExceptionUtils.getMessage(e)), e);
                    System.exit(1);
                    return false;
                }
            }
        };
    }
    ...
}
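One note on the inner while loop: as written it issues one commitSync per queued ACK. To actually commit in batches, the consumer thread could drain the whole queue and merge the offsets into a single commit. A minimal sketch, assuming the ackQueue and consumer fields above plus the usual java.util imports (the helper name commitPendingAcks is arbitrary):

// Hypothetical helper: drain every pending ACK and commit them with a single
// commitSync, keeping only the highest offset per partition.
private void commitPendingAcks() {
    List<Map<TopicPartition, OffsetAndMetadata>> acks = new ArrayList<>();
    this.ackQueue.drainTo(acks);                      // non-blocking, unlike take()
    if (acks.isEmpty()) {
        return;
    }
    Map<TopicPartition, OffsetAndMetadata> merged = new HashMap<>();
    for (Map<TopicPartition, OffsetAndMetadata> ack : acks) {
        ack.forEach((tp, om) ->
                merged.merge(tp, om, (a, b) -> a.offset() >= b.offset() ? a : b));
    }
    this.consumer.commitSync(merged);                 // one commit for the whole batch
}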