Reputation: 1034
I am not getting any data from the queue using a Kafka direct stream. I put a System.out.println() in my code, and it never runs, which means I am not receiving any data from that topic.
I am pretty sure data is available in the queue, yet nothing shows up in the console.
I don't see any errors in the console either.
Can anyone please suggest something?
Here is my Java code,
SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount11").setMaster("local[*]");
sparkConf.set("spark.streaming.concurrentJobs", "3");

// Create the context with a 3-second batch size
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(3000));

Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "x.xx.xxx.xxx:9092");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
kafkaParams.put("auto.offset.reset", "latest");
kafkaParams.put("enable.auto.commit", true);

Collection<String> topics = Arrays.asList("topicName");

final JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(jssc,
        LocationStrategies.PreferConsistent(),
        ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

JavaPairDStream<String, String> lines = stream
        .mapToPair(new PairFunction<ConsumerRecord<String, String>, String, String>() {
            @Override
            public Tuple2<String, String> call(ConsumerRecord<String, String> record) {
                return new Tuple2<>(record.key(), record.value());
            }
        });

lines.print();
// System.out.println(lines.count());

lines.foreachRDD(rdd -> {
    rdd.values().foreachPartition(p -> {
        while (p.hasNext()) {
            System.out.println("Value of Kafka queue: " + p.next());
        }
    });
});
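Note that nothing in a Spark Streaming program executes until the streaming context is started. If the snippet above is the whole driver, it is missing that step; the standard calls (part of the stock Spark Streaming API, and present in the working code below) are:

jssc.start();            // begin consuming and scheduling batches
jssc.awaitTermination(); // block the driver until the job is stopped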
Upvotes: 1
Views: 1551
Reputation: 1034
I am able to print the strings fetched from the Kafka queue using a direct Kafka stream.
Here is my code,
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import scala.Tuple2;

import kafka.serializer.StringDecoder;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
public final class KafkaConsumerDirectStream {

    public static void main(String[] args) throws Exception {
        try {
            SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount11").setMaster("local[*]");
            sparkConf.set("spark.streaming.concurrentJobs", "30");

            // Create the context with a 200 ms batch interval
            JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(200));

            Map<String, String> kafkaParams = new HashMap<>();
            kafkaParams.put("metadata.broker.list", "x.xx.xxx.xxx:9091");

            Set<String> topics = new HashSet<>();
            topics.add("PartWithTopic02Queue");

            JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class,
                    String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

            JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
                @Override
                public String call(Tuple2<String, String> tuple2) {
                    return tuple2._2();
                }
            });

            lines.foreachRDD(rdd -> {
                if (rdd.count() > 0) {
                    List<String> strArray = rdd.collect();
                    // Print string here
                }
            });

            jssc.start();
            jssc.awaitTermination();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
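If all you need at the "// Print string here" placeholder is console output, a minimal sketch (assuming plain stdout printing is enough) would be:

lines.foreachRDD(rdd -> {
    if (rdd.count() > 0) {
        List<String> strArray = rdd.collect();
        // Print each message value pulled from the Kafka topic
        strArray.forEach(System.out::println);
    }
});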
Upvotes: 1
Reputation: 337
@Vimal Here is a link to the working version of creating direct streams in Scala.
After reviewing the Scala version, I believe you can convert it to Java easily.
Also make sure you are not starting from only the latest offsets in Kafka; with that setting the stream will not pick up messages that were published before the consumer started.
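For reference, that setting is the auto.offset.reset consumer property seen in the question. A sketch of starting from the earliest available offsets instead (the value names differ between the two Kafka consumer APIs):

// New consumer API (spark-streaming-kafka-0-10), as used in the question:
kafkaParams.put("auto.offset.reset", "earliest");

// Old direct stream API (spark-streaming-kafka-0-8), as used in the answer above:
kafkaParams.put("auto.offset.reset", "smallest");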
Upvotes: 0