Reputation: 994
Kafka Stream to HDFS
public static void main(String[] args) throws Exception {
String brokers = "quickstart:9092";
String topics = "simple_topic_6";
String master = "local[*]";
SparkSession sparkSession = SparkSession
.builder().appName(EventKafkaToParquet.class.getName())
.master(master).getOrCreate();
SQLContext sqlContext = sparkSession.sqlContext();
SparkContext context = sparkSession.sparkContext();
context.setLogLevel("ERROR");
Dataset<Row> rawDataSet = sparkSession.readStream()
.format("kafka")
.option("kafka.bootstrap.servers", brokers)
.option("subscribe", topics).load();
rawDataSet.printSchema();
rawDataSet = rawDataSet.withColumn("employee", rawDataSet.col("value").cast(DataTypes.StringType));
rawDataSet.createOrReplaceTempView("basicView");
Dataset<Row> writeDataset = sqlContext.sql("select employee from basicView");
writeDataset
.repartition(1)
.writeStream()
.option("path","/user/cloudera/employee/")
.option("checkpointLocation", "/user/cloudera/employee.checkpoint/")
.format("parquet")
.trigger(Trigger.ProcessingTime(5000))
.start()
.awaitTermination();
}
External table on Hive
CREATE EXTERNAL TABLE employee_raw ( employee STRING )
STORED AS PARQUET
LOCATION '/user/cloudera/employee' ;
Now I want to create a HIVE view on top of employee_raw table, which gives the out put as
firstName, lastName, street, city, state, zip
The output of employee_raw table is
hive> select * from employee_raw;
OK
{"employee":{"firstName":"Ganesh","lastName":"Kumar","address":{"street":"1400 Dakota Dr","city":"Princeton","state":"NJ","zip":"09800"}}}
{"employee":{"firstName":"Ganesh","lastName":"Kumar","address":{"street":"1400 Dakota Dr","city":"Princeton","state":"NJ","zip":"09800"}}}
{"employee":{"firstName":"Ganesh","lastName":"Kumar","address":{"street":"1400 Dakota Dr","city":"Princeton","state":"NJ","zip":"09800"}}}
{"employee":{"firstName":"Ganesh","lastName":"Kumar","address":{"street":"1400 Dakota Dr","city":"Princeton","state":"NJ","zip":"09800"}}}
{"employee":{"firstName":"Ganesh","lastName":"Kumar","address":{"street":"1400 Dakota Dr","city":"Princeton","state":"NJ","zip":"09800"}}}
Time taken: 0.123 seconds, Fetched: 5 row(s)
Your inputs are appreciated
Upvotes: 0
Views: 449
Reputation: 12009
According your description it looks for me that you mainly like to "Extract values from JSON string in Hive", so you may find the answer in the linked thread.
Upvotes: 1