Reputation: 393
I want to load the XML file into hive columns, but I am getting NULL when triggering select query on hive table.
Can anyone help?
Create table statement
CREATE TABLE `clobtest_h`(
`id` double,
`subject` string,
`body` string,
`purge_id` double,
`purge_date` timestamp,
`s_retention_applied` string,
`d_primary_column` double)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://nameservice1/user/hive/warehouse/support.db/clobtest_h'
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='false',
'last_modified_by'='root',
'last_modified_time'='1488897940',
'numFiles'='0',
'numRows'='-1',
'rawDataSize'='-1',
'totalSize'='0',
'transient_lastDdlTime'='1488897940')
Insert query
insert into clobtest_h values(2,'Testing issue','<?xml version="1.0"?>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<!DOCTYPE catalog SYSTEM "catalog.dtd">
<catalog>
<product description="Cardigan Sweater" product_image="cardigan.jpg">
<catalog_item gender="Mens">
<item_number>QWZ5671</item_number>
<price>39.95</price>
<size description="Medium">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
<size description="Large">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
</catalog_item>
<catalog_item gender="Womens">
<item_number>RRX9856</item_number>
<price>42.50</price>
<size description="Small">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
</size>
<size description="Medium">
<color_swatch image="red_cardigan.jpg">Red</color_swatch>
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
<color_swatch image="black_cardigan.jpg">Black</color_swatch>
</size>
<size description="Large">
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
<color_swatch image="black_cardigan.jpg">Black</color_swatch>
</size>
<size description="Extra Large">
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
<color_swatch image="black_cardigan.jpg">Black</color_swatch>
</size>
</catalog_item>
</product>
</catalog>',1234.0,'2017-03-07 20:15:04','N',6.0)
Select query on table, getting NULLs after first line fetching
"select * from support.clobtest_h"
Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.6.0-1.cdh5.6.0.p0.45/jars/hive-common-1.1.0-cdh5.6.0.jar!/hive-log4j.properties
OK
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/cloudera/parcels/CDH-5.6.0-1.cdh5.6.0.p0.45/jars/parquet-pig-bundle-1.5.0-cdh5.6.0.jar!/shaded/parquet/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/cloudera/parcels/CDH-5.6.0-1.cdh5.6.0.p0.45/jars/parquet-format-2.1.0-cdh5.6.0.jar!/shaded/parquet/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/cloudera/parcels/CDH-5.6.0-1.cdh5.6.0.p0.45/jars/parquet-hadoop-bundle-1.5.0-cdh5.6.0.jar!/shaded/parquet/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/cloudera/parcels/CDH-5.6.0-1.cdh5.6.0.p0.45/jars/hive-exec-1.1.0-cdh5.6.0.jar!/shaded/parquet/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/cloudera/parcels/CDH-5.6.0-1.cdh5.6.0.p0.45/jars/hive-jdbc-1.1.0-cdh5.6.0-standalone.jar!/shaded/parquet/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [shaded.parquet.org.slf4j.helpers.NOPLoggerFactory]
2.0 Testing issue <?xml version="1.0"?> NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL
NULL 1234.0 2017-03-07 20:15:04 NULL NULL NULL NULL
Time taken: 1.878 seconds, Fetched: 42 row(s)
Mar 8, 2017 1:37:37 PM WARNING: parquet.hadoop.ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
Mar 8, 2017 1:37:37 PM INFO: parquet.hadoop.InternalParquetRecordReader: RecordReader initialized will read a total of 42 records.
Mar 8, 2017 1:37:37 PM INFO: parquet.hadoop.InternalParquetRecordReader: at row 0. reading next block
Mar 8, 2017 1:37:37 PM INFO: parquet.hadoop.InternalParquetRecordReader: block read in memory in 21 ms. row count = 42
Upvotes: 1
Views: 651
Reputation: 393
I have set the below property in hive-site.xml, which resolved the issue.
<property>
<name>hive.query.result.fileformat</name>
<value>SequenceFile</value>
</property>
Upvotes: 1