Reputation: 191
I am trying to select multiple elements from hive external table .
my XML :
<?xml version="1.0" encoding="UTF-8"?>
<data>
<units version="1">
<unit>
<id>1</id>
<name>ABC</name>
<details>
<detail>
<subid>001</subid>
</detail>
</details>
</unit>
<unit>
<id>2</id>
<name>DEF</name>
<details>
<detail>
<subid>002</subid>
</detail>
</details>
</unit>
</units>
</data>
and my table script is like this :
create external table testxml (
id array<string>,
name array<string>,
subid array<string>
)
row format serde "com.ibm.spss.hive.serde2.xml.XmlSerDe"
with serdeproperties (
"column.xpath.id"="/units/unit/id/text()",
"column.xpath.name"="/units/unit/name/text()",
"column.xpath.subid"="/units/unit/details/detail/subid/text()"
)
stored as inputformat "com.ibm.spss.hive.serde2.xml.XmlInputFormat"
outputformat "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
location "somepath/test"
tblproperties (
"xmlinput.start"="<units version",
"xmlinput.end"="</units>"
);
my select statement returns these values :
hive> select * from testxml;
OK
["1","2"] ["ABC","DEF"] ["001","002"]
but my desired output is should be like this :
+--+------+------+
|id| name| subid|
+--+------+------+
| 1| ABC| 001|
| 2| DEF| 002|
+--+------+------+
any feedback would be relly helpful.
Upvotes: 1
Views: 555
Reputation: 11244
Try this
create external table testxml (
id string,
name string,
subid string
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
with serdeproperties (
"column.xpath.id"="/unit/id/text()",
"column.xpath.name"="/unit/name/text()",
"column.xpath.subid"="/unit/details/detail/subid/text()"
)
stored as inputformat "com.ibm.spss.hive.serde2.xml.XmlInputFormat"
outputformat "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
location "file:///home/cloudera/xmlfiles2"
tblproperties (
"xmlinput.start"="<unit>",
"xmlinput.end"="</unit>"
);
INFO : OK
+-------------+---------------+----------------+--+
| testxml.id | testxml.name | testxml.subid |
+-------------+---------------+----------------+--+
| 1 | ABC | 001 |
| 2 | DEF | 002 |
+-------------+---------------+----------------+--+
2 rows selected (0.154 seconds)
Upvotes: 1