Reputation: 23
I have simple task to do. 1) I have Cognos generating report in XML that looks something like that:
<?xml version="1.0" encoding="utf-8"?>
<dataset xmlns="http://developer.cognos.com/schemas/xmldata/1/" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance">
<!--
<dataset xmlns="http://developer.cognos.com/schemas/xmldata/1/" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://developer.cognos.com/schemas/xmldata/1/ xmldata.xsd">
-->
<metadata>
<item name="OID" type="xs:string" length="32"/>
<item name="NAME" type="xs:string" length="522"/>
<item name="TYPE" type="xs:string" length="32"/>
<item name="O_STATUS" type="xs:string" length="52"/>
<item name="O_REGION" type="xs:string" length="52"/>
<item name="O_DEV_IN_COUNTRY" type="xs:string" length="12"/>
<item name="O_START" type="xs:date"/>
<item name="O_REQ_IMPL" type="xs:date"/>
<item name="O_B_INIT_APPR" type="xs:string" length="50"/>
<item name="O_B_FIN_APPR" type="xs:date"/>
<item name="O_REL_Y" type="xs:string" length="12"/>
<item name="O_REL_M" type="xs:string" length="12"/>
<item name="O_REL_T" type="xs:string" length="4"/>
<item name="O_SIZING" type="xs:date"/>
<item name="O_BPM" type="xs:string" length="592"/>
<item name="O_BS" type="xs:string" length="22"/>
<item name="O_MAINT_ASSESSOR" type="xs:string" length="22"/>
<item name="O_P_ASSIGN_MGR" type="xs:string" length="32"/>
<item name="O_FUNC_AREA" type="xs:string" length="102"/>
<item name="O_APS" type="xs:string" length="22"/>
<item name="O_INV_TYPE" type="xs:string" length="32"/>
<item name="O_INV_CAT" type="xs:string" length="92"/>
<item name="O_INV_SUB_CAT" type="xs:string" length="82"/>
<item name="O_PRIM_AREA_OF_IMPACT" type="xs:string" length="92"/>
<item name="O_MAND_REQ_TYP" type="xs:string" length="72"/>
<item name="O_M_COUNTRY" type="xs:string" length="42"/>
<item name="O_IMP_COUN_1" type="xs:string" length="42"/>
<item name="O_IMP_COUN_2" type="xs:string" length="42"/>
<item name="O_IMP_COUN_3" type="xs:string" length="42"/>
<item name="O_IMP_COUN_4" type="xs:string" length="42"/>
<item name="O_IMP_COUN_5" type="xs:string" length="42"/>
<item name="O_IMP_COUN_6" type="xs:string" length="42"/>
<item name="O_IMP_COUN_7" type="xs:string" length="42"/>
<item name="O_IMP_COUN_8" type="xs:string" length="32"/>
<item name="O_IMP_COUN_9" type="xs:string" length="42"/>
<item name="O_IMP_COUN_10" type="xs:string" length="32"/>
</metadata>
<data>
<row>
<value>X-1234567</value>
<value>SOME TEXT</value>
<value>Project</value>
<value>New</value>
<value>AAAA</value>
<value>Yes</value>
<value>2015-01-01</value>
<value>2015-02-01</value>
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value>AA12345</value>
<value xs:nil="true" />
<value xs:nil="true" />
<value>SOME TEXT</value>
<value>SOME TEXT</value>
<value>SOME TEXT</value>
<value>SOME TEXT</value>
<value>SOME TEXT</value>
<value>SOME TEXT</value>
<value xs:nil="true" />
<value>SOME TEXT</value>
<value>SOME TEXT</value>
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
<value xs:nil="true" />
</row>
<row>
...
</row>
</data>
</dataset>
2) Such file is send to file share that is mapped as Oracle Directory called IMPDIR
3) I have access to this file from SQL
What I would like to do is to import data from this file in to a table that would have table with fields like in metadata and records would be filled by data from nodes.
I have search thru Stack and Goolgle and most cases that I have found are related to XML that have clear path with fields that can be distinguish by path.
Any idea how to part such file ? The file it self is something around 10MB.
Upvotes: 2
Views: 236
Reputation: 21085
The generic approach is to use dynamic SQL as the column names are part of the XML. The query below select the column names and values fro each row and return a insert statement that stores the data.
I assume the XML is in table TST
in column xml
WITH colnames AS
(SELECT pos,
column_name
FROM tst t,
XMLTable( XMLNAMESPACES (DEFAULT 'http://developer.cognos.com/schemas/xmldata/1/' ), 'for $i in /dataset/metadata/item
return $i' passing (t.xml) columns
pos FOR ORDINALITY,
column_name VARCHAR2(30) path '@name' ) x
),
vals AS
(SELECT vals ,
rn
FROM tst t,
XMLTable( XMLNAMESPACES (DEFAULT 'http://developer.cognos.com/schemas/xmldata/1/' ), 'for $i in /dataset/data/row
return $i' passing (t.xml) columns
rn FOR ORDINALITY,
vals XMLType path 'value' ) x
) ,
vals2 AS
(SELECT rn,
pos,
val
FROM vals,
XMLTable( XMLNAMESPACES (DEFAULT 'http://developer.cognos.com/schemas/xmldata/1/' ), 'for $i in /value
return $i' passing (vals.vals) columns
pos FOR ORDINALITY,
val VARCHAR2(4000) path 'text()' ) x
)
SELECT rn,
'insert into TAB ('
||
(SELECT listagg(column_name,', ') within GROUP (ORDER BY pos) FROM colnames
)
||') values ('
/* quote and escape the string */
|| listagg(case when val is NULL then 'null' else ''''||replace(val,'''','''''')||'''' end,', ') within GROUP (ORDER BY pos)
||');' AS insert_txt
FROM vals2
GROUP BY rn
;
One example output - perform all inserts with execute immediate
INSERT INTO TAB
(
OID,
NAME,
TYPE,
O_STATUS,
O_REGION,
O_DEV_IN_COUNTRY,
O_START,
O_REQ_IMPL,
O_B_INIT_APPR,
O_B_FIN_APPR,
O_REL_Y,
O_REL_M,
O_REL_T,
O_SIZING,
O_BPM,
O_BS,
O_MAINT_ASSESSOR,
O_P_ASSIGN_MGR,
O_FUNC_AREA,
O_APS,
O_INV_TYPE,
O_INV_CAT,
O_INV_SUB_CAT,
O_PRIM_AREA_OF_IMPACT,
O_MAND_REQ_TYP,
O_M_COUNTRY,
O_IMP_COUN_1,
O_IMP_COUN_2,
O_IMP_COUN_3,
O_IMP_COUN_4,
O_IMP_COUN_5,
O_IMP_COUN_6,
O_IMP_COUN_7,
O_IMP_COUN_8,
O_IMP_COUN_9,
O_IMP_COUN_10
)
VALUES
(
'X-1234567',
'SOME TEXT',
'Project',
'New',
'AAAA',
'Yes',
'2015-01-01',
'2015-02-01',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
'AA12345',
NULL,
NULL,
'SOME TEXT',
'SOME TEXT',
'SOME TEXT',
'SOME TEXT',
'SOME TEXT',
'SOME TEXT',
NULL,
'SOME TEXT',
'SOME TEXT',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL
);
Please not that the dynamic select will scale better for a large number of columns, but does not handle possible required type conversions, that can be done better in a static select.
Upvotes: 0
Reputation: 191415
You haven't said how you have access to the file - as an external table, something you can read with utl_file
, or imported into a real table as a CLOB, or something similar. If you can get the entire file into a CLOB then you can get it into an XMLType, or convert it as you go. You then need to transform the generic layout into something relational.
This example assumes you have a table called t42
that has a column called xml
which is XMLType and contains your file. You can use XMLTable with FLWOR XQuery expressions (read more in the documentation) that transforms each generic 'item' node into one named using the corresponding metadata entry:
select x.*
from t42
cross join xmltable(
xmlnamespaces('http://www.w3.org/2001/XMLSchema-instance' as "xs",
default 'http://developer.cognos.com/schemas/xmldata/1/'),
'for $row in /dataset/data/row
return element row {
for $col_num in (1 to count(/dataset/metadata/item))
return element {/dataset/metadata/item[$col_num]/@name} {
$row/value[$col_num]/text()
}
}'
passing t42.xml
columns oid varchar2(32) path '/row/OID',
o_dev_in_country varchar2(12) path '/row/O_DEV_IN_COUNTRY',
o_start date path '/row/O_START'
) x;
OID O_DEV_IN_COU O_START
-------------------------------- ------------ ---------
X-1234567 Yes 01-JAN-15
X-1234568 No 14-NOV-15
I've added a second row
node to your sample to make the results slightly more useful, and I've just shown three example columns. Since you know the metadata in advance you just need to create more columns
entries with the appropriate types and paths, either for all the columns or only the ones you're interested in.
The XMLTable first defines the namespaces you used, with one named "xs"
and the other as default. This lets all the nodes be identified; though you don't really need "xs"
to extract the data, and it's a built-in one anyway - you just need the default in this case. Then it iterates over each row
in your data, and constructs a new row
element. Then for each row it counts the number of metadata items, and creates a 'column' element for each, named with the col_num
th metadate item's name
attribute, and using the col_num
th row item
value.
You can use the result of that to insert into your real table (insert into ... select x.oid, x.o_start from ...
.
If you're reading the data with utl_file
then you could do the same thing all in one PL/SQL block, something like:
declare
l_clob clob;
begin
-- populate l_clob with utl_file
insert into your_real_table (oid, o_dev_in_country, o_start) -- add more columns
select oid, o_dev_in_country, o_start -- add more columns
from xmltable(
xmlnamespaces('http://www.w3.org/2001/XMLSchema-instance' as "xs",
default 'http://developer.cognos.com/schemas/xmldata/1/'),
'for $row in /dataset/data/row
return element row {
for $col_num in (1 to count(/dataset/metadata/item))
return element {/dataset/metadata/item[$col_num]/@name} {
$row/value[$col_num]/text()
}
}'
passing xmltype(l_clob)
columns oid varchar2(32) path '/row/OID',
o_dev_in_country varchar2(12) path '/row/O_DEV_IN_COUNTRY',
o_start date path '/row/O_START'
-- add more columns...
) x;
end;
/
This gets the file into a CLOB (how to do that is off-topic but sounds like you already have that part covered?), and passes that CLOB straight into the XMLTable expression, converting it on the way with XMLType()
.
You could possibly also do a XLST transform but that isn't something I've dabbled with much.
You don't need a completely dynamic solution, but just for fun, you could build a query dynamically by extracting the columns
clause members from the metadata, and executing that with the same basic transformation shown above. You could even take it further and create the table dynamically - not something I'd generally recommend at runtime of course, but as I said, just for fun...
declare
l_clob clob;
l_sql varchar2(32767);
begin
-- populate l_clob with utl_file
-- dynamic create table statement from metadata
l_sql := 'create table t42 (';
for col in (
select *
from xmltable(
xmlnamespaces(default 'http://developer.cognos.com/schemas/xmldata/1/'),
'/dataset/metadata/item'
passing xmltype(l_clob)
columns position for ordinality,
column_name varchar2(30) path '@name',
data_type varchar2(30) path '@type',
data_length varchar2(30) path '@length'
) x) loop
l_sql := l_sql || case when col.position > 1 then ', ' end
|| col.column_name
|| ' ' || case col.data_type
when 'xs:string' then 'varchar2'
when 'xs:date' then 'date'
-- add any other data types used
end
|| case when col.data_type = 'xs:string'
then '(' || col.data_length || ')' end;
end loop;
l_sql := l_sql || ')';
dbms_output.put_line(l_sql);
execute immediate l_sql;
-- dynamic insert/select creating the `columns` clause from metadata
l_sql := q'!insert into t42 select *
from xmltable(
xmlnamespaces(default 'http://developer.cognos.com/schemas/xmldata/1/'),
'for $row in /dataset/data/row
return element row {
for $col_num in (1 to count(/dataset/metadata/item))
return element column {
attribute name {/dataset/metadata/item[$col_num]/@name},
$row/value[$col_num]/text()
}
}'
passing xmltype(:l_clob)
columns !';
for col in (
select *
from xmltable(
xmlnamespaces(default 'http://developer.cognos.com/schemas/xmldata/1/'),
'/dataset/metadata/item'
passing xmltype(l_clob)
columns position for ordinality,
column_name varchar2(30) path '@name',
data_type varchar2(30) path '@type',
data_length varchar2(30) path '@length'
) x) loop
l_sql := l_sql || case when col.position > 1 then ', ' end
|| col.column_name
|| ' ' || case col.data_type
when 'xs:string' then 'varchar2'
when 'xs:date' then 'date'
-- add any other data types used
end
|| case when col.data_type = 'xs:string'
then '(' || col.data_length || ')' end
|| q'! path '//column[@name="!' || col.column_name || q'!"]'!';
end loop;
l_sql := l_sql || ')';
dbms_output.put_line(l_sql);
execute immediate l_sql using l_clob;
end;
/
This will create the t42
table, and then populate it from the data rows. It's a single insert so more efficient that extracting each data row and inserting those one-by-one.
You could also read the column names and data types into a local collection so you don't have to loop over the metadata part twice, which would make it a little more readable and maintainable, but I don't think it would have much effect on performance.
Upvotes: 1