Reputation: 59
I am using the IRS -900 tax file
https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml
to create a single table containing all elements, attributes with their associated values using SQL OPENXML
. I have built the query just to see if I can get few result as shown below. But I only get an empty table.
I also tried to use online utility to create xpath
reference or the XML tree of the document to identify the elements and attributes in this long XML file.
Please suggest any easy tool to list all elements and attributes easily as I think the xpath
reference is the issue.
Here is my code
--created a table for the xml document inside sql server --Example XML: https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml
USE [IRS-900]
GO
CREATE TABLE [200931393493000150_public] (
Id INT IDENTITY PRIMARY KEY
,XMLData XML
,LoadedDateTime DATETIME
)
INSERT INTO [200931393493000150_public] (
XMLData
,LoadedDateTime
)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn
,GETDATE()
FROM OPENROWSET(BULK 'C:\Users\200931393493000150_public.xml', SINGLE_BLOB) AS x;
--select * from [dbo].[200931393493000150_public]
DECLARE @x XML -- to load the binary file to a readable xml document in sql
SELECT @x = p
FROM
-- to load the document I used openrowset
openrowset(BULK 'C:\Users\200931393493000150_public.xml', single_blob) AS [200931393493000150_public](p)
DECLARE @hdoc INT -- out put parameter
EXEC sp_xml_preparedocument @hDoc OUTPUT
,@x
SELECT *
--into table x - will load the result into sql table on the fly
-- xpath to the element , 1 is for attributes and 2 for elements
--from openxml(@hdoc, '//*',1)
--from openxml(@hdoc, '/root//*',1)
--from openxml(@hdoc, '/root//*',1)
--with specifies columns to be retreived
FROM openxml(@hdoc, '/root/returnheader/Filer/USAddress', 2)
-- flag 1 for attributes
-- flag 2 for elements
WITH (
EIN CHAR(10) '/../../@EIN'
,-- to refer the xpath for the attribute
AddressLine1 VARCHAR(50)
,AddressLine2 INT
,City VARCHAR(50)
,STATE VARCHAR(50)
,ZIPCode CHAR(5)
)
--catch release
EXEC sp_xml_removedocument @hDoc
GO
-- I have got empty table with the corresponding 6 columns
Upvotes: 0
Views: 708
Reputation: 22182
Microsoft proprietary OPENXML
and its companions sp_xml_preparedocument
and sp_xml_removedocument
are mostly kept just for backward compatibility with the obsolete SQL Server 2000.
Starting from SQL Server 2005 onwards it is better to use XQuery methods .nodes()
and .value()
to achieve what you need.
SQL
DECLARE @targetTable TABLE
(
ID INT IDENTITY PRIMARY KEY,
EIN CHAR(10),
AddressLine1 VARCHAR(50),
AddressLine2 VARCHAR(50),
City VARCHAR(50),
State CHAR(2),
ZIPCode CHAR(5)
);
-- directly from the XML file as a virtual DB table on the file system
;WITH XMLNAMESPACES (DEFAULT 'http://www.irs.gov/efile')
, rs (xmldata) AS
(
SELECT TRY_CAST(BulkColumn AS XML) AS BulkColumn
FROM OPENROWSET(BULK 'e:\Temp\200931393493000150_public.xml', SINGLE_BLOB) AS x
)
INSERT INTO @targetTable
SELECT c.value('(EIN/text())[1]', 'CHAR(10)') AS EIN
, c.value('(USAddress/AddressLine1/text())[1]','VARCHAR(50)') AS AddressLine1
, c.value('(USAddress/AddressLine2/text())[1]','VARCHAR(50)') AS AddressLine2
, c.value('(USAddress/City/text())[1]','VARCHAR(100)') AS City
, c.value('(USAddress/State/text())[1]','CHAR(2)') AS State
, c.value('(USAddress/ZIPCode/text())[1]','CHAR(5)') AS ZIPCode
FROM rs AS tbl
CROSS APPLY tbl.xmldata.nodes('/Return/ReturnHeader/Filer') AS t(c);
-- test
SELECT * FROM @targetTable;
Output
+----+------------+-------------------------------------+--------------+------------+-------+---------+
| ID | EIN | AddressLine1 | AddressLine2 | City | State | ZIPCode |
+----+------------+-------------------------------------+--------------+------------+-------+---------+
| 1 | 541599550 | c/o Piascik Assoc PC 4470 Cox Rd No | 250 | Glen Allen | VA | 23060 |
+----+------------+-------------------------------------+--------------+------------+-------+---------+
Upvotes: 2