Reputation: 50855
I have an XML document snippet that matches this XSD:
<xs:complexType name="QuestionType">
<xs:sequence>
<xs:element name="questionId" type="xs:string" minOccurs="1" />
<xs:element name="questionDescription" type="xs:string" minOccurs="1" />
<xs:element name="questionHeader" type="xs:string" minOccurs="0" />
<xs:element name="questionLabel" type="xs:string" minOccurs="0" />
<xs:element name="version" type="xs:string" minOccurs="1" maxOccurs="1" />
<xs:element name="SubQuestion" type="QuestionType"
minOccurs="0" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
This recursively defines a <Question>
elements that can have an infite number of <SubQuestion>
elements, both of the type QuestionType
.
Using SQL, I'd like to query the document once to get a single result set with all of the questions and sub-questions. I have two independent queries at the moment to achieve this (please note that I'm using NVarChar(1000)
for testing purposes only - they will be more appropriately sized in production, and that @X
is an XML variable that matches the schema above):
SELECT -- Top-level questions...
C.value('questionId[1]', 'NVarChar(1000)') Id,
NULL ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version
FROM @X.nodes('//Question') X(C);
SELECT -- Sub-questions...
C.value('questionId[1]', 'NVarChar(1000)') Id,
C.query('..').value('(Question/questionId)[1]', 'NVarChar(1000)') ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version
FROM @X.nodes('//SubQuestion') X(C);
I'd expect this could be solved using a recursive CTE, but I'm having trouble putting one together.
Upvotes: 1
Views: 653
Reputation: 831
Given that you have tagged this question with sql-server-2008 and that IMHO SQL Server 2008 has support for XQuery I would like to suggest a different "angle": use an XPath expression to select the nodes you are interested in.
.//*[local-name(.) = 'Question' or local-name(.) = 'SubQuestion']
Please note that I am using the XPath function local-name() in case your real XML data has namespace declarations.
I have created a sample XML file to test the expression above:
<?xml version="1.0" encoding="UTF-8"?>
<Questions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.acme.com" xsi:schemaLocation="sample.xsd">
<Question>
<questionId>1</questionId>
<questionDescription>Question 1</questionDescription>
<version>1</version>
<SubQuestion>
<questionId>1.1</questionId>
<questionDescription>Question 1.1</questionDescription>
<version>1</version>
<SubQuestion>
<questionId>1.1.1</questionId>
<questionDescription>Question 1.1.1</questionDescription>
<version>1</version>
<SubQuestion>
<questionId>1.1.1.1</questionId>
<questionDescription>Question 1.1.1.1</questionDescription>
<version>1</version>
</SubQuestion>
<SubQuestion>
<questionId>1.1.1.2</questionId>
<questionDescription>Question 1.1.1.2</questionDescription>
<version>1</version>
</SubQuestion>
</SubQuestion>
<SubQuestion>
<questionId>1.2</questionId>
<questionDescription>Question 1.2</questionDescription>
</SubQuestion>
</SubQuestion>
</Question>
<Question>
<questionId>2</questionId>
<questionDescription>Question 2</questionDescription>
<version>1</version>
</Question>
<Question>
<questionId>3</questionId>
<questionDescription>Question 3</questionDescription>
<version>1</version>
<SubQuestion>
<questionId>3.1</questionId>
<questionDescription>Question 3.1</questionDescription>
<version>1</version>
</SubQuestion>
</Question>
</Questions>
Evaluating this XQuery query against that sample
declare namespace acme = "http://www.acme.com";
<AllQuestions>
{
for $question in .//*[local-name(.) = 'Question' or local-name(.) = 'SubQuestion']
return
<Question>
<questionId>{ data($question/acme:questionId) }</questionId>
<questionDescription>{ data($question/acme:questionDescription) }</questionDescription>
</Question>
}
</AllQuestions>
will result in
<?xml version="1.0" encoding="UTF-8"?>
<AllQuestions>
<Question>
<questionId>1</questionId>
<questionDescription>Question 1</questionDescription>
</Question>
<Question>
<questionId>1.1</questionId>
<questionDescription>Question 1.1</questionDescription>
</Question>
<Question>
<questionId>1.1.1</questionId>
<questionDescription>Question 1.1.1</questionDescription>
</Question>
<Question>
<questionId>1.1.1.1</questionId>
<questionDescription>Question 1.1.1.1</questionDescription>
</Question>
<Question>
<questionId>1.1.1.2</questionId>
<questionDescription>Question 1.1.1.2</questionDescription>
</Question>
<Question>
<questionId>1.2</questionId>
<questionDescription>Question 1.2</questionDescription>
</Question>
<Question>
<questionId>2</questionId>
<questionDescription>Question 2</questionDescription>
</Question>
<Question>
<questionId>3</questionId>
<questionDescription>Question 3</questionDescription>
</Question>
<Question>
<questionId>3.1</questionId>
<questionDescription>Question 3.1</questionDescription>
</Question>
</AllQuestions>
EDIT - Final Query
SELECT
C.value('questionId[1]', 'NVarChar(1000)') Id,
COALESCE(
C.query('..').value('(Question/questionId)[1]', 'NVarChar(1000)'),
C.query('..').value('(SubQuestion/questionId)[1]', 'NVarChar(1000)')
) ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version
FROM
@X.nodes('.//*[local-name(.)="Question" or local-name(.)="SubQuestion"]') X(C);
Upvotes: 2
Reputation: 50855
I'm doing this so far, although I'm still hoping to compact the query a bit:
WITH Q AS (
SELECT
C.value('questionId[1]', 'NVarChar(1000)') Id,
NULL ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version
FROM @X.nodes('//Question') X(C)
UNION ALL
SELECT
C.value('questionId[1]', 'NVarChar(1000)') Id,
COALESCE(
C.query('..').value('(Question/questionId)[1]', 'NVarChar(1000)'),
C.query('..').value('(SubQuestion/questionId)[1]', 'NVarChar(1000)')
) ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version
FROM @X.nodes('//SubQuestion') X(C)
)
SELECT Q.Id, Q.ParentId, Q.Description, Q.Header, Q.Label, Q.Version
FROM Q;
This is the important bit, as it will get whichever is the first non-null ParentId
value:
COALESCE(
C.query('..').value('(Question/questionId)[1]', 'NVarChar(1000)'),
C.query('..').value('(SubQuestion/questionId)[1]', 'NVarChar(1000)')
) ParentId
Upvotes: 1
Reputation: 4270
You can use a CTE:
WITH TopLevel (ID, ParentID, Description, Header, Label,Level)
AS
(
SELECT -- Top-level questions...
C.value('questionId[1]', 'NVarChar(1000)') Id,
NULL ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version,
0 AS Level
FROM @X.nodes('//Question') X(C)
UNION ALL
SELECT -- Sub-questions...
C.value('questionId[1]', 'NVarChar(1000)') Id,
C.query('..').value('(Question/questionId)[1]', 'NVarChar(1000)') ParentId,
C.value('questionDescription[1]', 'NVarChar(1000)') Description,
NULLIF(C.value('questionHeader[1]', 'NVarChar(1000)'), '') Header,
NULLIF(C.value('questionLabel[1]', 'NVarChar(1000)'), '') Label,
C.value('version[1]', 'NVarChar(1000)') Version
,Level + 1 AS Level
FROM @X.nodes('//SubQuestion') X(C)
JOIN TopLevel AS t ON C.query('..').value('(Question/questionId)[1]', 'NVarChar(1000)') = t.id )
SELECT * FROM TopLevel
Reference: http://msdn.microsoft.com/en-us/library/ms186243.aspx
Upvotes: 0