Reputation: 21
I am trying to transform an XML file with XSLT. Inside a for-each loop I want to drop all but one pair of opening and closing tags of the parent element while still outputting all of the child nodes.
I am working with XML that looks like this:
<ClinicalData StudyOID="S_ABCD1" MetaDataVersionOID="v1.0.0">
<SubjectData SubjectKey="SS_ABCD1068" OpenClinica:StudySubjectID="1068">
<StudyEventData StudyEventOID="SE_ABVISIT1" StudyEventRepeatKey="1">
<FormData FormOID="F_ABCDEKGINTER_VER10">
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1926"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKGCDRID" Value="453"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKGDATE" Value="2010-02-25"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKGTTRID" Value="616"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKG01" Value="1"/>
</ItemGroupData>
</FormData>
</StudyEventData>
</SubjectData>
<SubjectData SubjectKey="SS_ABCD1669" OpenClinica:StudySubjectID="1669">
<StudyEventData StudyEventOID="SE_ABVISIT1" StudyEventRepeatKey="1">
<FormData FormOID="F_ABCDEKGINTER_VER10">
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1796"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKG02" Value="1"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKGCDRID" Value="453"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKGDATE" Value="2009-12-21"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKGTTRID" Value="616"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKG01" Value="1"/>
</ItemGroupData>
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_EKG03" Value="1"/>
</ItemGroupData>
</FormData>
</StudyEventData>
</SubjectData>
When the above XML is read into SAS, all the columns are present, but a separate row is created for each <ItemData>
node because each one is wrapped in its own opening <ItemGroupData>
and closing </ItemGroupData>
tags.
Example:
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1926"/>
</ItemGroupData>
Ideally, the transformation should output the leading element only once,
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
loop through the <ItemGroupData>
nodes, output all of their <ItemData>
children, and include the closing tag </ItemGroupData>
only once, after the last <ItemData>
child output from the <ItemGroupData>
nodes.
Transformed output should look like this:
<ClinicalData StudyOID="S_ABCD1" MetaDataVersionOID="v1.0.0">
<SubjectData SubjectKey="SS_ABCD1068" OpenClinica:StudySubjectID="1068">
<StudyEventData StudyEventOID="SE_ABVISIT1" StudyEventRepeatKey="1">
<FormData FormOID="F_ABCDEKGINTER_VER10">
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1926"/>
<ItemData ItemOID="I_ABCDE_EKG01" Value="1"/>
<ItemData ItemOID="I_ABCDE_EKGCDRID" Value="453"/>
<ItemData ItemOID="I_ABCDE_EKGDATE" Value="2010-02-25"/>
<ItemData ItemOID="I_ABCDE_EKGTTRID" Value="616"/>
</ItemGroupData>
</FormData>
</StudyEventData>
</SubjectData>
<SubjectData SubjectKey="SS_ABCD1669" OpenClinica:StudySubjectID="1669">
<StudyEventData StudyEventOID="SE_ABVISIT1" StudyEventRepeatKey="1">
<FormData FormOID="F_ABCDEKGINTER_VER10">
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1796"/>
<ItemData ItemOID="I_ABCDE_EKG01" Value="1"/>
<ItemData ItemOID="I_ABCDE_EKG02" Value="1"/>
<ItemData ItemOID="I_ABCDE_EKG03" Value="1"/>
<ItemData ItemOID="I_ABCDE_EKGCDRID" Value="453"/>
<ItemData ItemOID="I_ABCDE_EKGDATE" Value="2009-12-21"/>
<ItemData ItemOID="I_ABCDE_EKGTTRID" Value="616"/>
</ItemGroupData>
</FormData>
</StudyEventData>
</SubjectData>
The XML file is huge and I don't think I can include it.
Here is the XSL I am working with:
<?xml version="1.0"?>
<!--
  Converts an ODM export into a flat document: the root element is named
  after the study, and each odm:ItemGroupData becomes one element (named by
  its @ItemGroupOID) whose children are named by the @ItemOID of each
  odm:ItemData and hold the corresponding @Value.

  NOTE(review): the opening xsl:stylesheet tag was missing from the posted
  code. version="2.0" is inferred from the "name" attribute on xsl:output,
  which XSLT 1.0 does not define. The odm and OpenClinica namespace URIs
  below are assumptions; they must match the namespaces declared in the
  source document, so confirm them before use.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:odm="http://www.cdisc.org/ns/odm/v1.3"
                xmlns:OpenClinica="http://www.openclinica.org/ns/odm_ext_v130/v3.1">

  <xsl:output encoding="utf-8" indent="yes" method="xml" name="xml"/>

  <xsl:template match="/">
    <!-- Root element name: "S" followed by the first Study @OID from its
         third character onward, truncated to eight characters in total. -->
    <xsl:variable name="vStudyName" select="substring(concat('S',substring(//odm:Study[position()=1]/@OID, 3)),1,8)"/>
    <xsl:element name="{$vStudyName}">
      <!-- One output element is produced for every ItemGroupData in the input. -->
      <xsl:for-each select="odm:ODM/odm:ClinicalData/odm:SubjectData/odm:StudyEventData/odm:FormData/odm:ItemGroupData">
        <xsl:element name="{@ItemGroupOID}"> <!-- begin <ItemGroupData> tag -->
          <!-- The subject/event header fields are written only when no
               preceding sibling carries the same @ItemGroupOID. -->
          <xsl:if test="not(@ItemGroupOID = preceding-sibling::*/@ItemGroupOID)">
            <xsl:element name="SubjectID">
              <xsl:value-of select="../../../@OpenClinica:StudySubjectID"/>
            </xsl:element>
            <xsl:variable name="vStudyEventOID">
              <xsl:value-of select="../../@StudyEventOID"/>
            </xsl:variable>
            <!-- Looks up the event name in the study metadata by its OID. -->
            <xsl:element name="StudyEvent">
              <xsl:value-of select="/odm:ODM/odm:Study/odm:MetaDataVersion/odm:StudyEventDef[@OID=$vStudyEventOID]/@Name"/>
            </xsl:element>
            <xsl:element name="StudyEventRepeatKey">
              <xsl:value-of select="../../@StudyEventRepeatKey"/>
            </xsl:element>
            <xsl:element name="ItemGroupRepeatKey">
              <xsl:value-of select="@ItemGroupRepeatKey"/>
            </xsl:element>
          </xsl:if>
          <!-- Each ItemData becomes an element named by @ItemOID containing @Value. -->
          <xsl:for-each select="odm:ItemData">
            <xsl:element name="{@ItemOID}">
              <xsl:value-of select="@Value"/>
            </xsl:element>
          </xsl:for-each>
        </xsl:element> <!-- closing </ItemGroupData> tag -->
      </xsl:for-each>
    </xsl:element>
  </xsl:template>

  <!-- Returns the value of $groupid; $formname and $groupname are accepted
       but not used. Nothing in this stylesheet calls this template. -->
  <xsl:template name="get_tablename">
    <xsl:param name="formname"/>
    <xsl:param name="groupname"/>
    <xsl:param name="groupid"/>
    <xsl:value-of select="$groupid"/>
  </xsl:template>

</xsl:stylesheet>
First time working with XSL. The language seems very powerful. Any help will be greatly appreciated - Thanks
Upvotes: 2
Views: 456
Reputation: 167581
One approach is to use the identity transformation as the starting point, add a template for the first ItemGroupData
of each FormData that also processes the ItemData
children of its following siblings, and add an empty template so that the other ItemGroupData
elements produce no output:
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes"/>

  <!-- Identity rule: copies every node and attribute that no other
       template claims. -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="node() | @*"/>
    </xsl:copy>
  </xsl:template>

  <!-- First ItemGroupData of a FormData: copied with its attributes and
       child elements, then extended with the ItemData children of all
       following ItemGroupData siblings. -->
  <xsl:template match="FormData/ItemGroupData[position() = 1]">
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates select="* | following-sibling::ItemGroupData/ItemData"/>
    </xsl:copy>
  </xsl:template>

  <!-- Every later ItemGroupData is suppressed; its ItemData children have
       already been emitted by the template above. -->
  <xsl:template match="FormData/ItemGroupData[position() != 1]"/>

</xsl:stylesheet>
Upvotes: 1
Reputation: 338248
This XSLT 1.0 stylesheet looks like it does what you need:
<xsl:transform version="1.0"
               xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes" encoding="UTF-8" method="xml"/>

  <!-- FormData: copied with its own attributes, then given a single new
       ItemGroupData that carries the attributes of the first original
       ItemGroupData and every ItemData found one level below FormData. -->
  <xsl:template match="FormData">
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:element name="ItemGroupData">
        <xsl:copy-of select="ItemGroupData[1]/@*"/>
        <xsl:copy-of select="*/ItemData"/>
      </xsl:element>
    </xsl:copy>
  </xsl:template>

  <!-- Identity rule: copies everything not handled by the template above. -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="node() | @*"/>
    </xsl:copy>
  </xsl:template>

</xsl:transform>
There are two templates here. The second one, commonly called the identity template, applies to any node for which there is no more specific template defined. All it does is copy the input as-is. This template does the bulk of the work, copying most of the input document unchanged.
The first template matches only one specific element, namely <FormData>
. This template copies the <FormData>
itself and its attributes, then creates a new <ItemGroupData>
, copies the attributes of the first <ItemGroupData>
in the current <FormData>
and finally all <ItemData>
elements from one level deeper.
The output is:
<ClinicalData xmlns:OpenClinica="OpenClinica" StudyOID="S_ABCD1" MetaDataVersionOID="v1.0.0">
<SubjectData SubjectKey="SS_ABCD1068" OpenClinica:StudySubjectID="1068">
<StudyEventData StudyEventOID="SE_ABVISIT1" StudyEventRepeatKey="1">
<FormData FormOID="F_ABCDEKGINTER_VER10">
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1926"/>
<ItemData ItemOID="I_ABCDE_EKGCDRID" Value="453"/>
<ItemData ItemOID="I_ABCDE_EKGDATE" Value="2010-02-25"/>
<ItemData ItemOID="I_ABCDE_EKGTTRID" Value="616"/>
<ItemData ItemOID="I_ABCDE_EKG01" Value="1"/>
</ItemGroupData>
</FormData>
</StudyEventData>
</SubjectData>
<SubjectData SubjectKey="SS_ABCD1669" OpenClinica:StudySubjectID="1669">
<StudyEventData StudyEventOID="SE_ABVISIT1" StudyEventRepeatKey="1">
<FormData FormOID="F_ABCDEKGINTER_VER10">
<ItemGroupData ItemGroupOID="IG_ABCDE_UNGROUPED_366" TransactionType="Insert">
<ItemData ItemOID="I_ABCDE_AUTO_ID_4698" Value="1796"/>
<ItemData ItemOID="I_ABCDE_EKG02" Value="1"/>
<ItemData ItemOID="I_ABCDE_EKGCDRID" Value="453"/>
<ItemData ItemOID="I_ABCDE_EKGDATE" Value="2009-12-21"/>
<ItemData ItemOID="I_ABCDE_EKGTTRID" Value="616"/>
<ItemData ItemOID="I_ABCDE_EKG01" Value="1"/>
<ItemData ItemOID="I_ABCDE_EKG03" Value="1"/>
</ItemGroupData>
</FormData>
</StudyEventData>
</SubjectData>
</ClinicalData>
(I had to make up the OpenClinica
namespace URI, which is missing in your sample.)
Upvotes: 1