Reputation: 23
I have created an XSLT file that converts everything in a Word XML file into clean HTML; however, I am unable to convert nested lists properly.
I saved a Word v16.12 file as XML. The Word file contains two lists.
Here is the exported Open XML (relating to just the bullets).
<!-- Excerpt of the Word export: the document body, holding two bulleted lists
     separated by a plain paragraph. The w: prefix is not declared inside this
     excerpt; presumably it is bound on an ancestor element of the full export
     (confirm against the complete file). -->
<w:body>
<!-- List 1: every item is a w:p whose w:numPr carries w:numId 1.
     w:ilvl/@w:val is the zero-based nesting depth (0 for the items whose text
     says "level 1", 1 for "level 2", 2 for "level 3"). The paragraphs are flat
     siblings; nesting is expressed only through w:ilvl. -->
<w:p w:rsidR="00875AF6" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="0"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 1 level 1</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="0"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 2 level 1</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="1"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 3 level 2</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="2"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 4 level 3</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="2"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 5 level 3</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="1"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 6 level 2</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="2"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 7 level 3</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="007A38EC">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="0"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 1 Bullet 8 level 1</w:t>
</w:r>
</w:p>
<!-- Separator between the lists: an empty paragraph, one plain text paragraph
     (no w:pPr, so no list style or numbering), and another empty paragraph. -->
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241"/>
<w:p w:rsidR="00575241" w:rsidRDefault="00575241" w:rsidP="00575241">
<w:r>
<w:t>This is a break</w:t>
</w:r>
</w:p>
<w:p w:rsidR="00575241" w:rsidRDefault="00575241" w:rsidP="00575241"/>
<!-- List 2: same structure as list 1, but every item carries w:numId 2. -->
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="0"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 1 level 1</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="1"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 2 level 2</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="2"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 3 level 3</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="0"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 4 level 1</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="1"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 5 level 2</w:t>
</w:r>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="1"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 6 level 2</w:t>
</w:r>
</w:p>
<!-- This item also contains a bookmark start/end pair after its run, so a list
     paragraph can hold elements other than w:pPr and w:r. -->
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="2"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 7 level 3</w:t>
</w:r>
<w:bookmarkStart w:id="0" w:name="_GoBack"/>
<w:bookmarkEnd w:id="0"/>
</w:p>
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241">
<w:pPr>
<w:pStyle w:val="ListParagraph"/>
<w:numPr>
<w:ilvl w:val="1"/>
<w:numId w:val="2"/>
</w:numPr>
</w:pPr>
<w:r>
<w:t>List 2 Bullet 8 level 2</w:t>
</w:r>
</w:p>
<!-- Trailing empty paragraph. -->
<w:p w:rsidR="007A38EC" w:rsidRDefault="007A38EC" w:rsidP="00575241"/>
<!-- Section properties (page size, margins, columns, document grid); these
     contain no text. -->
<w:sectPr w:rsidR="007A38EC" w:rsidSect="00D678D3">
<w:pgSz w:w="11900" w:h="16840"/>
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440"
w:header="708" w:footer="708" w:gutter="0"/>
<w:cols w:space="708"/>
<w:docGrid w:linePitch="360"/>
</w:sectPr>
</w:body>
Using XSLT I need to convert the XML into this HTML
<!-- Expected output for list 1: each deeper level is a <ul> nested inside the
     <li> that precedes it. -->
<ul>
  <li>List 1 Bullet 1 level 1</li>
  <li>List 1 Bullet 2 level 1
    <ul>
      <li>List 1 Bullet 3 level 2
        <ul>
          <li>List 1 Bullet 4 level 3</li>
          <li>List 1 Bullet 5 level 3</li>
        </ul>
      </li>
      <li>List 1 Bullet 6 level 2
        <ul>
          <li>List 1 Bullet 7 level 3</li>
        </ul>
      </li>
    </ul>
  </li>
  <li>List 1 Bullet 8 level 1</li>
</ul>
<!-- The plain paragraph between the lists; its text must match the input
     paragraph ("This is a break"). -->
<p>This is a break</p>
<!-- Expected output for list 2. -->
<ul>
  <li>List 2 Bullet 1 level 1
    <ul>
      <li>List 2 Bullet 2 level 2
        <ul>
          <li>List 2 Bullet 3 level 3</li>
        </ul>
      </li>
    </ul>
  </li>
  <li>List 2 Bullet 4 level 1
    <ul>
      <li>List 2 Bullet 5 level 2</li>
      <li>List 2 Bullet 6 level 2
        <ul>
          <li>List 2 Bullet 7 level 3</li>
        </ul>
      </li>
      <li>List 2 Bullet 8 level 2</li>
    </ul>
  </li>
</ul>
I have researched this, and the closest approach I found uses a function and for-each-group, like the stylesheet below.
<!--
  Turns a flat run of <item level="N"> siblings inside <test> into nested
  <list type="ul"> elements. Everything that is not an item is copied through.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:mf="http://example.com/mf"
                exclude-result-prefixes="xs mf">

  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes"/>

  <!-- Default rule: copy every node and attribute unchanged. -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="@*, node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Within <test>, split the children into adjacent runs of items and
       non-items; item runs are nested starting at level 0, the rest is
       processed normally. -->
  <xsl:template match="test">
    <xsl:copy>
      <xsl:for-each-group select="*" group-adjacent="exists(self::item)">
        <xsl:choose>
          <xsl:when test="not(current-grouping-key())">
            <xsl:apply-templates select="current-group()"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:sequence select="mf:group(current-group(), 0)"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:for-each-group>
    </xsl:copy>
  </xsl:template>

  <!-- An item is re-created without its level attribute. -->
  <xsl:template match="item[@level]">
    <item>
      <xsl:apply-templates/>
    </item>
  </xsl:template>

  <!--
    mf:group($nodes, $level)
      $nodes  the adjacent nodes that make up one list (or sub-list)
      $level  the level value handled by this call
    Returns nothing for an empty $nodes; otherwise one <list type="ul"> in which
    nodes at $level are processed by the templates and every adjacent run of
    other nodes is wrapped recursively using $level + 1.
  -->
  <xsl:function name="mf:group" as="node()*">
    <xsl:param name="nodes" as="node()*"/>
    <xsl:param name="level" as="xs:integer"/>
    <xsl:if test="exists($nodes)">
      <list type="ul">
        <xsl:for-each-group select="$nodes" group-adjacent="@level = $level">
          <xsl:choose>
            <xsl:when test="not(current-grouping-key())">
              <xsl:sequence select="mf:group(current-group(), $level + 1)"/>
            </xsl:when>
            <xsl:otherwise>
              <xsl:apply-templates select="current-group()"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </list>
    </xsl:if>
  </xsl:function>

</xsl:stylesheet>
Unfortunately using functions and the for-each-group is beyond my ability. My question is how would I amend the above XSLT to work with the XML that I am getting from Word?
Upvotes: 2
Views: 457
Reputation: 7173
First off, we'll start with an identity template:
<!-- Identity rule: any node or attribute without a more specific template is
     copied, and its attributes and children are processed in turn. -->
<xsl:template match="node() | @*">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()"/>
  </xsl:copy>
</xsl:template>
Second, we match the w:body element and group its child elements using xsl:for-each-group. We store the result in a variable (firstPass) so that the nodes can be manipulated further in a second pass, such as:
<!-- Driver for both passes. To run the conversion on a different container
     element, change the match pattern below to the element that holds the
     paragraphs. -->
<xsl:template match="w:body">
  <!-- Pass 1: split the children into adjacent runs of list paragraphs (a w:p
       with a w:ilvl descendant) and everything else, and keep the result in a
       temporary tree. -->
  <xsl:variable name="firstPass">
    <xsl:for-each-group select="*" group-adjacent="exists(self::w:p[.//w:ilvl])">
      <xsl:choose>
        <xsl:when test="not(current-grouping-key())">
          <xsl:apply-templates select="current-group()"/>
        </xsl:when>
        <xsl:otherwise>
          <!-- 0 is the w:val that w:ilvl carries on the outermost list items. -->
          <xsl:sequence select="mf:group(current-group(), 0)"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each-group>
  </xsl:variable>
  <!-- Pass 2: run the templates over the temporary tree. -->
  <xsl:apply-templates select="$firstPass/node()"/>
</xsl:template>
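Next, we can adapt the function that you mentioned. We modify the group-adjacent expression so that it tests the w:val attribute of the paragraph's w:ilvl descendant instead of a level attribute: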
<!--
  mf:group($nodes, $level)
    $nodes  the adjacent list paragraphs that make up one list (or sub-list)
    $level  the w:ilvl value handled by this call
  Returns nothing for an empty $nodes; otherwise one <ul> in which paragraphs
  whose w:ilvl/@w:val equals $level are processed by the templates and every
  adjacent run of other paragraphs is wrapped recursively using $level + 1.
-->
<xsl:function name="mf:group" as="node()*">
  <xsl:param name="nodes" as="node()*"/>
  <xsl:param name="level" as="xs:integer"/>
  <xsl:if test="exists($nodes)">
    <ul>
      <xsl:for-each-group select="$nodes"
                          group-adjacent="exists(self::*[.//w:ilvl/@w:val = $level])">
        <xsl:choose>
          <xsl:when test="not(current-grouping-key())">
            <xsl:sequence select="mf:group(current-group(), $level + 1)"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:apply-templates select="current-group()"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:for-each-group>
    </ul>
  </xsl:if>
</xsl:function>
The following are the templates needed for the cleanup
<!-- A paragraph becomes exactly one <li> (when it uses the ListParagraph style)
     or one <p>. The wrapper is created here, once per paragraph, rather than
     once per w:t, because a single paragraph can contain several runs and
     would otherwise be split into several <li> or <p> elements. -->
<xsl:template match="w:p">
  <xsl:choose>
    <xsl:when test="descendant::w:pStyle[@w:val='ListParagraph']">
      <li>
        <xsl:apply-templates select="descendant::w:t"/>
      </li>
    </xsl:when>
    <xsl:otherwise>
      <p>
        <xsl:apply-templates select="descendant::w:t"/>
      </p>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Paragraphs without any text and the section properties produce no output. -->
<xsl:template match="w:p[.='']|w:sectPr"/>
<!-- A text run contributes only its text to the enclosing <li> or <p>. -->
<xsl:template match="w:t">
  <xsl:value-of select="."/>
</xsl:template>
After that, we still need to move each sublevel <ul> into its parent <li>. To do that, we have to do a second pass of transformation.
We will now match the nodes held in the firstPass variable:
<!-- Second pass: an <li> whose next sibling element is a <ul> takes that <ul>
     in as its last child. The <ul> is processed in the "transfer" mode so that
     the deleting template below does not apply to it. -->
<xsl:template match="li[following-sibling::*[1]/name() = 'ul']">
  <xsl:copy>
    <xsl:apply-templates/>
    <xsl:apply-templates select="following-sibling::*[1]" mode="transfer"/>
  </xsl:copy>
</xsl:template>
<!-- In the default mode, a <ul> that directly follows an <li> is dropped from
     its original position; the template above has already copied it. -->
<xsl:template match="ul[preceding-sibling::*[1]/name() = 'li']"/>
and an identity template for the other mode
<!-- Copy rule for the "transfer" mode. Only the node itself is copied in this
     mode; its attributes and children are handed back to the default mode. -->
<xsl:template match="node() | @*" mode="transfer">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()"/>
  </xsl:copy>
</xsl:template>
The whole stylesheet is as follows:
<!--
  Converts the paragraphs of a WordprocessingML w:body into HTML.

  Pass 1 (w:body template and mf:group): adjacent list paragraphs are wrapped
  in <ul> elements, one per nesting level, and each paragraph becomes an <li>
  or a <p>. After this pass a sub-list <ul> is still a sibling that follows its
  parent <li>.

  Pass 2 (li and ul templates): each such <ul> is moved inside the <li> that
  precedes it, which gives properly nested HTML lists.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:mf="http://example.com/mf"
                xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
                version="2.0"
                exclude-result-prefixes="xs mf w">
  <!-- The w prefix must be bound to the namespace URI used by the input
       document; the URI above is the WordprocessingML main namespace. With any
       other URI none of the w: patterns below match a Word export. -->

  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes" omit-xml-declaration="yes"/>

  <!--
    mf:group($nodes, $level)
      $nodes  the adjacent list paragraphs that make up one list (or sub-list)
      $level  the w:ilvl value handled by this call
    Returns nothing for an empty $nodes; otherwise one <ul> in which paragraphs
    whose w:ilvl/@w:val equals $level are processed by the templates and every
    adjacent run of other paragraphs is wrapped recursively using $level + 1.
  -->
  <xsl:function name="mf:group" as="node()*">
    <xsl:param name="nodes" as="node()*"/>
    <xsl:param name="level" as="xs:integer"/>
    <xsl:if test="$nodes">
      <ul>
        <xsl:for-each-group select="$nodes"
                            group-adjacent="boolean(self::*[descendant::w:ilvl/@w:val = $level])">
          <xsl:choose>
            <xsl:when test="current-grouping-key()">
              <xsl:apply-templates select="current-group()"/>
            </xsl:when>
            <xsl:otherwise>
              <xsl:sequence select="mf:group(current-group(), $level + 1)"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </ul>
    </xsl:if>
  </xsl:function>

  <!-- Identity rule: copy anything that has no more specific template. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@*, node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Copy rule for the "transfer" mode: the node itself is copied in this
       mode, while its attributes and children go back to the default mode so
       that deeper sub-lists are nested by the li template as well. -->
  <xsl:template match="@* | node()" mode="transfer">
    <xsl:copy>
      <xsl:apply-templates select="@*, node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- A paragraph becomes exactly one <li> (ListParagraph style) or one <p>.
       The wrapper is created once per paragraph, not once per w:t, because a
       single paragraph can contain several runs. -->
  <xsl:template match="w:p">
    <xsl:choose>
      <xsl:when test="descendant::w:pStyle[@w:val='ListParagraph']">
        <li>
          <xsl:apply-templates select="descendant::w:t"/>
        </li>
      </xsl:when>
      <xsl:otherwise>
        <p>
          <xsl:apply-templates select="descendant::w:t"/>
        </p>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!-- Paragraphs without any text and the section properties produce no output. -->
  <xsl:template match="w:p[.='']|w:sectPr"/>

  <!-- A text run contributes only its text to the enclosing <li> or <p>. -->
  <xsl:template match="w:t">
    <xsl:value-of select="."/>
  </xsl:template>

  <!-- Driver: pass 1 builds a temporary tree in $firstPass, pass 2 applies the
       templates to that tree. -->
  <xsl:template match="w:body">
    <xsl:variable name="firstPass">
      <xsl:for-each-group select="*" group-adjacent="boolean(self::w:p[descendant::w:ilvl])">
        <xsl:choose>
          <xsl:when test="current-grouping-key()">
            <!-- 0 is the w:val that w:ilvl carries on the outermost list items. -->
            <xsl:sequence select="mf:group(current-group(), 0)"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:apply-templates select="current-group()"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:for-each-group>
    </xsl:variable>
    <xsl:apply-templates select="$firstPass/node()"/>
  </xsl:template>

  <!-- Pass 2: an <li> that is directly followed by a <ul> takes that <ul> in
       as its last child. Without this template the rule below would delete
       every sub-list and nothing would put it back. -->
  <xsl:template match="li[following-sibling::*[1][name()='ul']]">
    <xsl:copy>
      <xsl:apply-templates/>
      <xsl:apply-templates select="following-sibling::*[1][name()='ul']" mode="transfer"/>
    </xsl:copy>
  </xsl:template>

  <!-- Pass 2: drop the sub-list from its original position after the <li>;
       the template above has already copied it into that <li>. -->
  <xsl:template match="ul[preceding-sibling::*[1][name()='li']]"/>

</xsl:stylesheet>
See it in action here.
Upvotes: 2