Reputation: 119
I have the XML file below,
and I would like to convert its multiple <TextBlock> elements
into single lines of text. How can I do this using XSLT (version 2.0 or 3.0)?
My MWE is:
<?xml version="1.0" encoding="UTF-8"?>
<Page ID="Page1" PHYSICAL_IMG_NR="1" WIDTH="595.000" HEIGHT="842.000">
<TextBlock ID="p1_b3" HPOS="90.0030" VPOS="84.8860" HEIGHT="12.0640" WIDTH="414.999">
<TextLine WIDTH="414.999" HEIGHT="12.0640" ID="p1_t3" HPOS="90.0030" VPOS="84.8860">
<String ID="p1_w3" CONTENT="Title:" HPOS="90.0030" VPOS="84.8860" WIDTH="31.0440" HEIGHT="12.0640"/>
<String ID="p1_w5" CONTENT="Kinetics" HPOS="219.184" VPOS="84.8860" WIDTH="50.5700" HEIGHT="12.0640"/>
<String ID="p1_w7" CONTENT="Volumetric" HPOS="329.593" VPOS="84.8860" WIDTH="67.1710" HEIGHT="12.0640"/>
<String ID="p1_w8" CONTENT="Changes," HPOS="399.598" VPOS="84.8860" WIDTH="58.5130" HEIGHT="12.0640"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b4" HPOS="65.8820" VPOS="108.741" HEIGHT="10.2080" WIDTH="6.1160"/>
<TextBlock ID="p1_b5" HPOS="90.0030" VPOS="107.309" HEIGHT="12.0640" WIDTH="414.999">
<TextLine WIDTH="414.999" HEIGHT="12.0640" ID="p1_t5" HPOS="90.0030" VPOS="107.309">
<String ID="p1_w11" CONTENT="Precipitation," HPOS="90.0030" VPOS="107.309" WIDTH="82.3420" HEIGHT="12.0640"/>
<String ID="p1_w12" CONTENT="Strontium" HPOS="182.719" VPOS="107.309" WIDTH="61.3860" HEIGHT="12.0640"/>
<String ID="p1_w15" CONTENT="Fatigue" HPOS="347.481" VPOS="107.309" WIDTH="46.2280" HEIGHT="12.0640"/>
<String ID="p1_w17" CONTENT="Novel" HPOS="426.729" VPOS="107.309" WIDTH="35.3990" HEIGHT="12.0640"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b18" HPOS="90.0020" VPOS="369.956" HEIGHT="12.0640" WIDTH="52.7280">
<TextLine WIDTH="52.7280" HEIGHT="12.0640" ID="p1_t26" HPOS="90.0020" VPOS="369.956">
<String ID="p1_w102" CONTENT="Abstract" HPOS="90.0020" VPOS="369.956" WIDTH="52.7280" HEIGHT="12.0640"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b19" HPOS="59.7650" VPOS="403.935" HEIGHT="10.2080" WIDTH="414.986">
<TextLine WIDTH="414.986" HEIGHT="10.2080" ID="p1_t28" HPOS="90.0040" VPOS="403.935">
<String ID="p1_w124" CONTENT="Purpose: " HPOS="90.0040" VPOS="403.935" WIDTH="47.6740" HEIGHT="10.2080"/>
<String ID="p1_w115" CONTENT="The" HPOS="142.166" VPOS="403.935" WIDTH="18.9530" HEIGHT="10.2080"/>
<String ID="p1_w136" CONTENT="aim" HPOS="165.607" VPOS="403.935" WIDTH="17.7210" HEIGHT="10.2080"/>
<String ID="p1_w147" CONTENT="was" HPOS="187.816" VPOS="403.935" WIDTH="19.5690" HEIGHT="10.2080"/>
<String ID="p1_w109" CONTENT="determine" HPOS="225.535" VPOS="403.935" WIDTH="48.9170" HEIGHT="10.2080"/>
<String ID="p1_w110" CONTENT="effects" HPOS="278.940" VPOS="403.935" WIDTH="32.4060" HEIGHT="10.2080"/>
<String ID="p1_w113" CONTENT="monomer" HPOS="366.401" VPOS="403.935" WIDTH="46.4530" HEIGHT="10.2080"/>
<String ID="p1_w114" CONTENT="and" HPOS="417.353" VPOS="403.935" WIDTH="18.3480" HEIGHT="10.2080"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b20" HPOS="59.7650" VPOS="429.233" HEIGHT="10.2080" WIDTH="414.997">
<TextLine WIDTH="414.997" HEIGHT="10.2080" ID="p1_t30" HPOS="90.0040" VPOS="429.233">
<String ID="p1_w117" CONTENT="phosphate" HPOS="90.0040" VPOS="429.233" WIDTH="51.3700" HEIGHT="10.2080"/>
<String ID="p1_w118" CONTENT="monohydrate" HPOS="145.180" VPOS="429.233" WIDTH="64.2070" HEIGHT="10.2080"/>
<String ID="p1_w120" CONTENT="on" HPOS="257.930" VPOS="429.233" WIDTH="12.2320" HEIGHT="10.2080"/>
<String ID="p1_w121" CONTENT="polymerization" HPOS="273.968" VPOS="429.233" WIDTH="70.9170" HEIGHT="10.2080"/>
<String ID="p1_w153" CONTENT="and" HPOS="389.182" VPOS="429.233" WIDTH="18.3480" HEIGHT="10.2080"/>
<String ID="p1_w194" CONTENT="volumetric." HPOS="411.336" VPOS="429.233" WIDTH="50.1160" HEIGHT="10.2080"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b21" HPOS="59.7650" VPOS="454.531" HEIGHT="10.2080" WIDTH="414.997">
<TextLine WIDTH="414.997" HEIGHT="10.2080" ID="p1_t32" HPOS="90.0040" VPOS="454.531">
<String ID="p1_w127" CONTENT="Apatite" HPOS="90.0040" VPOS="454.531" WIDTH="33.0220" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w128" CONTENT="precipitation," HPOS="126.183" VPOS="454.531" WIDTH="62.3700" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w129" CONTENT="strontium" HPOS="191.721" VPOS="454.531" WIDTH="45.2320" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w131" CONTENT="and" HPOS="279.347" VPOS="454.531" WIDTH="18.3480" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w132" CONTENT="fatigue" HPOS="300.863" VPOS="454.531" WIDTH="33.0220" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w134" CONTENT="novel" HPOS="349.384" VPOS="454.531" WIDTH="26.2900" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w136" CONTENT="composites" HPOS="433.369" VPOS="454.531" WIDTH="55.6380" HEIGHT="10.2080" STYLEREFS="font0"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b22" HPOS="59.7650" VPOS="479.829" HEIGHT="10.2080" WIDTH="72.1380">
<TextLine WIDTH="72.1380" HEIGHT="10.2080" ID="p1_t34" HPOS="90.0040" VPOS="479.829">
<String ID="p1_w139" CONTENT="vertebroplasty. " HPOS="90.0040" VPOS="479.829" WIDTH="72.1380" HEIGHT="10.2080" STYLEREFS="font0"/>
</TextLine>
</TextBlock>
<TextBlock ID="p1_b23" HPOS="59.7650" VPOS="517.127" HEIGHT="10.2080" WIDTH="414.997">
<TextLine WIDTH="414.997" HEIGHT="10.2080" ID="p1_t36" HPOS="90.0040" VPOS="517.127">
<String ID="p1_w141" CONTENT="Materials" HPOS="90.0040" VPOS="517.127" WIDTH="47.6850" HEIGHT="10.2080" STYLEREFS="font8"/>
<String ID="p1_w142" CONTENT="and" HPOS="141.506" VPOS="517.127" WIDTH="19.5580" HEIGHT="10.2080" STYLEREFS="font8"/>
<String ID="p1_w143" CONTENT="methods:" HPOS="164.870" VPOS="517.127" WIDTH="49.5000" HEIGHT="10.2080" STYLEREFS="font8"/>
<String ID="p1_w144" CONTENT="Polypropylene" HPOS="218.187" VPOS="517.127" WIDTH="69.7070" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w145" CONTENT="(PPGDMA)" HPOS="291.711" VPOS="517.127" WIDTH="55.0000" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w146" CONTENT="or" HPOS="350.517" VPOS="517.127" WIDTH="9.7900" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w147" CONTENT="triethylene" HPOS="364.113" VPOS="517.127" WIDTH="50.7540" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w148" CONTENT="(TEGDMA)" HPOS="418.684" VPOS="517.127" WIDTH="54.3730" HEIGHT="10.2080" STYLEREFS="font0"/>
<String ID="p1_w149" CONTENT="glycol" HPOS="476.874" VPOS="517.127" WIDTH="28.1270" HEIGHT="10.2080" STYLEREFS="font0"/>
</TextLine>
</TextBlock>
</Page>
Expected Output is:
<divS>Title: Kinetics Volumetric Changes, Precipitation, Strontium Fatigue Novel</divS> <divS>Abstract Purpose: The aim was determine effects monomer and phosphate monohydrate on polymerization and volumetric.</divS> <divS>Apatite precipitation, strontium and fatigue novel composites vertebroplasty.</divS> <divS>Materials and methods: Polypropylene (PPGDMA) or triethylene (TEGDMA) glycol</divS>
Any help greatly appreciated!
Upvotes: 0
Views: 77
Reputation: 116993
A simple way to create a line of text for each TextBlock
in the input would be:
XSLT 2.0
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!--
    Flattens a Page document into a list of text lines:
    every TextBlock child of the root Page becomes one <line> element,
    and all <line> elements are wrapped in a single <lines> root.
  -->
  <xsl:output method="xml"
              version="1.0"
              encoding="UTF-8"
              indent="yes"/>

  <!-- Root rule: create the wrapper and hand each TextBlock to its own rule,
       in document order. -->
  <xsl:template match="/Page">
    <lines>
      <xsl:apply-templates select="TextBlock"/>
    </lines>
  </xsl:template>

  <!-- One <line> per TextBlock. The CONTENT attributes of all
       TextLine/String descendants are joined with a single space, which is
       what xsl:value-of does by default in XSLT 2.0; the separator is only
       spelled out here for readability. A TextBlock without any String
       yields an empty <line/>. -->
  <xsl:template match="TextBlock">
    <line>
      <xsl:value-of select="TextLine/String/@CONTENT" separator=" "/>
    </line>
  </xsl:template>

</xsl:stylesheet>
However, the result:
<?xml version="1.0" encoding="UTF-8"?>
<lines>
<line>Title: Kinetics Volumetric Changes,</line>
<line/>
<line>Precipitation, Strontium Fatigue Novel</line>
<line>Abstract</line>
<line>Purpose:  The aim was determine effects monomer and</line>
<line>phosphate monohydrate on polymerization and volumetric.</line>
<line>Apatite precipitation, strontium and fatigue novel composites</line>
<line>vertebroplasty. </line>
<line>Materials and methods: Polypropylene (PPGDMA) or triethylene (TEGDMA) glycol</line>
</lines>
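is very different from the one shown in your question.
Note that this inserts a space between the words in a TextBlock
- a space that does not exist in the input. This happens because, in XSLT 2.0,
xsl:value-of outputs every item of the selected sequence and separates the items
with a single space by default (the separator attribute can be used to change this).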
Upvotes: 1