Reputation: 53
Just curious if there is a more simplified version to check if the given body has the word style of "Heading3" applied given this sample C# code I wrote learning the OpenXML library. To be clear, I am just asking given a body element how can I determine if the given body element has what word style applied. I eventually have to write a program that process numerous .DOCX files and need to process them from a top to bottom approach.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System.IO;
namespace docxparsing
{
class Program
{
static void Main()
{
string file_to_parse = @"C:\temp\sample.docx";
WordprocessingDocument doc = WordprocessingDocument.Open(file_to_parse,false);
Body body = doc.MainDocumentPart.Document.Body;
string fooStr
foreach( var foo in body )
{
fooStr = foo.InnerXml;
/*
these 2 comments represent 2 different xml snippets from 'fooStr'. the only way i figure out how to get the word style is by reading
this xml and doing checks for values. i don't know of any other approach in using the body element to check for the applied word style
<w:pPr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:pStyle w:val="Heading2" />
<w:pPr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:pStyle w:val="Heading3" />
*/
bool hasHeading3 = fooStr.Contains("pStyle w:val=\"Heading3\"");
if ( hasHeading3 )
{
Console.WriteLine("heading3 found");
}
}
doc.Close();
}
}
}
// -------------------------------------------------------------------------------
EDIT
Here is updated code of one way to do this. Still not overall happy with it but it works. Function to look at is getWordStyleValue(string x)
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
namespace docxparsing
{
class Program
{
// ************************************************
// grab the word style value
// ************************************************
static string getWordStyleValue(string x)
{
int p = 0;
p = x.IndexOf("w:pStyle w:val=");
if ( p == -1 )
{
return "";
}
p = p + 15;
StringBuilder sb = new StringBuilder();
while (true)
{
p++;
char c = x[p];
if (c != '"')
{
sb.Append(c);
}
else
{
break;
}
}
string s = sb.ToString();
return s;
}
// ************************************************
// Main
// ************************************************
static void Main(string[] args)
{
string theFile = @"C:\temp\sample.docx";
WordprocessingDocument doc = WordprocessingDocument.Open(theFile,false);
string body_table = "DocumentFormat.OpenXml.Wordprocessing.Table";
string body_paragraph = "DocumentFormat.OpenXml.Wordprocessing.Paragraph";
Body body = doc.MainDocumentPart.Document.Body;
StreamWriter sw1 = new StreamWriter("paragraphs.log");
foreach (var b in body)
{
string body_type = b.ToString();
if (body_type == body_paragraph)
{
string str = getWordStyleValue(b.InnerXml);
if (str == "" || str == "HeadingNon-TOC" || str == "TOC1" || str == "TOC2" || str == "TableofFigures" || str == "AcronymList" )
{
continue;
}
sw1.WriteLine(str + "," + b.InnerText);
}
if ( body_type == body_table )
{
// sw1.WriteLine("Table:\n{0}",b.InnerText);
}
}
doc.Close();
sw1.Close();
}
}
}
Upvotes: 2
Views: 2570
Reputation: 53
Just pasting this Edit from original post so he has better visibility.
Here is one solution I came up with. Yes, it a little cody ( if that is a word ) but working LINQ ( my fav ) to optimize a more elegant solution.
--
Here is updated code of one way to do this. Still not overall happy with it but it works. Function to look at is getWordStyleValue(string x)
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
namespace docxparsing
{
class Program
{
// ************************************************
// grab the word style value
// ************************************************
static string getWordStyleValue(string x)
{
int p = 0;
p = x.IndexOf("w:pStyle w:val=");
if ( p == -1 )
{
return "";
}
p = p + 15;
StringBuilder sb = new StringBuilder();
while (true)
{
p++;
char c = x[p];
if (c != '"')
{
sb.Append(c);
}
else
{
break;
}
}
string s = sb.ToString();
return s;
}
// ************************************************
// Main
// ************************************************
static void Main(string[] args)
{
string theFile = @"C:\temp\sample.docx";
WordprocessingDocument doc = WordprocessingDocument.Open(theFile,false);
string body_table = "DocumentFormat.OpenXml.Wordprocessing.Table";
string body_paragraph = "DocumentFormat.OpenXml.Wordprocessing.Paragraph";
Body body = doc.MainDocumentPart.Document.Body;
StreamWriter sw1 = new StreamWriter("paragraphs.log");
foreach (var b in body)
{
string body_type = b.ToString();
if (body_type == body_paragraph)
{
string str = getWordStyleValue(b.InnerXml);
if (str == "" || str == "HeadingNon-TOC" || str == "TOC1" || str == "TOC2" || str == "TableofFigures" || str == "AcronymList" )
{
continue;
}
sw1.WriteLine(str + "," + b.InnerText);
}
if ( body_type == body_table )
{
// sw1.WriteLine("Table:\n{0}",b.InnerText);
}
}
doc.Close();
sw1.Close();
}
}
}
Upvotes: 1
Reputation: 45135
Yes. You could do something like this:
bool ContainsHeading3 = body.Descendants<ParagraphSytleId>().Any(psId => psId.Val == "Heading3");
This will look at all the ParagraphStyleId
elements (w:pStyle
in the xml) and see if any of them have the Val
of Heading3
.
Upvotes: 3