dxlguru
dxlguru

Reputation: 53

C# OpenXml Get DOCX WordStyle Property Simplified Code

Just curious if there is a more simplified version to check if the given body has the word style of "Heading3" applied given this sample C# code I wrote learning the OpenXML library. To be clear, I am just asking given a body element how can I determine if the given body element has what word style applied. I eventually have to write a program that process numerous .DOCX files and need to process them from a top to bottom approach.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System.IO;

namespace docxparsing
{
   class Program
   {
    static void Main()
    {
        string file_to_parse = @"C:\temp\sample.docx";

        WordprocessingDocument doc = WordprocessingDocument.Open(file_to_parse,false);

        Body body = doc.MainDocumentPart.Document.Body;

        string fooStr
        foreach( var foo in body )
        {
            fooStr = foo.InnerXml;

            /*
                            these 2 comments represent 2 different xml snippets from 'fooStr'. the only way i figure out how to get the word style is by reading
                                    this xml and doing checks for values. i don't know of any other approach in using the body element to check for the applied word style

                <w:pPr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:pStyle w:val="Heading2" />
                <w:pPr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:pStyle w:val="Heading3" />
            */

            bool hasHeading3 = fooStr.Contains("pStyle w:val=\"Heading3\"");

            if ( hasHeading3 )
            {
                Console.WriteLine("heading3 found");
            }
        }

        doc.Close();
    }
}

}

// -------------------------------------------------------------------------------

EDIT

Here is updated code of one way to do this. Still not overall happy with it but it works. Function to look at is getWordStyleValue(string x)

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System;
using System.Diagnostics;
using System.IO;
using System.Text;


namespace docxparsing
{
    class Program
    {
        // ************************************************
        // grab the word style value
        // ************************************************
        static string getWordStyleValue(string x)
        {
            int p = 0;
            p = x.IndexOf("w:pStyle w:val=");
            if ( p == -1 )
            {
                return "";
            }
            p = p + 15;

            StringBuilder sb = new StringBuilder();
            while (true)
            {
                p++;
                char c = x[p];
                if (c != '"')
                {
                    sb.Append(c);
                }
                else
                {
                    break;
                }
            }

            string s = sb.ToString();
            return s;
        }


        // ************************************************
        // Main
        // ************************************************
        static void Main(string[] args)
        {
            string theFile = @"C:\temp\sample.docx";
            WordprocessingDocument doc =  WordprocessingDocument.Open(theFile,false);

            string body_table     = "DocumentFormat.OpenXml.Wordprocessing.Table";
            string body_paragraph = "DocumentFormat.OpenXml.Wordprocessing.Paragraph";

            Body body = doc.MainDocumentPart.Document.Body;
            StreamWriter sw1 = new StreamWriter("paragraphs.log");

            foreach (var b in body)
            {
                string body_type = b.ToString();

                if (body_type == body_paragraph)
                {
                    string str = getWordStyleValue(b.InnerXml);

                    if (str == "" || str == "HeadingNon-TOC" || str == "TOC1" || str == "TOC2" || str == "TableofFigures" || str == "AcronymList" )
                    {
                        continue;
                    }

                    sw1.WriteLine(str + "," + b.InnerText);
                }

                if ( body_type == body_table )
                {
             //       sw1.WriteLine("Table:\n{0}",b.InnerText);
                }
            }

            doc.Close();
            sw1.Close();
        }
    }
}

Upvotes: 2

Views: 2570

Answers (2)

dxlguru
dxlguru

Reputation: 53

Just pasting this Edit from original post so he has better visibility.

Here is one solution I came up with. Yes, it a little cody ( if that is a word ) but working LINQ ( my fav ) to optimize a more elegant solution.

--

Here is updated code of one way to do this. Still not overall happy with it but it works. Function to look at is getWordStyleValue(string x)

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System;
using System.Diagnostics;
using System.IO;
using System.Text;


namespace docxparsing
{
    class Program
    {
        // ************************************************
        // grab the word style value
        // ************************************************
        static string getWordStyleValue(string x)
        {
            int p = 0;
            p = x.IndexOf("w:pStyle w:val=");
            if ( p == -1 )
            {
                return "";
            }
            p = p + 15;

            StringBuilder sb = new StringBuilder();
            while (true)
            {
                p++;
                char c = x[p];
                if (c != '"')
                {
                    sb.Append(c);
                }
                else
                {
                    break;
                }
            }

            string s = sb.ToString();
            return s;
        }


        // ************************************************
        // Main
        // ************************************************
        static void Main(string[] args)
        {
            string theFile = @"C:\temp\sample.docx";
            WordprocessingDocument doc =  WordprocessingDocument.Open(theFile,false);

            string body_table     = "DocumentFormat.OpenXml.Wordprocessing.Table";
            string body_paragraph = "DocumentFormat.OpenXml.Wordprocessing.Paragraph";

            Body body = doc.MainDocumentPart.Document.Body;
            StreamWriter sw1 = new StreamWriter("paragraphs.log");

            foreach (var b in body)
            {
                string body_type = b.ToString();

                if (body_type == body_paragraph)
                {
                    string str = getWordStyleValue(b.InnerXml);

                    if (str == "" || str == "HeadingNon-TOC" || str == "TOC1" || str == "TOC2" || str == "TableofFigures" || str == "AcronymList" )
                    {
                        continue;
                    }

                    sw1.WriteLine(str + "," + b.InnerText);
                }

                if ( body_type == body_table )
                {
             //       sw1.WriteLine("Table:\n{0}",b.InnerText);
                }
            }

            doc.Close();
            sw1.Close();
        }
    }
}

Upvotes: 1

Matt Burland
Matt Burland

Reputation: 45135

Yes. You could do something like this:

bool ContainsHeading3 = body.Descendants<ParagraphSytleId>().Any(psId => psId.Val == "Heading3");

This will look at all the ParagraphStyleId elements (w:pStyle in the xml) and see if any of them have the Val of Heading3.

Upvotes: 3

Related Questions