QRUSH
QRUSH

Reputation: 81

How to get paragraphs under each Header using Word.Interop

I have some sample text like this:

1 Header
   bla bla bla...
   bla bla bla...
   1.1 SubHeader
       bla bla bla...
       bla bla bla...

I read the .docx file page by page, and I want to store each header together with the paragraphs beneath it in a dictionary:

{"1 Header", "bla bla bla"...},
{"1.1 Subheader", "bla bla bla"...},

where the key is a string (header name) and the value is Paragraphs or List<Paragraph>

I know that this can be done with Selection(), but I don't know how to use it.

// Visit every paragraph; the index runs from 1 to Count inclusive.
for (int index = 1; index <= paragraphCollection.Count; index++)
{
    // Read the paragraph text once and reuse it for both checks.
    string paragraphText = paragraphCollection[index].Range.Text;
    bool startsSection = isHeader(paragraphText) || isSubHeader(paragraphText);
    if (!startsSection)
    {
        continue;
    }

    //collect paragraphs until we find another header or subheader
}

Upvotes: 3

Views: 725

Answers (3)

Smith
Smith

Reputation: 183

I'm not sure this is what you want. My code is far more monstrous.

// Result container filled by IteratingHeadingsForDoSomething: one single-entry dictionary per heading,
// keyed by (outline level, heading text) and holding the paragraph texts collected under that heading.
public List<Dictionary<Tuple<int, String>, List<string>>> MyList = new List<Dictionary<Tuple<int, string>, List<string>>>();

/// <summary>
/// Walks the headings of the active document in order and, for each heading, appends to
/// <c>MyList</c> a single-entry dictionary mapping (outline level, heading text) to the
/// texts of the body-text paragraphs that follow the heading up to the next heading.
/// </summary>
/// <remarks>
/// The heading strings come from <c>GetCrossReferenceItems</c>, which carries no location
/// information, so the Selection is moved from heading to heading in lock-step with the loop.
/// </remarks>
public void IteratingHeadingsForDoSomething()
{
    Array listHeadingsString = (Array)(Object)this.ThisApplication.ActiveDocument.GetCrossReferenceItems(WdReferenceType.wdRefTypeHeading); // Collection of heading strings, No location info
    int headingsCount = listHeadingsString.Length;

    // Do not assume the array's base index; ask the array instead of hard-coding 1.
    int firstIndex = listHeadingsString.GetLowerBound(0);

    // End offset of the document's last paragraph; used to detect that there is nothing further to select.
    int documentEnd = ThisApplication.ActiveDocument.Paragraphs.Last.Range.End;

    ThisApplication.Selection.GoTo(What: Word.WdGoToItem.wdGoToHeading, Which: Word.WdGoToDirection.wdGoToFirst); //goto first heading
    for (int offset = 0; offset < headingsCount; offset++)
    {
        Dictionary<Tuple<int, string>, List<string>> myDictionary = new Dictionary<Tuple<int, string>, List<string>>();
        List<string> paras = new List<string>();

        string headingString = listHeadingsString.GetValue(firstIndex + offset).ToString();
        int headingOutlineLevel = (int)(Word.WdOutlineLevel)ThisApplication.Selection.Range.Paragraphs.OutlineLevel;
        Tuple<int, string> heading = new Tuple<int, string>(headingOutlineLevel, headingString);

        // Format explicitly: passing the heading as the first argument of a two-argument
        // Debug.WriteLine would treat it as a format string and never print the level.
        Debug.WriteLine(string.Format("{0} (outline level {1})", headingString, headingOutlineLevel));

        // A heading that is itself the last paragraph has no following paragraph to select.
        bool headingIsLastParagraph = ThisApplication.Selection.Paragraphs.Last.Range.End == documentEnd;

        if (!headingIsLastParagraph)
        {
            SelectNextParagraph();
            Word.Selection currSelection = ThisApplication.Selection;
            if (IsOutlineLevel(ref currSelection))
            {
                Debug.WriteLine("This is a heading too"); // >>> Skip storing??
                GotoPreviousHeading(); // go back to previous heading
            }
            else
            {
                while (!IsOutlineLevel(ref currSelection))
                {
                    // Store the paragraph BEFORE testing for the end of the document,
                    // otherwise the document's final paragraph is silently dropped.
                    String para = currSelection.Range.Text;
                    paras.Add(para);
                    Debug.WriteLine(para);

                    if (currSelection.Range.End == documentEnd) // reached end of doc, nothing more to select
                    {
                        break;
                    }
                    SelectNextParagraph();
                }
                GotoPreviousHeading();
            }
        }

        //if (paras.Count == 0) paras.Add("");
        myDictionary.Add(heading, paras);
        MyList.Add(myDictionary);

        if (headingIsLastParagraph)
        {
            break;
        }
        GoToNextHeading();
    }

    // List<T>.ToString() only yields the type name, so report the number of collected headings instead.
    Debug.WriteLine(string.Format("Collected {0} heading(s).", MyList.Count));
}

/// <summary>Asks Word to move the selection to the previous heading.</summary>
private void GotoPreviousHeading()
{
    Word.Selection selection = ThisApplication.Selection;
    selection.GoTo(
        What: Word.WdGoToItem.wdGoToHeading,
        Which: Word.WdGoToDirection.wdGoToPrevious);
}
/// <summary>Asks Word to move the selection to the next heading.</summary>
private void GoToNextHeading()
{
    Word.Selection selection = ThisApplication.Selection;
    selection.GoTo(
        What: Word.WdGoToItem.wdGoToHeading,
        Which: Word.WdGoToDirection.wdGoToNext);
}

/// <summary>Selects the paragraph one unit after the current selection.</summary>
private void SelectNextParagraph()
{
    Word.Range nextParagraph = ThisApplication.Selection.Next(Unit: Word.WdUnits.wdParagraph, Count: 1);
    nextParagraph.Select();
}

/// <summary>
/// Reports whether the selection's paragraphs have an outline level other than body text.
/// </summary>
/// <param name="mySelection">The selection whose paragraphs are inspected.</param>
/// <returns><c>false</c> when the outline level is body text; otherwise <c>true</c>.</returns>
private bool IsOutlineLevel(ref Word.Selection mySelection)
{
    Word.WdOutlineLevel level = mySelection.Range.Paragraphs.OutlineLevel;
    return level != Word.WdOutlineLevel.wdOutlineLevelBodyText;
}

Result

Upvotes: 2

QRUSH
QRUSH

Reputation: 81

It looks like I've solved the problem, and all thanks to @Smith.

But my code looks ugly.

/// <summary>
/// Visits each heading of <c>doc</c> in order and prints the heading text, its outline
/// level, and the text of every body-text paragraph that follows it up to the next heading.
/// </summary>
static void iterateEachHeading() {
    headingAndParagraphs = new Dictionary<Paragraph, Paragraphs>();

    // Heading strings only; the selection is moved separately to find where each heading lives.
    var listOfEachHeading = doc.GetCrossReferenceItems(WdReferenceType.wdRefTypeHeading);

    var headingsCount = listOfEachHeading.Length;

    app.Selection.GoTo(What: WdGoToItem.wdGoToHeading, Which: WdGoToDirection.wdGoToFirst);

    for (int i = 1; i <= headingsCount; i++) {
        Console.WriteLine(listOfEachHeading[i]);
        Console.WriteLine(app.Selection.Range.Paragraphs.OutlineLevel);

        // Same guard the inner loop uses: with no following paragraph there is nothing
        // to select, and SelectNextParagraph() would dereference null.
        if (app.Selection.Next(Unit: WdUnits.wdParagraph, Count: 1) == null) {
            break;
        }

        SelectNextParagraph();
        if (isHeader(app.Selection)) {
            // The paragraph right under the heading is itself a heading: step back so that
            // GoToNextHeading() below lands on it.
            GotoPreviousHeading();
        }
        else {
            while (!isHeader(app.Selection)) {
                Console.WriteLine(app.Selection.Range.Text);

                if (app.Selection.Next(Unit: WdUnits.wdParagraph, Count: 1) == null) {
                    break; // no further paragraph to select
                }
                SelectNextParagraph();
            }
            GotoPreviousHeading();
        }
        GoToNextHeading();
    }
}

/// <summary>Returns the first paragraph of the current selection's range.</summary>
static Paragraph getHeadingAsParagraph() {
    var selectedParagraphs = app.Selection.Range.Paragraphs;
    return selectedParagraphs.First;
}

/// <summary>Selects the paragraph one unit before the current selection.</summary>
static void SelectPreviousParagraph() {
    var previousParagraph = app.Selection.Previous(Unit: WdUnits.wdParagraph, Count: 1);
    previousParagraph.Select();
}

/// <summary>Asks Word to move the selection to the next heading.</summary>
static void GoToNextHeading() {
    var selection = app.Selection;
    selection.GoTo(
        What: WdGoToItem.wdGoToHeading,
        Which: WdGoToDirection.wdGoToNext);
}

/// <summary>
/// Reports whether the selection's paragraphs have an outline level other than body text.
/// </summary>
/// <param name="mySelection">The selection whose paragraphs are inspected.</param>
static bool isHeader(Selection mySelection) {
    var outlineLevel = mySelection.Range.Paragraphs.OutlineLevel;
    return outlineLevel != WdOutlineLevel.wdOutlineLevelBodyText;
}

/// <summary>Selects the paragraph one unit after the current selection.</summary>
static void SelectNextParagraph() {
    var nextParagraph = app.Selection.Next(Unit: WdUnits.wdParagraph, Count: 1);
    nextParagraph.Select();
}

/// <summary>Asks Word to move the selection to the previous heading.</summary>
static void GotoPreviousHeading() {
    var selection = app.Selection;
    selection.GoTo(
        What: WdGoToItem.wdGoToHeading,
        Which: WdGoToDirection.wdGoToPrevious);
}

/// <summary>
/// Starts Word, opens the document read-only, prints its headings and their paragraphs,
/// and shuts Word down again.
/// </summary>
/// <param name="args">
/// Optional: <c>args[0]</c> is the path of the document to open. When omitted, the
/// original hard-coded sample path is used.
/// </param>
static void Main(string[] args) {
    string documentPath = args.Length > 0 ? args[0] : "C:/Users/Demoss77/Desktop/test2.docx";

    app = new Microsoft.Office.Interop.Word.Application();

    try {
        doc = app.Documents.Open(documentPath, Visible: true, ReadOnly: true);

        iterateEachHeading();
    }
    finally {
        // Quit even when opening or iterating throws; otherwise the Word process started
        // above is left running. The document was opened read-only, so nothing needs saving.
        app.Quit(SaveChanges: WdSaveOptions.wdDoNotSaveChanges);
    }
}

Here is the second version of my code, which is much cleaner:

/// <summary>
/// For each heading number, jumps to that heading and to the following one, then prints the
/// text of every paragraph in the range between the two positions.
/// </summary>
static void iterateEachHeading() {
    var headings = doc.GetCrossReferenceItems(WdReferenceType.wdRefTypeHeading);
    var total = headings.Length;

    for (int headingNumber = 1; headingNumber <= total; headingNumber++) {
        // Position of heading number `headingNumber`.
        var currentHeading = app.Selection.GoTo(
            What: WdGoToItem.wdGoToHeading,
            Which: WdGoToDirection.wdGoToAbsolute,
            Count: headingNumber.ToString());
        var start = currentHeading.Start;

        // Position of the heading after it.
        var followingHeading = app.Selection.GoTo(
            What: WdGoToItem.wdGoToHeading,
            Which: WdGoToDirection.wdGoToAbsolute,
            Count: (headingNumber + 1).ToString());
        var end = followingHeading.End;

        // When the second jump did not land past `start`, fall back to a range given only its start.
        Paragraphs paragraphs = start < end
            ? doc.Range(start, end).Paragraphs
            : doc.Range(start).Paragraphs;

        foreach (Paragraph paragraph in paragraphs) {
            Console.WriteLine(paragraph.Range.Text);
        }
    }
}

Upvotes: 3

Smith
Smith

Reputation: 183

I usually try an algorithm with VBA and translate it to C#..

C# is a strong language, but when it comes to working with MS Word object references, VBA is more convenient.

Read my code and translate it to your C# code. I think iterating through all paragraphs is not preferable.

(Updated) This algorithm works for a dictionary with at most a two-level (2D) structure.

If you want a nested structure like JSON, you need to parse the document. In that case, I think it will be better to parse the .docx XML structure than to use MS Word objects and methods.

Also, I did not consider the case where the first paragraph of the document is not a heading.

Sub IteratingHeadingsForDoSomething()
' Walks the headings of the active document in order. For each heading it prints the
' heading text, its outline level, and the text of every body-text paragraph that follows
' it up to the next heading (or the end of the document).

Dim listHeadingsString As Variant
Dim headingsCount As Long
Dim i As Long

listHeadingsString = ActiveDocument.GetCrossReferenceItems(wdRefTypeHeading) ' Collection of heading strings, No location info
headingsCount = UBound(listHeadingsString)

Selection.GoTo What:=wdGoToHeading, Which:=wdGoToFirst 'goto first heading
For i = 1 To headingsCount Step 1

    Debug.Print listHeadingsString(i) ' put this to the list
    Debug.Print Selection.Range.Paragraphs.OutlineLevel ' use outline level

    ' No paragraph after this heading: nothing to collect and nothing to select.
    If Selection.Next(Unit:=wdParagraph, Count:=1) Is Nothing Then Exit For

    SelectNextParagraph
    If IsOutlineLevel(Selection) = True Then
        ' para under the heading is also a heading
        Debug.Print "This is a heading too" ' >>> Skip storing??
        GotoPreviousHeading   ' go back to previous heading
    Else
        Do Until IsOutlineLevel(Selection) = True
            Debug.Print Selection.Range.Text '>>> Store paragraph separately or concatenatively
            ' Stop at the end of the document; without this exit the next
            ' SelectNextParagraph would fail on a Nothing range.
            If Selection.Next(Unit:=wdParagraph, Count:=1) Is Nothing Then Exit Do
            SelectNextParagraph
        Loop
        GotoPreviousHeading
    End If


    GoToNextHeading
Next i

End Sub
'//////////////////////////////////////////
Sub GoToNextHeading()
    ' Ask Word to move the selection to the next heading.
    With Selection
        .GoTo What:=wdGoToHeading, Which:=wdGoToNext
    End With
End Sub
'//////////////////////////////////////////
Sub GotoPreviousHeading()
    ' Ask Word to move the selection to the previous heading.
    With Selection
        .GoTo What:=wdGoToHeading, Which:=wdGoToPrevious
    End With
End Sub
'//////////////////////////////////////////
Sub SelectNextParagraph()
    ' Select the paragraph one unit after the current selection.
    Dim nextParagraph As Range
    Set nextParagraph = Selection.Next(Unit:=wdParagraph, Count:=1)
    nextParagraph.Select
End Sub
'//////////////////////////////////////////
Function IsOutlineLevel(mySelection As Selection) As Boolean
    ' True when the selection's paragraphs have an outline level other than body text.
    IsOutlineLevel = (mySelection.Range.Paragraphs.OutlineLevel <> wdOutlineLevelBodyText)
End Function

Upvotes: 2

Related Questions