Shane
Shane

Reputation: 1

Word Macro: Find Key Word(s) and Extract Sentence to Excel

I'm a Proposal Manager and use the below macro to search a Word file for the word "shall" and extract the sentence containing shall to Excel. It works but I can't figure out how to edit the code so it can search for more than one word in the order they appear in the file.

Example: 1. Search for "shall" or "must". 2. It shouldn't search for "shall" and then look for "must". It should search for "shall" or "must" then "shall" or "must". 3. If a paragraph has four sentences, and the first sentence contains "shall", the second contains "shall", the third contains "must", and the fourth contains "shall", the macro should extract to Excel in that order.

Sub FindWordCopySentence()
' Searches the active document for "shall" and, for every hit, copies the
' sentence containing it (via the clipboard) into column A of Sheet1 in
' C:\Temp\test.xlsx, one sentence per row starting at row 1.
' Excel is only started once the first hit is found; the workbook is saved and
' Excel is closed when the search is finished.
Dim appExcel As Object
Dim objSheet As Object
Dim aRange As Range
Dim intRowCount As Integer
intRowCount = 1
' Start with the whole document; each successful Find shrinks aRange to the hit.
Set aRange = ActiveDocument.Range
With aRange.Find
    Do
        .Text = "shall" ' the word I am looking for
        .Execute
        If .Found Then
            ' Grow the hit to the sentence that contains it and put it on the clipboard.
            aRange.Expand Unit:=wdSentence
            aRange.Copy
            ' Collapse to the end of the sentence so the next search starts after it.
            aRange.Collapse wdCollapseEnd
            ' First hit only: start Excel and open the target worksheet.
            If objSheet Is Nothing Then
                Set appExcel = CreateObject("Excel.Application")
                 'Change the file path to match the location of your test.xls
                Set objSheet = appExcel.workbooks.Open("C:\Temp\test.xlsx").Sheets("Sheet1")
                intRowCount = 1
            End If
            ' Paste the copied sentence into the next free row of column A.
            objSheet.Cells(intRowCount, 1).Select
            objSheet.Paste
            intRowCount = intRowCount + 1
        End If
    Loop While .Found
End With
' Excel was only started if at least one sentence was found.
If Not objSheet Is Nothing Then
    ' True = save the changes while closing the workbook.
    appExcel.workbooks(1).Close True
    appExcel.Quit
    Set objSheet = Nothing
    Set appExcel = Nothing
End If
Set aRange = Nothing
End Sub

Upvotes: 0

Views: 3639

Answers (2)

xidgel
xidgel

Reputation: 3145

One way to approach this is to:

(1) Use Word's search/replace to wrap the words of interest (shall, will) with tags, e.g., $$SWSTART_shall_SWEND$$, $$SWSTART_will_SWEND$$. The start tag and end tag can be anything you don't expect to be in the Word source document;

(2) Use a modified version of your FindWordCopySentence to find the tagged words, then copy the corresponding sentences to Excel; then

(3) Use Word's search/replace to clean-up (remove the tags). Or you could just close the Word doc without saving.

Here's the code with some comments to explain the details:

Option Explicit
' Markers placed immediately before and after each word of interest.
' AddTagsToShallWords inserts them, FindWordCopySentence searches for them and
' RemoveTags strips them again. They can be any text that is not expected to
' occur in the source document.
Const START_TAG As String = "$$SWSTART_"
Const END_TAG As String = "_SWEND$$"


Sub AddTagsToShallWords()
    ' Wraps every whole-word occurrence of each "shall word" in the active
    ' document with START_TAG / END_TAG, so that all of them can later be
    ' located with one single search.
    '
    ' SHALL_WORDS is a |-delimited list of Word wildcard patterns. "[Ss]" lets
    ' the first letter be upper or lower case (likewise "[Ww]"). To extend the
    ' list, append another pattern, e.g. "|[Mm]ust".
    Const SHALL_WORDS = "[Ss]hall|[Ww]ill"
    Dim vPattern As Variant
    Dim rngDoc As Range

    For Each vPattern In Split(SHALL_WORDS, "|")
        ' Each pattern gets a fresh range covering the whole document.
        Set rngDoc = ActiveDocument.Range
        With rngDoc.Find
            .MatchWildcards = True
            ' Find text, e.g. "(<[Ss]hall>)":
            '   < and > anchor the match to the start and end of a word, so that
            '   "will" does not match "swill", "goodwill", etc.;
            '   the parentheses form group 1, which "\1" re-inserts between the
            '   two tags in the replacement text.
            .Execute FindText:="(<" & CStr(vPattern) & ">)", _
                     ReplaceWith:=START_TAG & "\1" & END_TAG, _
                     Replace:=wdReplaceAll
        End With
    Next vPattern

    Set rngDoc = Nothing
End Sub



Sub RemoveTags()
    ' Undoes AddTagsToShallWords: every START_TAG ... END_TAG pair in the active
    ' document is replaced by just the text that sits between the two tags.
    Dim rngDoc As Range

    Set rngDoc = ActiveDocument.Range
    With rngDoc.Find
        .MatchWildcards = True
        ' "(*)" captures whatever lies between the tags as group 1;
        ' "\1" writes that captured text back without the tags.
        .Execute FindText:=START_TAG & "(*)" & END_TAG, _
                 ReplaceWith:="\1", _
                 Replace:=wdReplaceAll
    End With

    Set rngDoc = Nothing
End Sub



Sub FindWordCopySentence()
' Copies every sentence of the active document that contains a tagged word
' (START_TAG ... END_TAG) into column A of Sheet1 in C:\Temp\test.xlsx, one
' sentence per row, in the order the sentences appear in the document.
' The tags are stripped from the copied text; the document is not modified.
'
' Excel is started only when the first tagged sentence is found. On success
' the workbook is saved and Excel is closed. If anything fails (for example
' the workbook cannot be opened), the workbook is closed WITHOUT saving, the
' hidden Excel instance is still shut down, and the error is re-raised.
Dim appExcel As Object
Dim objBook As Object
Dim objSheet As Object
Dim aRange As Range
Dim lngRowCount As Long          ' Long: an Integer would overflow after 32,767 rows
Dim lngErrNumber As Long
Dim sErrDescription As String
Dim s As String

On Error GoTo ErrHandler

lngRowCount = 1
Set aRange = ActiveDocument.Range
With aRange.Find
    .MatchWildcards = True
    .Forward = True
    .Wrap = wdFindStop           ' stop at the end of the document, never wrap around
    .Text = START_TAG & "*" & END_TAG ' any tagged word
    Do While .Execute
        ' Grow the hit to its sentence and take a tag-free copy of the text.
        aRange.Expand Unit:=wdSentence
        s = aRange.Text
        s = Replace(s, START_TAG, "")
        s = Replace(s, END_TAG, "")
        ' Continue searching after this sentence, so a sentence holding
        ' several tagged words is extracted only once.
        aRange.Collapse wdCollapseEnd
        If objSheet Is Nothing Then
            Set appExcel = CreateObject("Excel.Application")
            'Change the file path to match the location of your test.xlsx
            Set objBook = appExcel.Workbooks.Open("C:\Temp\test.xlsx")
            Set objSheet = objBook.Sheets("Sheet1")
        End If
        objSheet.Cells(lngRowCount, 1).Formula = s
        lngRowCount = lngRowCount + 1
    Loop
End With

CleanUp:
' Reached on success and (via Resume) after an error. Clean-up itself must
' not raise, otherwise Excel could be left running invisibly.
On Error Resume Next
If Not objBook Is Nothing Then objBook.Close SaveChanges:=(lngErrNumber = 0)
If Not appExcel Is Nothing Then appExcel.Quit
Set objSheet = Nothing
Set objBook = Nothing
Set appExcel = Nothing
Set aRange = Nothing
On Error GoTo 0
' Surface the original failure to the caller once everything is released.
If lngErrNumber <> 0 Then Err.Raise lngErrNumber, "FindWordCopySentence", sErrDescription
Exit Sub

ErrHandler:
' Remember the error, then leave the handler so that the clean-up code can
' use its own On Error Resume Next.
lngErrNumber = Err.Number
sErrDescription = Err.Description
Resume CleanUp
End Sub

Hope that helps

Upvotes: 0

macropod
macropod

Reputation: 13490

A fairly basic problem you'll likely encounter is that VBA has no idea what a grammatical sentence is. For example, consider the following:

Mr. Smith spent $1,234.56 at Dr. John's Grocery Store, to buy: 10.25kg of potatoes; 10kg of avocados; and 15.1kg of Mrs. Green's Mt. Pleasant macadamia nuts.

For you and me, that would count as one sentence; for VBA it counts as 5 sentences. Accordingly, the following macro simply captures the whole of the paragraphs concerned. Much of the code is concerned with establishing that the workbook and worksheet exist; I haven't included error-checking as to whether the file might already be opened, though.

Sub Demo()
' Appends every paragraph of the active document that contains "shall" or
' "must" to column A of worksheet StrWkSht in workbook StrWkBkNm, below the
' last used row, in document order. Whole paragraphs are copied because VBA's
' idea of a "sentence" is unreliable.
' The match is case-insensitive and matches the text anywhere in the
' paragraph (the same way Word's default Find treats "shall").
' On success the workbook is left open, unsaved, in the visible Excel instance.
'Note: This code requires a VBA reference to the Excel object library
Dim xlApp As New Excel.Application, xlWkBk As Excel.Workbook
Dim xlSht As Excel.Worksheet, StrWkBkNm As String, StrWkSht As String
Dim lRow As Long, Para As Paragraph, StrTxt As String
StrWkBkNm = "C:\Temp\test.xlsx": StrWkSht = "Sheet1"
If Dir(StrWkBkNm) = "" Then
  MsgBox "Cannot find the designated workbook: " & StrWkBkNm, vbExclamation
  Exit Sub
End If
With xlApp
  .Visible = True
  ' The file is available, so open it.
  Set xlWkBk = .Workbooks.Open(FileName:=StrWkBkNm, ReadOnly:=False, AddToMru:=False)
  If xlWkBk Is Nothing Then
    MsgBox "Cannot open:" & vbCr & StrWkBkNm, vbExclamation
    .Quit
    Exit Sub
  End If
  ' Process the workbook.
  With xlWkBk
    'Ensure the worksheet exists: asking for a missing sheet raises an error,
    'in which case xlSht simply stays Nothing.
    On Error Resume Next
    Set xlSht = .Worksheets(StrWkSht)
    On Error GoTo 0
    If Not xlSht Is Nothing Then
      With xlSht
        ' Find the last-used row in column A.
        lRow = .Cells(.Rows.Count, 1).End(-4162).Row ' -4162 = xlUp
      End With
      For Each Para In ActiveDocument.Paragraphs
        ' Read the paragraph text once; it is tested twice and possibly written.
        StrTxt = Para.Range.Text
        ' vbTextCompare so that "Shall"/"Must" at the start of a sentence also match.
        If (InStr(1, StrTxt, "shall", vbTextCompare) > 0) Or _
           (InStr(1, StrTxt, "must", vbTextCompare) > 0) Then
          lRow = lRow + 1
          xlSht.Range("A" & lRow).Value = StrTxt
        End If
      Next
    Else
      MsgBox "Cannot find the designated worksheet: " & StrWkSht, vbExclamation
      .Close False
      xlApp.Quit
    End If
  End With
End With
' Release Excel object memory
Set xlSht = Nothing: Set xlWkBk = Nothing: Set xlApp = Nothing
End Sub

Upvotes: 1

Related Questions