Reputation: 707
My VBA string is an HTML table.
Let myTable = "<table><tr><th>LocationID</th><th>Lastname</th><th>Age</th> </tr> <tr> <td>1234</td> <td>Smith</td> <td>50</td> </tr> <tr> <td>1234</td> <td>Jackson</td> <td>94</td> </tr><tr> <td>1237</td> <td>Doe</td> <td>80</td> </tr></table>
Visually looks like this
LocationID Lastname Age
1234 Smith 50
1234 Jackson 94
1237 Doe 80
I need to make sure this string has the same LocationID
for each row. How can I parse this string and use VBA to assert the first column has the same values? Is there a build in parser for html in VBA
Upvotes: 0
Views: 455
Reputation: 166341
Here's an example approach:
Sub Tester()
Dim arr
With ActiveSheet
'parse HTML (stored in a worksheet cell)
arr = HTMLTableToArray(.Range("A1").Value)
'put the array onto the sheet
.Range("B3").Resize(UBound(arr, 1), UBound(arr, 2)).Value = arr
End With
End Sub
'convert a regular HTML table (no merged cells etc) to a 2D array
Function HTMLTableToArray(HTMLText As String)
Dim o As New HTMLDocument, tbl
Dim nr As Long, nc As Long, r As Long, c As Long
o.body.innerHTML = HTMLText
DoEvents
Set tbl = o.getElementsByTagName("table")(0)
nr = tbl.Rows.Length
nc = tbl.Rows(0).Cells.Length
ReDim arr(1 To nr, 1 To nc) 'size output array
'loop over rows and cells and fill the array
For r = 1 To nr
For c = 1 To nc
arr(r, c) = tbl.Rows(r - 1).Cells(c - 1).innerText
Next c
Next r
HTMLTableToArray = arr
End Function
Note: this assumes your HTML string is valid and contains a complete HTML table with no merged cells (ie. no use of rowspan
or colspan
)
Upvotes: 2
Reputation: 84465
You could read html from file into MSHTML.HTMLDocument using Microsoft HTML Object Library reference then gather a nodeList of all the rows. Assuming there are headers in first row then loop the the other rows and test the value of the firstChild of each row.
Option Explicit
Public Sub test()
Dim html As MSHTML.HTMLDocument, firstColumnSecondRow As String, rows As Object, i As Long
Set html = GetHTMLFileContent("C:\Users\User\Desktop\test.html")
Set rows = html.querySelectorAll("tr")
Select Case rows.Length
Case 1
Debug.Print False
Exit Sub
Case 2
Debug.Print True
Case Is >= 3
firstColumnSecondRow = rows.item(1).firstChild.innerText
For i = 3 To rows.Length - 1
If rows.item(i).firstChild.innerText <> firstColumnSecondRow Then
Debug.Print False
Exit Sub
End If
Next
Debug.Print True
End Select
End Sub
Public Function GetHTMLFileContent(ByVal filePath As String) As MSHTML.HTMLDocument
'"C:\Users\User\Desktop\test.html"
Dim fso As Object, hFile As Object, hString As String, html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
Set fso = CreateObject("Scripting.FileSystemObject")
Set hFile = fso.OpenTextFile(filePath)
Do Until hFile.AtEndOfStream
hString = hFile.ReadAll()
Loop
html.body.innerHTML = hString
Set GetHTMLFileContent = html
End Function
Public Function GetHTMLFromFile(ByVal url As String) As String
Dim fso As Object, f As Object, outputString As String
Set fso = CreateObject("Scripting.FileSystemObject")
Set f = fso.OpenTextFile("C:\Users\HarrisQ\Desktop\HTML.txt", 1)
Do Until f.AtEndOfStream
outputString = f.ReadAll()
Loop
f.Close
GetHTMLFromFile = outputString
End Function
If you have the string already you can just directly assign to html document with
Set html = New MSHTML.HTMLDocument
html.body.innerHTML = yourTableString ' no need for function call to read html and return HTMLDocument
Upvotes: 2