Reputation: 9568
In the following working code, I am trying to navigate to specific youtube channel To get the videos names into excel .. It is working but partially as the code just lists about 30 videos only
Dim x, html As Object, ele As Object, sKeyWords As String, i As Long
With CreateObject("MSXML2.ServerXMLHTTP")
.Open "GET", "youtube channel url videos", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
If .Status <> 200 Then MsgBox "Problem" & vbNewLine & .Status & " - " & .statusText: Exit Sub
Set html = CreateObject("htmlfile")
html.body.innerHTML = .responseText
How can I maje the code load all the content of the page ..? so as to get all the videos that are listed there.
I have found a site that lists all the videos in one table but as for the part of scraping the table, I failed to extract the video name or even dealing with the table Here's my try
Sub Post_Method()
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
Dim htmla As Object
Dim trow As Object
Dim tcel As Object
Dim strArg As String
Dim c As Long
Dim x As Long
strArg = "inputType=1&stringInput=https%3A%2F%2Fwww.youtube.com%2Fchannel%2FUC43lrLHl4EhxaKQn2HrcJPQ&limit=100&keyType=default&customKey="
With http
.Open "POST", "https://youtube-playlist-analyzer.appspot.com/submit", False
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send strArg
html.body.innerHTML = .responseText
' WriteTxtFile html.body.innerHTML
End With
Dim posts As Object, elem As Object, r As Long
'This part I can't adjust
'------------------------
Set posts = html.getElementById("container").getElementById("tableContainer").getElementById("tableData")
For Each elem In posts.Children
For Each trow In elem.Cells
c = c + 1: Cells(r + 1, c) = trow.innerText
Next trow
c = 0: r = r + 1
Next elem
'----------------------------------
Stop
End Sub
Upvotes: 3
Views: 839
Reputation: 84475
You can use that endpoint then extract the javascript object from the response which contains data of interest and parse with jsonconverter.bas.
Json library:
I use jsonconverter.bas. Download raw code from here and add to standard module called JsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
VBA:
Option Explicit
Public Sub GetYouTubeViews()
Dim s As String, ws As Worksheet, body As String
body = "inputType=1&stringInput=https://www.youtube.com/channel/UC43lrLHl4EhxaKQn2HrcJPQ&limit=100&keyType=default"
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "https://youtube-playlist-analyzer.appspot.com/submit", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send body
s = .responseText
End With
Dim results(), r As Long, jsonSource As String
Dim json As Object, item As Object, headers()
jsonSource = GetString(s, "json_items = ", ";")
If jsonSource = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(jsonSource)
headers = Array("Title", "ViewCount")
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1
results(r, 1) = item("title")
results(r, 2) = item("viewCount")
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetString(ByVal inputString As String, ByVal startPhrase As String, ByVal endPhrase As String) As String
Dim s As Long, e As Long
s = InStr(inputString, startPhrase)
If Not s > 0 Then
GetString = "No match"
Exit Function
End If
e = InStr(s + Len(startPhrase) - 1, inputString, endPhrase)
If Not e > 0 Then
GetString = "No match"
Exit Function
End If
GetString = Mid$(inputString, s + Len(startPhrase), e - (s + Len(startPhrase)))
End Function
Sample results:
Py:
A lot more concise with python
import requests, re, json ,csv
data = {
'inputType': '1',
'stringInput': 'https://www.youtube.com/channel/UC43lrLHl4EhxaKQn2HrcJPQ',
'limit': '100',
'keyType': 'default'
}
r = requests.post('https://youtube-playlist-analyzer.appspot.com/submit', data=data)
p = re.compile(r'json_items = (.*?);', re.DOTALL)
results = json.loads(p.findall(r.text)[0])
with open("data.csv", "w", encoding="utf-8-sig", newline='') as csv_file:
w = csv.writer(csv_file, delimiter = ",", quoting=csv.QUOTE_MINIMAL) #change this for locale
w.writerow(['Title','ViewCount'])
for item in results:
w.writerow([item['title'], item['viewCount']])
Upvotes: 3