Reputation: 49
I have a small PowerShell problem that has been troubling me for quite a while now.
I'm trying to get information from an RSS feed. I download the XML and go through it, but I only want certain parts of it. For that I use .document.getElementById().outerText
The problem is that it pulls the first piece of information correctly, but after that everything fails: it either picks up random text or keeps the text from the first item without refreshing the variable. PowerShell ISE also reports "You cannot call a method on a null-valued expression." at random.
Here is my code:
<#
AUTHOR: KOCH,MICHAEL [GRE-IT]
DESCRIPTION: RSS READER
DATE: 28.06.17
DATE LAST WRITTEN: 19.07.17
LAST CHANGE:
#>
# --- Configuration -----------------------------------------------------------
$debug = 1 #DEBUG: 1 prints diagnostic output while the script runs
$receiver = "[email protected]"
$sender = "[email protected]"
$smtp = "A.SMTP.SERVER"
$encoding = [System.Text.Encoding]::UTF8   # encoding passed to Send-MailMessage
$path_config = "C:\RSS\Zoll\config.txt"    # subject tags, one per line
$output = "C:\RSS\Zoll\meldung.html"
$output_edit_path = "C:\RSS\Zoll\meldung_edit.html"
$nmbr = 0
$count = 0

# --- Download and parse the RSS feed ------------------------------------------
$feed_url = 'http://www.zoll.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSZollImFokus.xml'
$feed_file = 'C:\RSS\Zoll\meldungen.xml'

# -ErrorAction Stop aborts the script when the feed cannot be downloaded.
Invoke-WebRequest -Uri $feed_url -OutFile $feed_file -ErrorAction Stop

# Parse the saved file with XmlDocument.Load instead of "[xml](Get-Content ...)".
# Get-Content in Windows PowerShell decodes a file without a byte-order mark
# using the system ANSI code page, which garbles non-ASCII characters (umlauts)
# in a UTF-8 feed. Load() reads the encoding from the XML declaration itself.
[xml]$content = New-Object System.Xml.XmlDocument
$content.Load($feed_file)
$feed = $content.rss.channel
# Build the subject filter from config.txt (one tag per line).
#
# The main loop tests "$subject -match $tag", and -match takes ONE regular
# expression on its right-hand side. The lines are therefore combined into a
# single alternation pattern ("(?:tag1)|(?:tag2)|...") so that a subject
# matches as soon as it contains any configured tag. Each line is used as a
# regular expression as-is, which is how -match would treat it anyway.
#
# The previous code tried to fill an array with "$entrys >> $tag[$nmbr]", but
# ">>" is the append-to-file redirection operator, not an assignment, so $tag
# stayed empty. It also re-read the whole file on every loop iteration.
$tag = ''   # an empty pattern matches every subject, i.e. no filtering
if (Test-Path -LiteralPath $path_config)
{
    # Read the file once; strip surrounding whitespace and drop blank lines.
    $entrys = @(Get-Content -LiteralPath $path_config |
        ForEach-Object { $_.Trim() } |
        Where-Object { $_ -ne '' })
    $tag = ($entrys | ForEach-Object { "(?:$_)" }) -join '|'
    if ($debug -eq 1)
    {
        foreach ($entry in $entrys)
        {
            Write-Output "Entry $entry"
        }
        Write-Output "Count $($entrys.Count)"
        Write-Output "Tag $tag"
    }
}
else
{
    Write-Warning "Config file not found: $path_config"
}
# Main loop: open every feed item in Internet Explorer, read the text of the
# element with id "main" and mail it when the link and subject filters match.
# One IE instance is created here and reused for all items.
$ie = New-Object -ComObject "InternetExplorer.Application"
Foreach($msg in $feed.Item)
{
$link = ($msg.link)
$subject = ($msg.title)
# NOTE(review): the document is read on the very next lines without waiting
# for the page to finish loading. The accepted fix for this script was to poll
# $ie.Busy / $ie.ReadyState after navigate, which suggests the document read
# below can still be the previous page (stale text) or not exist yet.
$ie.navigate("$link")
#$return = Invoke-WebRequest -Uri $link -OutFile "C:\RSS\Zoll\link.html"
$return = $ie.document
# If the document or the "main" element is not available, this chain calls a
# method on $null and raises "You cannot call a method on a null-valued expression".
$innertext = $return.documentElement.document.getElementById("main").outerText
$body = $innertext#.Replace('Ä', 'Ä')
<#
$body = $innertext.Replace('ä', 'ä')
$body = $innertext.Replace('Ö', 'Ö')
$body = $innertext.Replace('ö', 'ö')
$body = $innertext.Replace('Ü', 'Ü')
$body = $innertext.Replace('ü', 'ü')
$body = $innertext.Replace('ß', 'ß')
#>
# Debug dump of everything gathered for the current item.
if ($debug -eq 1)
{
Write-Output "Subject $subject"
Write-Output "Tag $tag"
Write-Output "Link $link"
Write-Output $body
#exit
}
if($link -match "Zigaretten") # searches the <link> for the string "Zigaretten"
{
if($subject -match $tag) # searches the subject for the tag from config.txt !!! only one argument per line !!!
{
# NOTE(review): Send-MailMessage normally writes nothing to the pipeline, so
# $mail would stay empty and the "Send." branch below would never run even
# when the mail was sent - confirm.
if($mail = Send-MailMessage -From "$sender" -To "$receiver" -Subject "Zoll Meldung: $subject" -Body "$body" -SmtpServer "$smtp" -BodyAsHtml -encoding $encoding)
{
if($debug -eq 1)
{
Write-Output "$tag"
Write-Output "Send. Tag = $tag"
}
Write-Output "Send."
}
}
}
else
{
# Reached when the link does not contain "Zigaretten".
Write-Host "Empty."
}
}
# Close the browser and release the COM reference once all items are processed.
$ie.Quit()
[System.Runtime.Interopservices.Marshal]::ReleaseComObject($ie)
Remove-Variable ie
Upvotes: 0
Views: 2381
Reputation: 49
I added a wait-while-busy loop to make sure IE has loaded the full HTML document before the text is read. That's the solution to the problem! :)
<#
AUTHOR: KOCH,MICHAEL [GRE-IT]
DESCRIPTION: RSS READER
DATE: 28.06.17
DATE LAST WRITTEN: 20.07.17
LAST CHANGE: ADDED WAIT IF BUSY !
#>
# --- Configuration -----------------------------------------------------------
$debug = 0 #DEBUG: 1 prints diagnostic output and shows the browser window
$receiver = "[email protected]"
$sender = "[email protected]"
$smtp = "A.SMTP.SERVER"
$encoding = [System.Text.Encoding]::UTF8   # encoding passed to Send-MailMessage
$path_config = "C:\RSS\Zoll\config.txt"    # subject tags, one per line
$output = "C:\RSS\Zoll\meldung.html"
$output_edit_path = "C:\RSS\Zoll\meldung_edit.html"
$nmbr = 0
$count = 0

# --- Download and parse the RSS feed ------------------------------------------
$feed_url = 'http://www.zoll.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSZollImFokus.xml'
$feed_file = 'C:\RSS\Zoll\meldungen.xml'

# -ErrorAction Stop aborts the script when the feed cannot be downloaded.
Invoke-WebRequest -Uri $feed_url -OutFile $feed_file -ErrorAction Stop

# Parse the saved file with XmlDocument.Load instead of "[xml](Get-Content ...)".
# Get-Content in Windows PowerShell decodes a file without a byte-order mark
# using the system ANSI code page, which garbles non-ASCII characters (umlauts)
# in a UTF-8 feed. Load() reads the encoding from the XML declaration itself.
[xml]$content = New-Object System.Xml.XmlDocument
$content.Load($feed_file)
$feed = $content.rss.channel
# Build the subject filter from config.txt (one tag per line).
#
# The main loop tests "$subject -match $tag", and -match takes ONE regular
# expression on its right-hand side. The lines are therefore combined into a
# single alternation pattern ("(?:tag1)|(?:tag2)|...") so that a subject
# matches as soon as it contains any configured tag. Each line is used as a
# regular expression as-is, which is how -match would treat it anyway.
#
# The previous code tried to fill an array with "$entrys >> $tag[$nmbr]", but
# ">>" is the append-to-file redirection operator, not an assignment, so $tag
# stayed empty. It also re-read the whole file on every loop iteration.
$tag = ''   # an empty pattern matches every subject, i.e. no filtering
if (Test-Path -LiteralPath $path_config)
{
    # Read the file once; strip surrounding whitespace and drop blank lines.
    $entrys = @(Get-Content -LiteralPath $path_config |
        ForEach-Object { $_.Trim() } |
        Where-Object { $_ -ne '' })
    $tag = ($entrys | ForEach-Object { "(?:$_)" }) -join '|'
    if ($debug -eq 1)
    {
        foreach ($entry in $entrys)
        {
            Write-Output "Entry $entry"
        }
        Write-Output "Count $($entrys.Count)"
        Write-Output "Tag $tag"
    }
}
else
{
    Write-Warning "Config file not found: $path_config"
}
# Main loop: for every feed item whose link contains "Zigaretten" and whose
# subject matches $tag, load the article in Internet Explorer, read the text of
# the element with id "main" and send it by mail.
#
# Reads:  $feed, $tag, $debug, $sender, $receiver, $smtp, $encoding
# Writes: $link, $subject, $return, $innertext, $body (values of the last item)
$ie = New-Object -ComObject InternetExplorer.Application #creates new ComObject IE
try
{
    if ($debug -eq 1)
    {
        $ie.visible = $true   # show the browser window while debugging
    }

    Foreach($msg in $feed.Item)
    {
        $link = ($msg.link)
        $subject = ($msg.title)
        if ($debug -eq 1)
        {
            Write-Output "Subject $subject"
            Write-Output "Tag $tag"
            Write-Output "Link $link"
        }

        # Apply both filters before touching the browser so that pages which
        # would never be mailed are not loaded at all.
        if($link -notmatch "Zigaretten") #searches in the <link> for the string "Zigaretten"
        {
            Write-Host "Empty."
            continue
        }
        if($subject -notmatch $tag) #searches for the specified tag in config.txt !!! only one argument per line !!!
        {
            continue
        }

        $ie.navigate("$link") #navigate with Internet Explorer to the website

        # Wait until the page is completely loaded. The page is ready only when
        # IE is no longer busy AND ReadyState is 4 (complete), so keep waiting
        # while EITHER condition is still unmet. Sleeping before the first check
        # gives the navigation time to start; the deadline prevents an endless
        # loop when a page never finishes loading.
        $deadline = (Get-Date).AddSeconds(30)
        do
        {
            Start-Sleep -Milliseconds 200
        }
        while (($ie.Busy -or $ie.ReadyState -ne 4) -and (Get-Date) -lt $deadline)

        if ($ie.Busy -or $ie.ReadyState -ne 4)
        {
            Write-Warning "Timed out while loading $link"
            continue
        }

        # Look up the "main" element step by step; calling a method on a missing
        # document or element is what raises
        # "You cannot call a method on a null-valued expression".
        $return = $ie.document
        $main = $null
        if ($return -and $return.documentElement)
        {
            $main = $return.documentElement.document.IHTMLDocument3_getElementById("main")
        }
        if (-not $main)
        {
            Write-Warning "No element with id 'main' found at $link"
            continue
        }

        $innertext = $main.outerText #gets the outer text from the element with the ID "main"
        $body = $innertext
        if ([string]::IsNullOrEmpty($body))
        {
            Write-Warning "Element 'main' at $link contains no text"
            continue
        }

        if ($debug -eq 1)
        {
            Write-Output "INNERTEXT $innertext"
            Write-Output "BODY $body"
        }

        # Send-MailMessage writes nothing to the pipeline, so its result cannot
        # be used as a success flag. Turn failures into exceptions instead and
        # report success only when none was thrown.
        try
        {
            Send-MailMessage -From "$sender" -To "$receiver" -Subject "Zoll Meldung: $subject" -Body "$body" -SmtpServer "$smtp" -BodyAsHtml -encoding $encoding -ErrorAction Stop
            Write-Output "Send."
        }
        catch
        {
            Write-Warning "Mail for '$subject' could not be sent: $($_.Exception.Message)"
        }
    }
}
finally
{
    # Always quit and release Internet Explorer, even when the loop aborts,
    # otherwise stray IE processes pile up.
    $ie.Quit()
    # [void] keeps the reference count returned by ReleaseComObject out of the output.
    [void][System.Runtime.Interopservices.Marshal]::ReleaseComObject($ie)
    Remove-Variable ie
}
Upvotes: 2