Reputation: 227
I have some code that uses HTML Agility Pack. What I am trying to do is replace all of the links in some HTML content.
For example, I want to replace
http://oldserver/Documents/1.pdf
with
http://newserver/Documents/2.pdf
I can enumerate all of the links and read their values, but when I call doc.Save() it saves the original HTML source, not the updated HTML. How can I get the updated HTML from the HtmlDocument?
/// <summary>
/// Rewrites the <c>href</c> attribute of every anchor in an HTML string and
/// returns the resulting markup.
/// </summary>
/// <param name="contentHtml">HTML markup whose anchor targets should be rewritten.</param>
/// <param name="web">Passed through unchanged to <c>RepairHyperlinkAddress</c>.</param>
/// <returns>The document serialized after the <c>href</c> values were replaced.</returns>
private string FixHyperlinks(string contentHtml, SPWeb web)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(contentHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so guard before enumerating to avoid a NullReferenceException.
    HtmlAgilityPack.HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href]");
    if (links != null)
    {
        foreach (HtmlNode link in links)
        {
            HtmlAttribute att = link.Attributes["href"];
            att.Value = RepairHyperlinkAddress(att.Value, web);
        }
    }

    // Serialize straight to text: this avoids an encode/decode round trip
    // through a byte stream and disposes the writer deterministically.
    using (StringWriter writer = new StringWriter())
    {
        doc.Save(writer);
        return writer.ToString();
    }
}
Upvotes: 3
Views: 6337
Reputation: 139306
This should work better:
// Rewrite the target of every anchor that has an href attribute.
// SelectNodes returns null when nothing matches, so check before looping.
HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a[@href]");
if (anchors != null)
{
    foreach (HtmlNode anchor in anchors)
    {
        HtmlAttribute href = anchor.Attributes["href"];
        href.Value = RepairHyperlinkAddress(href.Value, web);
    }
}

// Do the same for the source of every image that has a src attribute.
HtmlNodeCollection images = doc.DocumentNode.SelectNodes("//img[@src]");
if (images != null)
{
    foreach (HtmlNode image in images)
    {
        HtmlAttribute src = image.Attributes["src"];
        src.Value = RepairHyperlinkAddress(src.Value, web);
    }
}
Upvotes: 3