Reputation: 95
I am trying to get the HTML page from this link and store its content in a specific file in C# using the HAP (HtmlAgilityPack) class library. I am using the Get method of the HtmlWeb class. It compiles and runs perfectly fine, but "file.txt" is never created. Here are the class and its client. Can anyone help, please?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
namespace WebCrawler
{
    /// <summary>
    /// Requests a single web page through HtmlAgilityPack as soon as it is constructed.
    /// </summary>
    class Crawler
    {
        /// <summary>Creates a crawler without a target; no page is requested.</summary>
        public Crawler()
        {
        }

        /// <summary>
        /// Stores <paramref name="Url"/>, echoes it to the console, loads the page and
        /// then calls <c>HtmlWeb.Get</c> with "file.txt" as the path argument.
        /// </summary>
        /// <param name="Url">Address of the page to request.</param>
        public Crawler(string Url)
        {
            this.Url = Url;
            Console.WriteLine(Url);

            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(Url);

            // NOTE(review): this is the call reported as never creating "file.txt".
            web.Get(Url, "file.txt");
        }

        /// <summary>Address of the page; assigned by the constructor and publicly settable.</summary>
        public string Url { get; set; }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
namespace WebCrawler
{
    /// <summary>Console entry point that drives the <see cref="Crawler"/>.</summary>
    class Program
    {
        /// <summary>
        /// Constructs a <see cref="Crawler"/> for a fixed URL; all work happens
        /// inside the Crawler constructor.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string startUrl = "https://code.google.com/p/abot/";
            var crawler = new Crawler(startUrl);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
namespace WebCrawler
{
    /// <summary>Console entry point that drives the <see cref="Crawler"/>.</summary>
    class Program
    {
        /// <summary>
        /// Constructs a <see cref="Crawler"/> for a fixed URL; all work happens
        /// inside the Crawler constructor.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string startUrl = "https://code.google.com/p/abot/";
            var crawler = new Crawler(startUrl);
        }
    }
}
Thanks
Upvotes: 1
Views: 8354
Reputation: 5047
You have to call the Save method on an object of type HtmlDocument. Here's an example of loading the index page of the Google website and saving it to the file out.html.
const string url = "http://google.com";
HtmlWeb page = new HtmlWeb();
// Load downloads the page and parses it into an in-memory HtmlDocument.
HtmlDocument document = page.Load(url);
// Save writes the parsed document to disk. A separate page.Get(url, ...) call
// is not needed: it would only issue a second request for the same page.
document.Save("out.html");
Upvotes: 1
Reputation: 3624
Why not just do something like this?
// Take the document's complete markup (outer HTML of the root node) and write it out as text.
string html = doc.DocumentNode.OuterHtml;
System.IO.File.WriteAllText(@"c:\file.txt", html);
Upvotes: 2