Reputation: 3
I am trying to make web scraper in C# for NSE. The code works with other sites but when ran on https://www.nseindia.com/ it gives error - An error occurred while sending the request. Unable to read data from the transport connection: Operation timed out.
I have tried with two different approaches Try1() & Try2(). Can anyone please tell what I am missing in my code?
class Program
{
public void Try1() {
HtmlWeb web = new HtmlWeb();
HttpStatusCode statusCode = HttpStatusCode.OK;
web.UserAgent = GetUserAgent();
web.PostResponse = (request, response) =>
{
if (response != null)
{
statusCode = response.StatusCode;
Console.WriteLine("Status Code: " + statusCode);
}
};
Task<HtmlDocument> task = web.LoadFromWebAsync(GetURL());
HtmlDocument document = task.Result;
}
public void Try2() {
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(GetURL());
request.UserAgent = GetUserAgent();
request.Accept= "*/*;";
using (var response = (HttpWebResponse)(request.GetResponse()))
{
HttpStatusCode code = response.StatusCode;
if (code == HttpStatusCode.OK)
{
using (StreamReader streamReader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
{
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.OptionFixNestedTags = true;
htmlDoc.Load(streamReader);
Console.WriteLine("Document Loaded.");
}
}
}
}
private string GetURL() {
// return "https://html-agility-pack.net/";
return "https://www.nseindia.com/";
}
private string GetUserAgent() {
return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36";
}
}
Upvotes: 0
Views: 551
Reputation: 2028
Your are lack of headers towards Accept
and others so it couldn't response back.
Besides that, I would recommend you using HttpClient
instead of HttpWebRequest
public static async Task GetHtmlData(string url)
{
HttpClient httpClient = new HttpClient();
using (var request = new HttpRequestMessage(HttpMethod.Get, new Uri(url)))
{
request.Headers.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml, charset=UTF-8, text/javascript, */*; q=0.01");
request.Headers.TryAddWithoutValidation("Accept-Encoding", "gzip, deflate, br");
request.Headers.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36 OPR/67.0.3575.137");
request.Headers.TryAddWithoutValidation("Accept-Charset", "ISO-8859-1");
request.Headers.TryAddWithoutValidation("X-Requested-With", "XMLHttpRequest");
using (var response = await httpClient.SendAsync(request).ConfigureAwait(false))
{
response.EnsureSuccessStatusCode();
using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false))
using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
using (var streamReader = new StreamReader(decompressedStream))
{
var result = await streamReader.ReadToEndAsync().ConfigureAwait(false);
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.OptionFixNestedTags = true;
htmlDoc.LoadHtml(result);
Console.WriteLine(result);
Console.WriteLine("Document Loaded.");
}
}
}
Use it by
await GetHtmlData("https://www.nseindia.com/");
Upvotes: 2