Androme

Reputation: 2449

Scraping ASP.NET page, simulate click

I am trying to scrape all pages of http://www.menorcarentals.com/en/villas, but I am having problems: it gives me the first page every time. My approach is to find all inputs and selects on the page and then set the value of __EVENTTARGET to the name of the control I want to click. This has worked for me before, but this site just won't budge.
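For reference, this is roughly the set of hidden fields a WebForms __doPostBack submit normally carries. It is only an illustrative sketch (the class and method names are made up, and it assumes the standard hidden inputs WebForms renders), not my actual code:

    using System.Collections.Generic;
    using CsQuery;

    public static class PostbackSketch
    {
        // Illustrative only: collect the hidden fields a WebForms __doPostBack POST
        // usually needs. Everything except __EVENTTARGET is copied verbatim from the
        // page that was just downloaded; a missing or stale __VIEWSTATE/__EVENTVALIDATION
        // usually makes the server ignore the postback and hand back the first page again.
        public static Dictionary<string, string> BuildPostbackFields(CQ dom, string eventTarget)
        {
            return new Dictionary<string, string>
            {
                { "__EVENTTARGET", eventTarget },                                     // control name from __doPostBack('...')
                { "__EVENTARGUMENT", dom["#__EVENTARGUMENT"].Val() ?? "" },           // usually empty for pager links
                { "__VIEWSTATE", dom["#__VIEWSTATE"].Val() ?? "" },
                { "__VIEWSTATEGENERATOR", dom["#__VIEWSTATEGENERATOR"].Val() ?? "" }, // only rendered by newer framework versions
                { "__EVENTVALIDATION", dom["#__EVENTVALIDATION"].Val() ?? "" }        // only rendered when event validation is on
            };
        }
    }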

Method to get all Input Fields

public static Dictionary<string, string> GetInputFields(CQ dom)
{
    Dictionary<string, string> result = new Dictionary<string, string>();

    // Get all inputs that have both a name and a value
    foreach (var v in dom.Find("input"))
    {
        var value = v.Cq().Attr("value");
        var key = v.Cq().Attr("name");
        if (!string.IsNullOrWhiteSpace(key) && !string.IsNullOrWhiteSpace(value))
        {
            result[key] = value; // add or overwrite
        }
    }

    // Get the selected option of every select
    foreach (var s in dom.Select("select"))
    {
        var select = s.Cq();
        var key = select.Attr("name");
        if (string.IsNullOrWhiteSpace(key))
        {
            continue;
        }
        foreach (var option in select.Children("option"))
        {
            var opt = option.Cq();
            if (!string.IsNullOrWhiteSpace(opt.Attr("selected")))
            {
                result[key] = opt.Val(); // add or overwrite
            }
        }
    }

    return result;
}

My code to run through the different pages

    string searchPageUrl = "http://www.menorcarentals.com/en/villas";
    var html = DownloadHelper.Download(searchPageUrl);
    while (true)
    {
        CQ dom = html;

        // parse the page and get the info I need here

        // Find the next page: the pager link right after the element marked "current"
        var pagination = dom.Select("#ctl00_Content_dpVillas").Children();
        bool foundCurrent = false;
        string clickElementName = string.Empty;
        foreach (var pagi in pagination)
        {
            if (pagi.Classes.Any(x => x.ToLower() == "current"))
            {
                foundCurrent = true;
            }
            else if (foundCurrent)
            {
                // Pull the control name out of the javascript:__doPostBack('...') href
                var href = pagi.Cq().Attr("href");
                clickElementName = RegexHelper.Match(@"doPostBack\(\'([^']+)", href);
                break;
            }
        }
        if (string.IsNullOrWhiteSpace(clickElementName))
        {
            break; // no more pages
        }
        var inputFields = ScraperHelper.GetInputFields(dom);

        // Simulate clicking the next-page link
        inputFields["__EVENTTARGET"] = clickElementName;

        html = DownloadHelper.Post(searchPageUrl, inputFields);
    }
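DownloadHelper.Download and DownloadHelper.Post are small wrappers that aren't shown above. A rough HttpClient-based stand-in (not my real implementation) could look like this; the important detail is that one shared CookieContainer is used for the GET and every POST, since ASP.NET sites often tie paging state to the session cookie as well as to the posted fields:

    // Rough stand-in for the DownloadHelper used above -- not the real implementation.
    using System.Collections.Generic;
    using System.Net;
    using System.Net.Http;

    public static class DownloadHelper
    {
        // One client with a shared cookie container, reused across all requests
        private static readonly HttpClient Client = new HttpClient(
            new HttpClientHandler
            {
                CookieContainer = new CookieContainer(),
                UseCookies = true,
                AllowAutoRedirect = true
            });

        public static string Download(string url)
        {
            // Plain GET; any cookies the server sets are kept for later requests
            return Client.GetStringAsync(url).Result;
        }

        public static string Post(string url, Dictionary<string, string> formFields)
        {
            // Sends application/x-www-form-urlencoded, the same as a browser form submit
            using (var content = new FormUrlEncodedContent(formFields))
            {
                var response = Client.PostAsync(url, content).Result;
                return response.Content.ReadAsStringAsync().Result;
            }
        }
    }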

Upvotes: 1

Views: 1530

Answers (1)

nazarkin659

Reputation: 503

Turn off JavaScript along with cookies in your browser (delete the cookies before turning them off) and then look at the actual page that CsQuery will see.

This might be the reason you can't parse anything; for example, the actual content of the page may be loaded with AJAX.
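To do the same check from code instead of the browser, fetch the page with a single bare GET (no cookies carried over, no JavaScript executed) and see whether the pager markup from the question appears in the raw HTML at all:

    // Quick check: does the raw server response even contain the pager markup,
    // or is the villa list filled in client-side (AJAX) after the page loads?
    using System;
    using System.Net.Http;
    using CsQuery;

    class RawHtmlCheck
    {
        static void Main()
        {
            using (var client = new HttpClient())
            {
                string raw = client.GetStringAsync("http://www.menorcarentals.com/en/villas").Result;
                CQ dom = raw;

                Console.WriteLine("Pager present in raw HTML:       " + (dom["#ctl00_Content_dpVillas"].Length > 0));
                Console.WriteLine("__VIEWSTATE present in raw HTML: " + (dom["#__VIEWSTATE"].Length > 0));
            }
        }
    }

If the pager is not there, posting __EVENTTARGET back will never page through the list, and you would have to find and call the AJAX endpoint directly instead.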

Upvotes: 0
