Reputation: 125
I'm trying to write a crawler for a specific website. At some point I have to click an link. The element is found but the click always fails. I also tried to it via the parent element with the same result. All other elements before can be accessed without problems and also clicks on them work fine. Has anbody an idea what I'm doing wrong?
public class KauflandAngebotScraperPW : IScraper, IDisposable
{
private const string URL = "https://www.kaufland.de/";
private const int KETTENID = 1;
private static SemaphoreSlim _lock = new SemaphoreSlim(initialCount: 1);
private IPlaywright _playwright;
private string _dir;
private ShopprContext _db = new ShopprContext();
private ShopprContextProcedures _procedures;
private CancellationTokenSource _ctcCrawler;
private bool _crawlerRunning = false;
private Subject<KauflandAngebotScraperPW> _scrapingFinished = new Subject<KauflandAngebotScraperPW>();
private Subject<string> _message = new Subject<string>(); // The actual data stream
private bool disposedValue;
public bool CrawlerRunning { get => _crawlerRunning; set => _crawlerRunning = value; }
public IObservable<object> ScrapingFinished => _scrapingFinished;
public IObservable<string> Message => _message;
public KauflandAngebotScraperPW()
{
_dir = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + "/Shoppr/Kaufland/";
if (!Directory.Exists(_dir))
Directory.CreateDirectory(_dir);
_procedures = new ShopprContextProcedures(_db);
}
protected virtual void Dispose(bool disposing)
{
if (!disposedValue)
{
if (disposing)
{
// TODO: Verwalteten Zustand (verwaltete Objekte) bereinigen
}
// TODO: Nicht verwaltete Ressourcen (nicht verwaltete Objekte) freigeben und Finalizer überschreiben
// TODO: Große Felder auf NULL setzen
disposedValue = true;
}
}
// // TODO: Finalizer nur überschreiben, wenn "Dispose(bool disposing)" Code für die Freigabe nicht verwalteter Ressourcen enthält
// ~KauflandAngebotScraperPW()
// {
// // Ändern Sie diesen Code nicht. Fügen Sie Bereinigungscode in der Methode "Dispose(bool disposing)" ein.
// Dispose(disposing: false);
// }
public void Dispose()
{
// Ändern Sie diesen Code nicht. Fügen Sie Bereinigungscode in der Methode "Dispose(bool disposing)" ein.
Dispose(disposing: true);
GC.SuppressFinalize(this);
}
public void start()
{
if (_crawlerRunning)
return;
_crawlerRunning = true;
_ctcCrawler = new CancellationTokenSource();
var task = Task.Run(async () => {
ILoggerFactory loggerFactory = LoggerFactory.Create(builder =>
{
builder.SetMinimumLevel(LogLevel.Debug);
builder.AddFilter((f, _) => f == "PlaywrightSharp.Playwright");
});
//Auswahlmethode zurücksetzen
_playwright = await Playwright.CreateAsync(loggerFactory: loggerFactory, debug: "pw:api");
}, _ctcCrawler.Token);
task.Wait();
_ = ParseFilialenAsync();
}
public void stop()
{
if (!_crawlerRunning)
return;
_ctcCrawler.Cancel();
_crawlerRunning = false;
}
private async Task ParseFilialenAsync()
{
var filialen = _db.Filiale.Where(x => x.Kette == KETTENID);
foreach(var filiale in filialen)
{
await _lock.WaitAsync(_ctcCrawler.Token);
_ = ParseAngeboteAsync(filiale);
}
}
private async Task ParseAngeboteAsync(Filiale filiale)
{
IBrowser puppet;
IBrowserContext context;
IPage page = null;
puppet = await _playwright.Webkit.LaunchAsync(headless: false);
context = await puppet.NewContextAsync();
_message.OnNext($"Überprüfe aktuelle Angebote für Kaufland-Markt in {filiale.Straße}, {filiale.Plz} {filiale.Ort}");
try
{
if (_ctcCrawler.IsCancellationRequested)
{
return;
}
page = await context.NewPageAsync();
await page.GoToAsync(URL);
await Task.Delay(10000);
// cookie Setzen wenn notwendig
await AcceptCookieAsync(page);
//Setze den Markt
await Task.Delay(1000);
await SelectMarketAsync(filiale, page);
await Task.Delay(5000);
}
catch(Exception e)
{
_message.OnNext(e.Message);
await page?.ScreenshotAsync($"c:\\temp\\Kaufland_{filiale.Id}_{DateTime.Now.Ticks}.png");
}
finally
{
await context.CloseAsync();
await puppet.CloseAsync();
}
}
private async Task AcceptCookieAsync(IPage page)
{
try
{
var cookieAcceptButton = await page.WaitForSelectorAsync("button[class='cookie-alert-extended-button']", timeout: 5000);
await cookieAcceptButton.ClickAsync();
}
catch
{
// Is ok
}
}
private async Task SelectMarketAsync(Filiale filiale, IPage page)
{
//var marketSelektor = await page.WaitForSelectorAsync("a[class='m-store-flyout__link']", timeout: 5000);
var marketSelektor = await page.QuerySelectorAsync("div[class='m-navigation-meta__item m-navigation-meta__item-store']");
if(marketSelektor != null)
{
await marketSelektor.ClickAsync(50);
}
await Task.Delay(2000);
//li
IElementHandle elementHandle = await page.QuerySelectorAsync("a[class='a-link a-link--icon-arrow a-link--storeflyout-change']");
var changeFiliale = await elementHandle.WaitForSelectorAsync("xpath=..", WaitForState.Visible);
if(changeFiliale != null)
{
_message.OnNext((await changeFiliale.IsEnabledAsync()).ToString());
_message.OnNext((await changeFiliale.IsHiddenAsync()).ToString());
_message.OnNext((await changeFiliale.IsVisibleAsync()).ToString());
await changeFiliale.FocusAsync();
await changeFiliale.ClickAsync(40, force: true);
}
}
}
This is the code on the website
This is what the Exception is giving me:
This is the output of the Playwright log:
Navigiere zu Kaufland-Seite
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:42.048Z pw:api navigating to "https://www.kaufland.de/", waiting until "load"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:42.213Z pw:api navigated to "https://www.kaufland.de/"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.180Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.193Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.195Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.203Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:43.206Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:44.716Z pw:api navigated to "https://consentcdn.cookiebot.com/sdk/bc-v3.min.html"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:44.720Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.188Z pw:api navigated to "https://www.kaufland.de/"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.412Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.415Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.418Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.422Z pw:api navigated to "about:blank"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.425Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:45.952Z pw:api "networkidle" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:46.677Z pw:api navigated to "https://consentcdn.cookiebot.com/sdk/bc-v3.min.html"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:46.693Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:47.188Z pw:api "networkidle" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:41:51.504Z pw:api "load" event fired
Akzeptiere Cookies
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.552Z pw:api waiting for selector "button[class='cookie-alert-extended-button']" to be visible
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.590Z pw:api selector resolved to visible <button tabindex="1" type="button" class="cookie-alert-e…>Zustimmen </button>
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.613Z pw:api attempting click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.613Z pw:api waiting for element to be visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.646Z pw:api element is not stable - waiting...
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.761Z pw:api element is visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.761Z pw:api scrolling into view if needed
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.763Z pw:api done scrolling
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.766Z pw:api checking that element receives pointer events at (865.48,541)
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.770Z pw:api element does receive pointer events
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.770Z pw:api performing click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api click action done
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api waiting for scheduled navigations to finish
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:01.816Z pw:api navigations have finished
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.923Z pw:api navigated to "https://kauflandstiftung.demdex.net/dest5.html?d_nsid=0#https%3A%2F%2Fwww.kaufland.de"
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.934Z pw:api "load" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:03.934Z pw:api "domcontentloaded" event fired
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:04.448Z pw:api "networkidle" event fired
Klicke auf Markt
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.887Z pw:api attempting click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.888Z pw:api waiting for element to be visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:11.929Z pw:api element is not stable - waiting...
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.025Z pw:api element is visible, enabled and stable
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.025Z pw:api scrolling into view if needed
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.026Z pw:api done scrolling
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.029Z pw:api checking that element receives pointer events at (324.55,43)
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.032Z pw:api element does receive pointer events
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.032Z pw:api performing click action
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api click action done
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api waiting for scheduled navigations to finish
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:12.308Z pw:api navigations have finished
Klicke auf 'Ändere Filiale'
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:25.212Z pw:api waiting for selector "xpath=.." to be visible
PlaywrightSharp.Playwright: Information: 2021-04-03T14:42:25.227Z pw:api selector resolved to hidden <li class="m-linklist__item">…</li>
Screenshot of the element which should be clicked
Upvotes: 3
Views: 16251
Reputation: 125
After some time of trying around I found a work around. The root problem seems to be that Playwright is not recognizing the change of the visiblity in the elements after
var marketSelektor = await page.QuerySelectorAsync("div[class='m-navigation-meta__item m-navigation-meta__item-store']");
if(marketSelektor != null)
{
await marketSelektor.ClickAsync(50);
}
Therefore the execution of the following lines fails with the log that the element is not visible
IElementHandle elementHandle = await page.QuerySelectorAsync("a[class='a-link a-link--icon-arrow a-link--storeflyout-change']");
var changeFiliale = await elementHandle.WaitForSelectorAsync("xpath=..", WaitForState.Visible);
if(changeFiliale != null)
{
await changeFiliale.FocusAsync();
await changeFiliale.ClickAsync(40, force: true);
}
So I wondered if it would be the same if I execute a javascript-snippet via the playwright method WaitForFunctionAsync
and inserted the followin block
page.WaitForFunctionAsync("document.querySelector(\"a[class='a-link a-link--icon-arrow a-link--storeflyout-change']\").click()");
await Task.Delay(45000);
It has the result I want to have.
I have to do this without a await
and place the Task.Delay
afterwards because otherwise it will throw a timeout even if the elements are visible long before the 30 seconds standard timeout is reached. I still wonder why.
I also observed that the performance of WebKit, Chromium and Firefox are differ vastly with WebKit being the slowest. But this is a different matter.
Upvotes: 3