Reputation: 1
I am trying to get company data from a website called SimilarWeb, but after I make a lot of requests it recognizes my script as a bot. Is there any way to bypass this check? Alternatively, can you suggest a website from which this data is easier to scrape? We can't use LinkedIn, by the way.
const puppeteer = require("puppeteer");

const searchCompany = "zoominfo.com";
const Link = `https://www.similarweb.com/website/${searchCompany}/#overview`;

// CSS selector matching each row of the company-info panel on the page.
const INFO_ROW_SELECTOR = ".data-company-info__row";

/**
 * Opens the SimilarWeb overview page for `searchCompany`, saves a screenshot
 * to "sc.png", reads the name / location / industry rows from the
 * company-info panel and prints them as an object.
 *
 * The browser is always closed, even when navigation or scraping fails, so a
 * failed run does not leave a Chromium window (and the Node process) hanging.
 */
(async function () {
  let browserOpen;
  try {
    browserOpen = await puppeteer.launch({
      headless: false,
      defaultViewport: null,
    });
    const newTab = await browserOpen.newPage();
    await newTab.goto(Link);

    // Taken before waiting for the selector, so the file is written even if
    // the wait below times out.
    await newTab.screenshot({ path: "sc.png" });

    await newTab.waitForSelector(INFO_ROW_SELECTOR);

    // The callback runs inside the page; the selector is passed in as an
    // argument because page-side code cannot see Node-side variables.
    const ans = await newTab.evaluate((selector) => {
      const rows = document.querySelectorAll(selector);
      // A missing row yields null instead of throwing inside the page.
      const textAt = (index) => (rows[index] ? rows[index].textContent : null);
      return {
        name: textAt(0),
        location: textAt(3),
        industry: textAt(5),
      };
    }, INFO_ROW_SELECTOR);

    console.log(ans);
  } catch (err) {
    console.error(err);
  } finally {
    // browserOpen is still undefined when puppeteer.launch() itself failed.
    if (browserOpen) {
      try {
        await browserOpen.close();
      } catch (closeErr) {
        console.error(closeErr);
      }
    }
  }
})();
Upvotes: 0
Views: 244
Reputation: 31
Just out of curiosity - what do you use similarweb data for?
You can try using https://github.com/bda-research/node-crawler, which has parameters for request delays and the maximum number of connections.
Upvotes: 0