Reputation: 43
I want to scrape all the data contained in this page. Unfortunately, I can only extract the first three rows.
import requests
from bs4 import BeautifulSoup
# Fetch the page with a plain HTTP request and parse the returned HTML.
response = requests.get("https://www.coingecko.com/fr/pièces/bitcoin#markets")
soup = BeautifulSoup(response.text, "html.parser")

# Look up the markets table through its class attribute.
My_table = soup.find("table", {"class": "table table-scrollable"})
My_table  # bare expression; presumably left in to display the table in a notebook/REPL cell

# Build one list of non-empty cell texts per table row.
data = []
rows = My_table.find_all('tr')
for row in rows:
    cols = row.find_all('td')
    cols = [ele.text.strip() for ele in cols]
    data.append([ele for ele in cols if ele])  # Get rid of empty values
data  # bare expression; presumably left in to display the result in a notebook/REPL cell
Thank you for your help
Upvotes: 1
Views: 1330
Reputation: 195408
The data you see on the page is loaded via JavaScript. If you open the Network tab of the Firefox/Chrome developer tools, you can see where the data comes from.
You can use this script to print some data:
import requests
from bs4 import BeautifulSoup
# URL the markets table is fetched from (presumably the request the page
# itself issues from JavaScript to fill the table).
url = 'https://www.coingecko.com/fr/pi%C3%A8ces/1/markets_tab'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

data = []
for tr in soup.select('tr[class]'):
    # Rows carrying the "sponsored" class are left out of the result.
    if 'sponsored' in tr['class']:
        continue
    # The first cell is ignored, the next five are kept, and any remaining
    # cells are discarded. Raises ValueError if a row has fewer than six cells.
    _, name, paire, cours, spread, prof, *_ = tr.select('td')
    data.append((
        name.get_text(strip=True),
        paire.get_text(strip=True),
        cours.div.get_text(strip=True),  # only the text of the first <div> in the cell
        spread.get_text(strip=True),
        prof.get_text(strip=True),
    ))

# print the data:
# One shared layout for the header line and every data line keeps the columns aligned.
row_format = '{:<4} {:<30} {:<20} {:<20} {:<10} {:<20}'
print(row_format.format('No.', 'Name', 'Paire', 'Cours', 'Spread', '+2 % de profondeur'))
for i, row in enumerate(data, 1):
    print(row_format.format(i, *row))
Prints:
No. Name Paire Cours Spread +2 % de profondeur
1 Binance BTC/USDT 8 094,45 $ 0.02% 1 633 105 $
2 Bitfinex BTC/USD 8 118,38 $ 0.12% 8 187 887 $
3 Bitfinex BTC/JPY 8 123,59 $ 0.11% 8 086 839 $
4 Bitfinex BTC/EUR 8 116,79 $ 0.14% 7 791 188 $
5 Coinbase Pro BTC/USD 8 092,08 $ 0.01% 597 987 $
6 Bitfinex BTC/GBP 8 074,11 $ 0.16% 7 565 745 $
7 FTX (Spot) BTC/USD 8 093,00 $ 0.01% 5 621 768 $
8 BW.com BTC/USDT 8 117,56 $ 0.02% 2 629 169 $
...and so on.
Upvotes: 5
Reputation: 71451
You can use `selenium`:
from selenium import webdriver
from bs4 import BeautifulSoup as soup
# Start a Chrome session; the argument is a local path to the chromedriver executable.
# NOTE(review): passing the driver path positionally looks like the Selenium 3 calling
# convention — confirm it against the installed Selenium version.
d = webdriver.Chrome('/Users/jamespetullo/Downloads/chromedriver')
# Load the markets page in the browser session.
d.get('https://www.coingecko.com/fr/pi%C3%A8ces/bitcoin#markets')
def get_page(page):
    """Extract the column headers and row values from the last markets table.

    :param page: parsed document exposing ``find_all`` (here a BeautifulSoup
        object built from the browser's page source).
    :return: tuple ``(h, data)`` where ``h`` is the list of ``<th>`` texts of
        the first table row and ``data`` is a list with one list of ``<td>``
        texts per remaining row.
    :raises IndexError: if no table with class ``table table-scrollable`` exists.
    :raises ValueError: if the table has fewer than three ``<tr>`` rows.
    """
    # Only the last matching table on the page is used.
    table = page.find_all('table', {'class': 'table table-scrollable'})[-1]
    # The first row is treated as the header; the two rows right after it are
    # discarded and everything that follows is kept as data.
    _h, _, _, *_data = table.find_all('tr')
    h = [i.get_text(strip=True) for i in _h.find_all('th')]
    data = [[i.get_text(strip=True) for i in b.find_all('td')] for b in _data]
    return h, data
# Parse the browser's current page source and print the (headers, rows) tuple.
print(get_page(soup(d.page_source, 'html.parser')))
Output:
#h:
['#', "Plate-forme d'échange", 'Paire', 'Cours', 'Spread', '+2\xa0% de profondeur', '-2\xa0% de profondeur', 'Volume sur 24\xa0h', 'Volume en %', 'Dernière négociation', 'Trust Score']
#data (first ten results):
[['1', 'Binance', 'BTC/USDT', '8\u202f074,92\xa0$US8047,85 USDT', '0.01%', '2 556 042 $', '1 369 823 $', '761\u202f371\u202f363\xa0$US94288,444 BTC', '2,10%', 'Récemment', ''], ['2', 'Bitfinex', 'BTC/USD', '8\u202f046,85\xa0$US8019 USD', '0.12%', '5 743 309 $', '4 942 297 $', '117\u202f165\u202f262\xa0$US14560,393 BTC', '0,32%', 'Récemment', ''], ['3', 'Coinbase Pro', 'BTC/USD', '8\u202f056,73\xa0$US8056,73 USD', '0.0%', '688 574 $', '517 655 $', '165\u202f468\u202f767\xa0$US20537,958 BTC', '0,46%', 'Récemment', ''], ['4', 'FTX (Spot)', 'BTC/USD', '8\u202f080,32\xa0$US8053 USD', '0.02%', '5 458 954 $', '5 256 300 $', '5\u202f934\u202f100\xa0$US734,389 BTC', '0,02%', 'Récemment', ''], ['5', 'Bitfinex', 'BTC/EUR', '8\u202f040,10\xa0$US7183,8 EUR', '0.14%', '5 508 424 $', '4 944 104 $', '6\u202f581\u202f570\xa0$US818,593 BTC', '0,02%', 'Récemment', ''], ['6', 'Bitfinex', 'BTC/GBP', '8\u202f038,31\xa0$US6104,7 GBP', '0.16%', '5 757 961 $', '4 676 216 $', '1\u202f021\u202f611\xa0$US127,093 BTC', '0,00%', 'Récemment', ''], ['7', 'Kraken', 'XBT/USD', '8\u202f064,58\xa0$US8055,3 USD', '0.0%', '2 088 184 $', '1 729 815 $', '88\u202f739\u202f662\xa0$US11003,629 XBT', '0,24%', 'Récemment', ''], ['8', 'Bitfinex', 'BTC/JPY', '8\u202f043,28\xa0$US869399,17981209 JPY', '0.11%', '5 742 700 $', '4 202 762 $', '2\u202f237\u202f374\xa0$US278,167 BTC', '0,01%', 'Récemment', ''], ['9', 'Kraken', 'XBT/EUR', '8\u202f065,50\xa0$US7223,2 EUR', '0.0%', '1 974 979 $', '1 354 403 $', '66\u202f749\u202f670\xa0$US8275,946 XBT', '0,18%', 'Récemment', ''], ['10', 'BW.com', 'BTC/USDT', '8\u202f058,44\xa0$US8031,43 USDT', '0.02%', '167 744 $', '372 432 $', '101\u202f258\u202f986\xa0$US12565,579 BTC', '0,28%', 'Récemment', '']]
`selenium` will also enable you to click the "show more" button to access additional data:
from selenium.webdriver.common.by import By

# Start with the table as it is before any "show more" interaction.
all_data = [get_page(soup(d.page_source, 'html.parser'))]
while True:
    # Collect the links whose data-target marks them as "show more" controls.
    # `find_elements_by_tag_name` was removed in Selenium 4; the
    # `find_elements(By.TAG_NAME, ...)` form is available in Selenium 3 as well.
    r = [
        i for i in d.find_elements(By.TAG_NAME, 'a')
        if i.get_attribute('data-target') == 'gecko-table.showMore'
    ]
    # Stop as soon as fewer than two such links are present.
    if len(r) < 2:
        break
    # Send a newline keystroke to the last matching link to activate it.
    r[-1].send_keys('\n')
    # Re-parse the page after the interaction and record the table again.
    all_data.append(get_page(soup(d.page_source, 'html.parser')))
Upvotes: -1