Laurent
Laurent

Reputation: 43

scraping coins market with BeautifulSoup

I want to scrape all the data contained on this page. Unfortunately, I can only extract the first three rows.

import requests
from bs4 import BeautifulSoup

# Download the raw HTML of the Bitcoin markets page.
response = requests.get("https://www.coingecko.com/fr/pièces/bitcoin#markets")
soup = BeautifulSoup(response.text, "html.parser")

# Look up the first <table> matching the class "table table-scrollable".
My_table = soup.find("table", {"class": "table table-scrollable"})
My_table  # bare expression: only displays something in an interactive session

# Build one list of non-empty, whitespace-stripped cell texts per <tr>.
data = []
for row in My_table.find_all('tr'):
    cell_texts = (cell.text.strip() for cell in row.find_all('td'))
    data.append([text for text in cell_texts if text])
data  # bare expression: only displays something in an interactive session

Thank you for your help

Upvotes: 1

Views: 1330

Answers (2)

Andrej Kesely
Andrej Kesely

Reputation: 195408

The data you see on the page is loaded via JavaScript. If you open the developer tools in Firefox/Chrome and look at the Network tab, you can see where the data comes from.

You can use this script to print some data:

import requests
from bs4 import BeautifulSoup

# Endpoint requested directly instead of the main coin page.
url = 'https://www.coingecko.com/fr/pi%C3%A8ces/1/markets_tab'

response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Collect one (name, pair, price, spread, depth) tuple per table row.
data = []
for market_row in soup.select('tr[class]'):
    # Rows carrying the 'sponsored' class are left out of the result.
    if 'sponsored' not in market_row['class']:
        cells = market_row.select('td')
        # The first cell is ignored, the next five are kept, the rest are ignored.
        _, exchange, pair, price, spread, depth, *_ = cells
        data.append((
            exchange.get_text(strip=True),
            pair.get_text(strip=True),
            # Only the first <div> inside the price cell is read.
            price.div.get_text(strip=True),
            spread.get_text(strip=True),
            depth.get_text(strip=True),
        ))

# print the data:
row_format = '{:<4} {:<30} {:<20} {:<20} {:<10} {:<20}'
print(row_format.format('No.', 'Name', 'Paire', 'Cours', 'Spread', '+2 % de profondeur'))
for number, fields in enumerate(data, 1):
    print(row_format.format(number, *fields))

Prints:

No.  Name                           Paire                Cours                Spread     +2 % de profondeur  
1    Binance                        BTC/USDT             8 094,45 $           0.02%      1 633 105 $         
2    Bitfinex                       BTC/USD              8 118,38 $           0.12%      8 187 887 $         
3    Bitfinex                       BTC/JPY              8 123,59 $           0.11%      8 086 839 $         
4    Bitfinex                       BTC/EUR              8 116,79 $           0.14%      7 791 188 $         
5    Coinbase Pro                   BTC/USD              8 092,08 $           0.01%      597 987 $           
6    Bitfinex                       BTC/GBP              8 074,11 $           0.16%      7 565 745 $         
7    FTX (Spot)                     BTC/USD              8 093,00 $           0.01%      5 621 768 $         
8    BW.com                         BTC/USDT             8 117,56 $           0.02%      2 629 169 $         

...and so on.

Upvotes: 5

Ajax1234
Ajax1234

Reputation: 71451

You can use selenium:

from selenium import webdriver
from bs4 import BeautifulSoup as soup
# Start a Chrome session driven by Selenium; the argument is a local chromedriver path.
# NOTE(review): the path is machine-specific — point it at your own chromedriver.
# NOTE(review): newer Selenium releases may expect a Service object rather than a
# positional driver path — confirm against the installed version.
d = webdriver.Chrome('/Users/jamespetullo/Downloads/chromedriver')
# Load the Bitcoin markets page in the browser.
d.get('https://www.coingecko.com/fr/pi%C3%A8ces/bitcoin#markets')
def get_page(page):
    """Extract the header labels and body rows of the last markets table.

    ``page`` is a parsed BeautifulSoup document. The last
    ``<table class="table table-scrollable">`` in it is used.

    Returns a ``(headers, rows)`` tuple: ``headers`` is the list of ``<th>``
    texts from the first ``<tr>``; ``rows`` is a list holding, for each
    remaining ``<tr>`` after the two that follow the header, the list of its
    ``<td>`` texts. All texts are whitespace-stripped.
    """
    tables = page.find_all('table', {'class': 'table table-scrollable'})
    table_rows = tables[-1].find_all('tr')
    # The first row is the header; the next two rows are discarded
    # (presumably non-market rows — TODO confirm against the live page).
    header_row, _, _, *body_rows = table_rows
    headers = [cell.get_text(strip=True) for cell in header_row.find_all('th')]
    rows = []
    for body_row in body_rows:
        rows.append([cell.get_text(strip=True) for cell in body_row.find_all('td')])
    return headers, rows

# Parse the HTML currently rendered by the browser and print the (headers, rows) tuple.
print(get_page(soup(d.page_source, 'html.parser')))

Output:

#h:
['#', "Plate-forme d'échange", 'Paire', 'Cours', 'Spread', '+2\xa0% de profondeur', '-2\xa0% de profondeur', 'Volume sur 24\xa0h', 'Volume en %', 'Dernière négociation', 'Trust Score']
#data (first ten results):
[['1', 'Binance', 'BTC/USDT', '8\u202f074,92\xa0$US8047,85 USDT', '0.01%', '2 556 042 $', '1 369 823 $', '761\u202f371\u202f363\xa0$US94288,444 BTC', '2,10%', 'Récemment', ''], ['2', 'Bitfinex', 'BTC/USD', '8\u202f046,85\xa0$US8019 USD', '0.12%', '5 743 309 $', '4 942 297 $', '117\u202f165\u202f262\xa0$US14560,393 BTC', '0,32%', 'Récemment', ''], ['3', 'Coinbase Pro', 'BTC/USD', '8\u202f056,73\xa0$US8056,73 USD', '0.0%', '688 574 $', '517 655 $', '165\u202f468\u202f767\xa0$US20537,958 BTC', '0,46%', 'Récemment', ''], ['4', 'FTX (Spot)', 'BTC/USD', '8\u202f080,32\xa0$US8053 USD', '0.02%', '5 458 954 $', '5 256 300 $', '5\u202f934\u202f100\xa0$US734,389 BTC', '0,02%', 'Récemment', ''], ['5', 'Bitfinex', 'BTC/EUR', '8\u202f040,10\xa0$US7183,8 EUR', '0.14%', '5 508 424 $', '4 944 104 $', '6\u202f581\u202f570\xa0$US818,593 BTC', '0,02%', 'Récemment', ''], ['6', 'Bitfinex', 'BTC/GBP', '8\u202f038,31\xa0$US6104,7 GBP', '0.16%', '5 757 961 $', '4 676 216 $', '1\u202f021\u202f611\xa0$US127,093 BTC', '0,00%', 'Récemment', ''], ['7', 'Kraken', 'XBT/USD', '8\u202f064,58\xa0$US8055,3 USD', '0.0%', '2 088 184 $', '1 729 815 $', '88\u202f739\u202f662\xa0$US11003,629 XBT', '0,24%', 'Récemment', ''], ['8', 'Bitfinex', 'BTC/JPY', '8\u202f043,28\xa0$US869399,17981209 JPY', '0.11%', '5 742 700 $', '4 202 762 $', '2\u202f237\u202f374\xa0$US278,167 BTC', '0,01%', 'Récemment', ''], ['9', 'Kraken', 'XBT/EUR', '8\u202f065,50\xa0$US7223,2 EUR', '0.0%', '1 974 979 $', '1 354 403 $', '66\u202f749\u202f670\xa0$US8275,946 XBT', '0,18%', 'Récemment', ''], ['10', 'BW.com', 'BTC/USDT', '8\u202f058,44\xa0$US8031,43 USDT', '0.02%', '167 744 $', '372 432 $', '101\u202f258\u202f986\xa0$US12565,579 BTC', '0,28%', 'Récemment', '']]

Selenium also lets you click the "show more" button to load additional data:

from selenium.webdriver.common.by import By

# Start with the rows visible on the initially loaded page.
all_data = [get_page(soup(d.page_source, 'html.parser'))]
while True:
    # find_elements(By.TAG_NAME, ...) is used instead of find_elements_by_tag_name,
    # which was removed in Selenium 4.3; this form also works on Selenium 3.
    links = d.find_elements(By.TAG_NAME, 'a')
    # Keep only the anchors wired to the table's "show more" action.
    r = [i for i in links if i.get_attribute('data-target') == 'gecko-table.showMore']
    # Stop once fewer than two such links are present on the page.
    if len(r) < 2:
        break
    # Sending a newline to the last link activates it like a click.
    r[-1].send_keys('\n')
    # Re-parse the page after the click and store the resulting (headers, rows) tuple.
    all_data.append(get_page(soup(d.page_source, 'html.parser')))

Upvotes: -1

Related Questions