Reputation: 61
I've been trying to retrieve stock prices from Yahoo! Finance, for example for Apple Inc. My code (using Python 2) looks like this:
import requests
from bs4 import BeautifulSoup as bs
# Address of the Yahoo! Finance profile page for AAPL.
url = 'http://finance.yahoo.com/quote/AAPL/profile?p=AAPL'
r = requests.get(url)
# Name the parser explicitly: without it bs4 emits a warning and picks
# whichever parser happens to be installed, so results can differ per machine.
soup = bs(r.text, 'html.parser')
The problem is that when I look at the raw HTML behind this webpage, the class names are dynamic (see the figure below). This makes it hard for BeautifulSoup to find the tags. How should I interpret these classes, and how can I get the data?
PS: 1) I know about pandas_datareader.data, but that's for historical data; I want real-time stock data.
2) I don't want to use Selenium to open a new browser window.
Upvotes: 6
Views: 3815
Reputation: 13459
Not sure what you mean by 'dynamic' in this case, but have you considered using CSS selectors?
With BeautifulSoup you could get it like this, for example:
# select() returns a list of every tag matching the CSS selector; [0] takes the first match.
soup.select('div#quote-header-info section span')[0]
There are also variations on this pattern you could use, such as the '>' (direct child) combinator.
You could get the same with just lxml, no need for BeautifulSoup:
import lxml.html as html
# Define the page address here so the snippet runs on its own.
url = 'http://finance.yahoo.com/quote/AAPL/profile?p=AAPL'
# parse() loads the document from the URL; getroot() returns its root element.
page = html.parse(url).getroot()
# '>' restricts the match to direct children; [0] takes the first matching span.
content = page.cssselect('div#quote-header-info section > span:first-child')[0].text
This also illustrates a more specific selector.
If you're interested in more efficient DOM traversal, look into XPath.
Upvotes: 4
Reputation: 180391
The data is evidently populated using React, so you won't be able to parse it reliably using class names and the like. You can get all the data in JSON format from the page source, in the root.App.main script:
import requests
from bs4 import BeautifulSoup
import re
from json import loads
# Name the parser explicitly: without it bs4 emits a warning and picks
# whichever parser happens to be installed.
soup = BeautifulSoup(
    requests.get("http://finance.yahoo.com/quote/AAPL/profile?p=AAPL").content,
    "html.parser"
)
# Find the <script> whose text contains the "root.App.main = {...}" assignment.
# Raw strings keep \s and \{ as regex escapes rather than (invalid) string
# escapes, and the dots are escaped so they only match literal dots.
script = soup.find("script", text=re.compile(r"root\.App\.main")).text
# Capture the object literal on the right-hand side and decode it as JSON.
data = loads(re.search(r"root\.App\.main\s+=\s+(\{.*\})", script).group(1))
print(data)
This gives you a whole load of JSON; you can go through the data and pick out what you need, as shown below:
from pprint import pprint

# Walk down to the dispatcher's stores, then dump only the quote summary entry.
dispatcher = data["context"]["dispatcher"]
stores = dispatcher["stores"]
pprint(stores[u'QuoteSummaryStore'])
Which gives you:
{u'price': {u'averageDailyVolume10Day': {u'fmt': u'63.06M',
u'longFmt': u'63,056,525',
u'raw': 63056525},
u'averageDailyVolume3Month': {u'fmt': u'36.53M',
u'longFmt': u'36,527,196',
u'raw': 36527196},
u'currency': u'USD',
u'currencySymbol': u'$',
u'exchange': u'NMS',
u'exchangeName': u'NasdaqGS',
u'longName': u'Apple Inc.',
u'marketState': u'PRE',
u'maxAge': 1,
u'openInterest': {},
u'postMarketChange': {u'fmt': u'0.11', u'raw': 0.11000061},
u'postMarketChangePercent': {u'fmt': u'0.10%',
u'raw': 0.0009687416},
u'postMarketPrice': {u'fmt': u'113.66', u'raw': 113.66},
u'postMarketSource': u'DELAYED',
u'postMarketTime': 1474502277,
u'preMarketChange': {u'fmt': u'0.42', u'raw': 0.41999817},
u'preMarketChangePercent': {u'fmt': u'0.37%',
u'raw': 0.0036987949},
u'preMarketPrice': {u'fmt': u'113.97', u'raw': 113.97},
u'preMarketSource': u'FREE_REALTIME',
u'preMarketTime': 1474536411,
u'quoteType': u'EQUITY',
u'regularMarketChange': {u'fmt': u'-0.02', u'raw': -0.019996643},
u'regularMarketChangePercent': {u'fmt': u'-0.02%',
u'raw': -0.00017607327},
u'regularMarketDayHigh': {u'fmt': u'113.99', u'raw': 113.989},
u'regularMarketDayLow': {u'fmt': u'112.44', u'raw': 112.441},
u'regularMarketOpen': {u'fmt': u'113.82', u'raw': 113.82},
u'regularMarketPreviousClose': {u'fmt': u'113.57',
u'raw': 113.57},
u'regularMarketPrice': {u'fmt': u'113.55', u'raw': 113.55},
u'regularMarketSource': u'FREE_REALTIME',
u'regularMarketTime': 1474488000,
u'regularMarketVolume': {u'fmt': u'31.57M',
u'longFmt': u'31,574,028.00',
u'raw': 31574028},
u'shortName': u'Apple Inc.',
u'strikePrice': {},
u'symbol': u'AAPL',
u'underlyingSymbol': None},
u'price,summaryDetail': {},
u'quoteType': {u'exchange': u'NMS',
u'headSymbol': None,
u'longName': u'Apple Inc.',
u'market': u'us_market',
u'messageBoardId': u'finmb_24937',
u'quoteType': u'EQUITY',
u'shortName': u'Apple Inc.',
u'symbol': u'AAPL',
u'underlyingExchangeSymbol': None,
u'underlyingSymbol': None,
u'uuid': u'8b10e4ae-9eeb-3684-921a-9ab27e4d87aa'},
u'summaryDetail': {u'ask': {u'fmt': u'114.00', u'raw': 114},
u'askSize': {u'fmt': u'100',
u'longFmt': u'100',
u'raw': 100},
u'averageDailyVolume10Day': {u'fmt': u'63.06M',
u'longFmt': u'63,056,525',
u'raw': 63056525},
u'averageVolume': {u'fmt': u'36.53M',
u'longFmt': u'36,527,196',
u'raw': 36527196},
u'averageVolume10days': {u'fmt': u'63.06M',
u'longFmt': u'63,056,525',
u'raw': 63056525},
u'beta': {u'fmt': u'1.52', u'raw': 1.51744},
u'bid': {u'fmt': u'113.68', u'raw': 113.68},
u'bidSize': {u'fmt': u'400',
u'longFmt': u'400',
u'raw': 400},
u'dayHigh': {u'fmt': u'113.99', u'raw': 113.989},
u'dayLow': {u'fmt': u'112.44', u'raw': 112.441},
u'dividendRate': {u'fmt': u'2.28', u'raw': 2.28},
u'dividendYield': {u'fmt': u'2.01%', u'raw': 0.0201},
u'exDividendDate': {u'fmt': u'2016-08-04',
u'raw': 1470268800},
u'expireDate': {},
u'fiftyDayAverage': {u'fmt': u'108.61',
u'raw': 108.608284},
u'fiftyTwoWeekHigh': {u'fmt': u'123.82', u'raw': 123.82},
u'fiftyTwoWeekLow': {u'fmt': u'89.47', u'raw': 89.47},
u'fiveYearAvgDividendYield': {},
u'forwardPE': {u'fmt': u'12.70', u'raw': 12.701344},
u'marketCap': {u'fmt': u'611.86B',
u'longFmt': u'611,857,399,808',
u'raw': 611857399808},
u'maxAge': 1,
u'navPrice': {},
u'open': {u'fmt': u'113.82', u'raw': 113.82},
u'openInterest': {},
u'payoutRatio': {u'fmt': u'24.80%', u'raw': 0.248},
u'previousClose': {u'fmt': u'113.57', u'raw': 113.57},
u'priceToSalesTrailing12Months': {u'fmt': u'2.78',
u'raw': 2.777534},
u'regularMarketDayHigh': {u'fmt': u'113.99',
u'raw': 113.989},
u'regularMarketDayLow': {u'fmt': u'112.44',
u'raw': 112.441},
u'regularMarketOpen': {u'fmt': u'113.82', u'raw': 113.82},
u'regularMarketPreviousClose': {u'fmt': u'113.57',
u'raw': 113.57},
u'regularMarketVolume': {u'fmt': u'31.57M',
u'longFmt': u'31,574,028',
u'raw': 31574028},
u'strikePrice': {},
u'totalAssets': {},
u'trailingAnnualDividendRate': {u'fmt': u'2.13',
u'raw': 2.13},
u'trailingAnnualDividendYield': {u'fmt': u'1.88%',
u'raw': 0.018754954},
u'trailingPE': {u'fmt': u'13.24', u'raw': 13.240438},
u'twoHundredDayAverage': {u'fmt': u'102.39',
u'raw': 102.39367},
u'volume': {u'fmt': u'31.57M',
u'longFmt': u'31,574,028',
u'raw': 31574028},
u'yield': {},
u'ytdReturn': {}},
u'symbol': u'AAPL'}
Upvotes: 8