Reputation: 460
I am trying to read from this source - Yahoo Finance from line 17 onwards. I want to get the date, highprice, lowprice, etc, which are the 6 columns.
My code:
import pandas as pd
import datetime
import io
import urllib.request  # `import urllib` alone does NOT expose urllib.request

# Yahoo Finance chart-API endpoint returning EUR/USD quotes as CSV,
# preceded by ~17 metadata lines that must be skipped.
urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'

with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode()

# pandas.read_csv expects a file path or a file-like object, not the raw
# CSV text itself — passing the text raises OSError ("file name too long").
# Wrap the downloaded string in io.StringIO to present it as a file.
df = pd.read_csv(io.StringIO(sourcePage), skiprows=17, header=None)
df.head()
OSError Traceback (most recent call last)
<ipython-input-10-bf04141dec86> in <module>()
7 with urllib.request.urlopen(urlToVisit) as response:
8 sourcePage = response.read().decode()
----> 9 df = pd.read_csv(sourcePage, skiprows=17, header=None)
10 df.head()
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
496 skip_blank_lines=skip_blank_lines)
497
--> 498 return _read(filepath_or_buffer, kwds)
499
500 parser_f.__name__ = name
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
273
274 # Create the parser.
--> 275 parser = TextFileReader(filepath_or_buffer, **kwds)
276
277 if (nrows is not None) and (chunksize is not None):
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
588 self.options['has_index_names'] = kwds['has_index_names']
589
--> 590 self._make_engine(self.engine)
591
592 def _get_options_with_defaults(self, engine):
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
729 def _make_engine(self, engine='c'):
730 if engine == 'c':
--> 731 self._engine = CParserWrapper(self.f, **self.options)
732 else:
733 if engine == 'python':
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1101 kwds['allow_leading_cols'] = self.index_col is not False
1102
-> 1103 self._reader = _parser.TextReader(src, **kwds)
1104
1105 # XXX
pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()
pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()
Would greatly appreciate if someone could help me rectify! Thank you.
Upvotes: 0
Views: 302
Reputation: 2729
According to the documentation, pandas.read_csv
needs a file path or a file-like object as the first parameter — not the raw CSV text itself.
So you can either save the file locally and re-read it using pandas.read_csv
method, or you can use io.StringIO
in Python 3.x or StringIO.StringIO
in Python 2.x to convert the data.
Here is sample code:
import io
import urllib
import urllib.request
import pandas as pd

# Endpoint serving one year of EUR/USD quotes as CSV (with a metadata header).
urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'

# Fetch the page body and decode it to text while the connection is open.
with urllib.request.urlopen(urlToVisit) as response:
    raw_body = response.read()
downloaded_text = raw_body.decode()

# read_csv wants a file-like object, so present the in-memory text through
# io.StringIO; the first 18 lines are metadata, not data rows.
in_memory_file = io.StringIO(downloaded_text)
df = pd.read_csv(in_memory_file, skiprows=18, header=None, sep=",")
print(df.head())
Hope it helps.
Upvotes: 1