Reputation: 1513
I am attempting to download/subset a list of gridded parameters from the UCAR THREDDS servers.
Here is how I am setting things up:
import pandas as pd
import xarray as xr
import concurrent.futures
from functools import partial
from tqdm import tqdm
from siphon.catalog import TDSCatalog
# Spatial subset, ordered [west, south, east, north] (degrees) -- this is the
# order in which pull_data_from_url indexes the list.
bounds = [-10, -10, 10, 10]

# THREDDS catalog that lists the files named below.
cat_url = 'https://thredds.rda.ucar.edu/thredds/catalog/files/g/ds633.0/e5.oper.an.pl/200001/catalog.xml'

# One file per parameter; dataset_names[i] is requested with var_strs[i].
dataset_names = [
    'e5.oper.an.pl.128_131_u.ll025uv.2000010100_2000010123.nc',
    'e5.oper.an.pl.128_132_v.ll025uv.2000010100_2000010123.nc',
    'e5.oper.an.pl.128_130_t.ll025sc.2000010100_2000010123.nc',
    'e5.oper.an.pl.128_133_q.ll025sc.2000010100_2000010123.nc',
    'e5.oper.an.pl.128_129_z.ll025sc.2000010100_2000010123.nc',
]
var_strs = ['U', 'V', 'T', 'Q', 'Z']

# Time window passed to every request.
start_date = pd.to_datetime('2000-01-01 00:00:00')
end_date = pd.to_datetime('2000-01-02 00:00:00')
def pull_data_from_url(dataset_name, var, cat_url, bounds, start_date, end_date):
    """Fetch one variable from a THREDDS dataset, subset in space and time.

    Parameters
    ----------
    dataset_name : str
        Key of the dataset inside the catalog at ``cat_url``.
    var : str
        Name of the variable to request.
    cat_url : str
        URL of the THREDDS catalog XML.
    bounds : sequence
        Bounding box indexed as ``[west, south, east, north]``.
    start_date, end_date
        Start and end of the requested time range.

    Returns
    -------
    xarray.Dataset
        The subset returned by the server, opened through a
        ``NetCDF4DataStore``.
    """
    # Look the dataset up in the catalog and open its subset access.
    ncss = TDSCatalog(cat_url).datasets[dataset_name].subset()

    # Describe the request: bounding box, time window and variable.
    query = ncss.query()
    west, south = bounds[0], bounds[1]
    east, north = bounds[2], bounds[3]
    query.lonlat_box(north=north, south=south, east=east, west=west)
    query.time_range(start_date, end_date)
    query.variables(var)

    # This is the call that actually sends the request to the server.
    nc = ncss.get_data(query)
    return xr.open_dataset(xr.backends.NetCDF4DataStore(nc))
If I run the function in a loop (in series), it works fine:
# Sequential baseline: one request at a time, with tqdm tracking progress
# over the variable list.
for dat, var in zip(dataset_names, tqdm(var_strs)):
    pull_data_from_url(
        dataset_name=dat,
        var=var,
        cat_url=cat_url,
        bounds=bounds,
        start_date=start_date,
        end_date=end_date,
    )
However, if I try to do it in parallel:
# Bind the arguments shared by every request; only (dataset_name, var)
# differ from task to task and are supplied positionally.
partial_pull_data = partial(
    pull_data_from_url,
    cat_url=cat_url,
    bounds=bounds,
    start_date=start_date,
    end_date=end_date,
)

# No max_workers is given, so the executor picks its default thread count.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map is lazy about yielding results; list() drains it so that
    # every result is retrieved and any worker exception is re-raised here.
    list(tqdm(executor.map(lambda p: partial_pull_data(*p), zip(dataset_names, var_strs)), total=len(dataset_names)))
I get the following error message:
Traceback (most recent call last):
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:715 in urlopen
httplib_response = self._make_request(
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:467 in _make_request
six.raise_from(e, None)
File <string>:3 in raise_from
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:462 in _make_request
httplib_response = conn.getresponse()
File ~\AppData\Local\miniconda3\lib\http\client.py:1377 in getresponse
response.begin()
File ~\AppData\Local\miniconda3\lib\http\client.py:320 in begin
version, status, reason = self._read_status()
File ~\AppData\Local\miniconda3\lib\http\client.py:289 in
_read_status
raise RemoteDisconnected("Remote end closed connection without"
RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File ~\AppData\Local\miniconda3\lib\site-packages\requests\adapters.py:486 in send
resp = conn.urlopen(
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:799 in urlopen
retries = retries.increment(
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\util\retry.py:550 in increment
raise six.reraise(type(error), error, _stacktrace)
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\packages\six.py:769 in reraise
raise value.with_traceback(tb)
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:715 in urlopen
httplib_response = self._make_request(
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:467 in _make_request
six.raise_from(e, None)
File <string>:3 in raise_from
File ~\AppData\Local\miniconda3\lib\site-packages\urllib3\connectionpool.py:462 in _make_request
httplib_response = conn.getresponse()
File ~\AppData\Local\miniconda3\lib\http\client.py:1377 in getresponse
response.begin()
File ~\AppData\Local\miniconda3\lib\http\client.py:320 in begin
version, status, reason = self._read_status()
File ~\AppData\Local\miniconda3\lib\http\client.py:289 in
_read_status
raise RemoteDisconnected("Remote end closed connection without"
ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File ~\AppData\Local\miniconda3\lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File c:\users\kchudler\onedrive - research triangle institute\documents\hydromet_tools\era5_tools\untitled0.py:55
list(tqdm(executor.map(lambda p: partial_pull_data(*p), zip(dataset_names, var_strs)), total=len(dataset_names)))
File ~\AppData\Local\miniconda3\lib\site-packages\tqdm\std.py:1178 in __iter__
for obj in iterable:
File ~\AppData\Local\miniconda3\lib\concurrent\futures\_base.py:609 in result_iterator
yield fs.pop().result()
File ~\AppData\Local\miniconda3\lib\concurrent\futures\_base.py:446 in result
return self.__get_result()
File ~\AppData\Local\miniconda3\lib\concurrent\futures\_base.py:391 in __get_result
raise self._exception
File ~\AppData\Local\miniconda3\lib\concurrent\futures\thread.py:58 in run
result = self.fn(*self.args, **self.kwargs)
File c:\users\kchudler\onedrive - research triangle institute\documents\hydromet_tools\era5_tools\untitled0.py:55 in <lambda>
list(tqdm(executor.map(lambda p: partial_pull_data(*p), zip(dataset_names, var_strs)), total=len(dataset_names)))
File c:\users\kchudler\onedrive - research triangle institute\documents\hydromet_tools\era5_tools\untitled0.py:31 in pull_data_from_url
nc = ncss.get_data(query)
File ~\AppData\Local\miniconda3\lib\site-packages\siphon\ncss.py:114 in get_data
resp = self.get_query(query)
File ~\AppData\Local\miniconda3\lib\site-packages\siphon\http_util.py:410 in get_query
return self.get(url, query)
File ~\AppData\Local\miniconda3\lib\site-packages\siphon\http_util.py:486 in get
resp = self._session.get(path, params=params)
File ~\AppData\Local\miniconda3\lib\site-packages\requests\sessions.py:602 in get
return self.request("GET", url, **kwargs)
File ~\AppData\Local\miniconda3\lib\site-packages\requests\sessions.py:589 in request
resp = self.send(prep, **send_kwargs)
File ~\AppData\Local\miniconda3\lib\site-packages\requests\sessions.py:703 in send
r = adapter.send(request, **kwargs)
File ~\AppData\Local\miniconda3\lib\site-packages\requests\adapters.py:501 in send
raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Can someone provide help on this matter? The files take a while to retrieve, so I would like a faster way to get them than iterating over them in a loop.
Upvotes: 0
Views: 95
Reputation: 5853
My first guess is that the host of that data is disconnecting you due to too many simultaneous requests. See if it works if you use `ThreadPoolExecutor(max_workers=1)`. If that works, then you can try to gradually increase `max_workers`, though I would go absolutely no higher than 4. Remember, that server is a shared resource and it's important to be a nice user of free and open resources and not overwhelm the server.
Upvotes: 1