Reputation: 47
I am trying to prevent unnecessary downloading of large datasets by reading the publicly available files directly from their online location. Surprisingly, I cannot find an existing answer to my question on Stack Overflow.
I use JupyterLab, and have tried the following:
import xarray as xr
url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
data = xr.open_dataset(url)
This produces the following error message:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
210 try:
--> 211 file = self._cache[self._key]
212 except KeyError:
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
55 with self._lock:
---> 56 value = self._cache[key]
57 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'd2d8feab-7dab-434f-ae9c-a79c655b259b']
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In[4], line 3
1 #read in dataset from website
2 url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
----> 3 data = xr.open_dataset(url)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/api.py:611, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
599 decoders = _resolve_decoders_kwargs(
600 decode_cf,
601 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
607 decode_coords=decode_coords,
608 )
610 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 611 backend_ds = backend.open_dataset(
612 filename_or_obj,
613 drop_variables=drop_variables,
614 **decoders,
615 **kwargs,
616 )
617 ds = _dataset_from_backend_dataset(
618 backend_ds,
619 filename_or_obj,
(...)
629 **kwargs,
630 )
631 return ds
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:649, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)
628 def open_dataset( # type: ignore[override] # allow LSP violation, not supporting **kwargs
629 self,
630 filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
(...)
646 autoclose=False,
647 ) -> Dataset:
648 filename_or_obj = _normalize_path(filename_or_obj)
--> 649 store = NetCDF4DataStore.open(
650 filename_or_obj,
651 mode=mode,
652 format=format,
653 group=group,
654 clobber=clobber,
655 diskless=diskless,
656 persist=persist,
657 lock=lock,
658 autoclose=autoclose,
659 )
661 store_entrypoint = StoreBackendEntrypoint()
662 with close_on_error(store):
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:410, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
404 kwargs = dict(
405 clobber=clobber, diskless=diskless, persist=persist, format=format
406 )
407 manager = CachingFileManager(
408 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
409 )
--> 410 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:357, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
355 self._group = group
356 self._mode = mode
--> 357 self.format = self.ds.data_model
358 self._filename = self.ds.filepath()
359 self.is_remote = is_remote_uri(self._filename)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:419, in NetCDF4DataStore.ds(self)
417 @property
418 def ds(self):
--> 419 return self._acquire()
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:413, in NetCDF4DataStore._acquire(self, needs_lock)
412 def _acquire(self, needs_lock=True):
--> 413 with self._manager.acquire_context(needs_lock) as root:
414 ds = _nc4_require_group(root, self._group, self._mode)
415 return ds
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
135 del self.args, self.kwds, self.func
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
139 raise RuntimeError("generator didn't yield") from None
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
196 @contextlib.contextmanager
197 def acquire_context(self, needs_lock=True):
198 """Context manager for acquiring a file."""
--> 199 file, cached = self._acquire_with_cache_info(needs_lock)
200 try:
201 yield file
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
215 kwargs = kwargs.copy()
216 kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
218 if self._mode == "w":
219 # ensure file doesn't get overridden when opened again
220 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2470, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:2107, in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -75] NetCDF: Malformed or unexpected Constraint: 'https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc'
Is it in any way possible to read these files into an xarray dataset directly from their online location?
Thank you!
Upvotes: 1
Views: 45
Reputation: 32326
Your url
is pointing to the catalog of the THREDDS server. This is a valid URL for a website, with a dataset
constraint, but it is not the path to a netCDF file. If you use the url
in a web browser and click the link, you'll be taken to a page that lists the correct URLs for downloading the data via all the access protocols that the server supports. You'll want the OPeNDAP link for use with xarray
.
Usually (by default), OPeNDAP links are made available through the dodsC
sub-path after the thredds
path. So you should keep the root of the catalog link up to (but not including) the catalog
path, branch to dodsC
, then append everything after the dataset
constraint. So that is:
url = "https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
In R
, I get this:
library(ncdfCF)
url <- "https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
(ds <- open_ncdf(url))
#> <Dataset> tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231
#> Resource : https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc
#> Format : classic
#> Type : generic netCDF data
#> Conventions: CF-1.4
#> Keep open : FALSE
#>
#> Variables:
#> name long_name units data_type axes
#> tas Near-Surface Air Temperature K NC_FLOAT x, y, time, height
#>
#> Axes:
#> id axis name long_name length unlim values unit
#> 0 T time Time 365 U [2014-01-01 11:30:00.288 ... 2014-12-31 11:30:0... days since 1984-09-01 00:00:00.0
#> 3 X x X Coordinate Of Projection 629 [0 ... 6908000] m
#> 4 Y y Y Coordinate Of Projection 709 [0 ... 7788000] m
#> 2 maxStrlen64 64 [1 ... 64]
#> Z height Height 1 [2] m
#>
#> Attributes:
#> id name type length value
#> 0 CDI NC_CHAR 64 Climate Data Interface version 2.0.5 (https://m...
#> 1 Conventions NC_CHAR 6 CF-1.4
#> 2 institute_id NC_CHAR 8 HCLIMcom
#> 3 model_id NC_CHAR 14 HCLIM43_Arctic
#> 4 experiment_id NC_CHAR 41 ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist
#> 5 domain NC_CHAR 5 ARC11
#> 6 frequency NC_CHAR 3 day
#> 7 driving_model_id NC_CHAR 4 ERA5
#> 8 creation_date NC_CHAR 24 Sun May 5 01:20:07 2024
#> 9 title NC_CHAR 28 Near-Surface Air Temperature
#> 10 comment NC_CHAR 21 Created with gl/xtool
#> 11 history NC_CHAR 1800 Wed Nov 13 14:44:44 2024: cdo mergetime tas_fp_...
#> 12 NCO NC_CHAR 95 netCDF Operators version 4.8.1 (Homepage = http...
#> 13 CDO NC_CHAR 64 Climate Data Operators version 2.0.5 (https://m...
#> 14 DODS.strlen NC_INT 1 0
#> 15 DODS_EXTRA.Unlimited_Dimension NC_CHAR 4 time
Upvotes: 3