"""Subset operation for daops (module ``daops.ops.subset``)."""

from clisops.ops.subset import subset as clisops_subset
from roocs_utils.parameter import parameterise

from daops.processor import process
from daops.utils import consolidate
from daops.utils import normalise

# Public API of this module: only the `subset` operation is exported.
__all__ = [
    "subset",
]


def subset(
    collection,
    time=None,
    area=None,
    level=None,
    output_dir=None,
    output_type="netcdf",
    split_method="time:auto",
    file_namer="standard",
):
    """
    Subset input dataset according to parameters. Can be subsetted by level,
    area and time.

    Parameters
    ----------
    collection: Collection of datasets to process, sequence or string of comma
        separated dataset identifiers.
    time: Time period - Time range to subset over, sequence of two time values
        or string of two / separated time values
    area: Area to subset over, sequence or string of comma separated lat and
        lon bounds. Must contain 4 values.
    level: Level range - Level values to subset over, sequence of two level
        values or string of two / separated level values
    output_dir: str or path like object describing output directory for
        output files.
    output_type: {"netcdf", "nc", "zarr", "xarray"}
    split_method: {"time:auto"}
    file_namer: {"standard", "simple"}

    Returns
    -------
    List of outputs in the selected type: a list of xarray Datasets or
    file paths.

    Examples
    --------
    | collection: ("cmip6.ukesm1.r1.gn.tasmax.v20200101",)
    | time: ("1999-01-01T00:00:00", "2100-12-30T00:00:00")
    | area: (-5.,49.,10.,65)
    | level: (1000.,)
    | output_type: "netcdf"
    | output_dir: "/cache/wps/procs/req0111"
    | split_method: "time:decade"
    | file_namer: "facet_namer"
    """
    # Convert raw inputs into validated roocs_utils parameter objects.
    parameters = parameterise(collection=collection, time=time, area=area, level=level)

    # Consolidate data inputs so they can be passed to Xarray
    collection = consolidate.consolidate(
        parameters.get("collection"), time=parameters.get("time")
    )

    # Normalise (i.e. "fix") data inputs based on "character"
    norm_collection = normalise.normalise(collection)

    # ResultSet captures the current local variables for provenance/metadata.
    rs = normalise.ResultSet(vars())

    # Iterate over each normalised dataset; `norm_dset` avoids shadowing the
    # `norm_collection` mapping being iterated.
    for dset, norm_dset in norm_collection.items():
        # Process each input dataset (either in series or parallel)
        rs.add(
            dset,
            process(
                clisops_subset,
                norm_dset,
                **{
                    "time": parameters.get("time"),
                    "area": parameters.get("area"),
                    "level": parameters.get("level"),
                    "output_type": output_type,
                    "output_dir": output_dir,
                    "split_method": split_method,
                    "file_namer": file_namer,
                },
            ),
        )

    return rs