Skip to content

climate_ref_esmvaltool.recipe #

as_facets(group) #

Convert a group from the datasets dataframe to ESMValTool facets.

Parameters:

Name Type Description Default
group DataFrame

A group of datasets representing a single instance_id.

required

Returns:

Type Description
A :obj:`dict` containing facet-value pairs.
Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py
def as_facets(
    group: pd.DataFrame,
) -> dict[str, Any]:
    """Translate one group of dataset rows into ESMValTool facets.

    Parameters
    ----------
    group:
        A group of datasets representing a single instance_id.

    Returns
    -------
        A :obj:`dict` mapping facet names to values. A facet holds a list
        when the group has several distinct values for it and a single value
        otherwise. The ``timerange`` facet is only present when a timerange
        could be determined for the group.

    """
    # The project is the first dot-separated component of the instance_id.
    project = group.iloc[0].instance_id.split(".", 2)[0]
    result: dict[str, Any] = {"project": project}
    for facet, column in FACETS[project].items():
        distinct = group[column].unique().tolist()
        if len(distinct) > 1:
            result[facet] = distinct
        else:
            result[facet] = distinct[0]
    period = as_timerange(group)
    if period is not None:
        result["timerange"] = period
    return result

as_isodate(timestamp) #

Format a timestamp as an ISO 8601 datetime.

For example, '2014-12-16 12:00:00' will be formatted as '20141216T120000'.

Parameters:

Name Type Description Default
timestamp Timestamp

The timestamp to format.

required
Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py
def as_isodate(timestamp: pd.Timestamp) -> str:
    """Render a timestamp in compact ISO 8601 form.

    For example, '2014-12-16 12:00:00' will be formatted as '20141216T120000'.

    Parameters
    ----------
    timestamp
        The timestamp to format.

    Returns
    -------
        The string form of the timestamp with the space between date and
        time replaced by "T" and all "-" and ":" characters removed.

    """
    # One pass over the text: " " becomes "T", "-" and ":" are dropped.
    compact = str.maketrans({" ": "T", "-": None, ":": None})
    return str(timestamp).translate(compact)

as_timerange(group) #

Format the timeranges from a dataframe as an ESMValTool timerange.

Parameters:

Name Type Description Default
group DataFrame

The dataframe describing a single dataset.

required

Returns:

Type Description
A timerange formatted as "start/end", or None if the dataframe contains no start times or no end times.
Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py
def as_timerange(group: pd.DataFrame) -> str | None:
    """Format the timeranges from a dataframe as an ESMValTool timerange.

    Parameters
    ----------
    group
        The dataframe describing a single dataset.

    Returns
    -------
        A timerange.
    """
    # TODO: apply some rounding to avoid problems?
    # https://github.com/ESMValGroup/ESMValCore/issues/2048
    start_times = group.start_time.dropna()
    if start_times.empty:
        return None
    end_times = group.end_time.dropna()
    if end_times.empty:
        return None  # pragma: no cover
    return f"{as_isodate(start_times.min())}/{as_isodate(end_times.max())}"

dataframe_to_recipe(files, group_by=('instance_id',), equalize_timerange=False) #

Convert the datasets dataframe to a recipe "variables" section.

Parameters:

Name Type Description Default
files DataFrame

The pandas dataframe describing the input files.

required
group_by tuple[str, ...]

The columns to group the input files by.

('instance_id',)
equalize_timerange bool

If True, use the timerange that is covered by all datasets.

False

Returns:

Type Description
A "variables" section that can be used in an ESMValTool recipe.
Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py
def dataframe_to_recipe(
    files: pd.DataFrame,
    group_by: tuple[str, ...] = ("instance_id",),
    equalize_timerange: bool = False,
) -> dict[str, Any]:
    """Convert the datasets dataframe to a recipe "variables" section.

    The dataframe is split into groups, each group is converted to facets
    with :func:`as_facets`, and the resulting datasets are collected under
    their ``short_name``.

    Parameters
    ----------
    files
        The pandas dataframe describing the input files.
    group_by
        The columns to group the input files by.
    equalize_timerange
        If True, use the timerange that is covered by all datasets.
        Datasets without a timerange are left untouched, and nothing is
        changed when no dataset has a timerange at all.

    Returns
    -------
        A "variables" section that can be used in an ESMValTool recipe.
    """
    variables: dict[str, Any] = {}
    for _, group in files.groupby(list(group_by)):
        facets = as_facets(group)
        short_name = facets.pop("short_name")
        variable = variables.setdefault(short_name, {"additional_datasets": []})
        variable["additional_datasets"].append(facets)

    if equalize_timerange:
        # Only datasets that carry a timerange take part in the equalization.
        timed_datasets = [
            dataset
            for variable in variables.values()
            for dataset in variable["additional_datasets"]
            if "timerange" in dataset
        ]
        # Without this guard max()/min() would raise ValueError on an empty
        # sequence, e.g. for an empty dataframe or when no dataset has a
        # timerange.
        if timed_datasets:
            start_times, end_times = [], []
            for dataset in timed_datasets:
                start, end = dataset["timerange"].split("/")
                start_times.append(start)
                end_times.append(end)
            # The bounds are compared as strings: the latest start and the
            # earliest end delimit the period covered by all datasets.
            timerange = f"{max(start_times)}/{min(end_times)}"
            for dataset in timed_datasets:
                dataset["timerange"] = timerange

    return variables

load_recipe(recipe) #

Load a recipe.

Parameters:

Name Type Description Default
recipe str

The name of an ESMValTool recipe.

required

Returns:

Type Description
The loaded recipe.
Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py
def load_recipe(recipe: str) -> Recipe:
    """Fetch an ESMValTool recipe and parse it from YAML.

    Parameters
    ----------
    recipe
        The name of an ESMValTool recipe.

    Returns
    -------
        The loaded recipe, with every dict and list rebuilt so that no
        container object is shared between different parts of the recipe.
    """

    def unshare(node: Any) -> Any:
        # Rebuild dicts and lists recursively so that objects in the recipe
        # are not shared; any other value is returned as-is.
        if isinstance(node, list):
            return [unshare(child) for child in node]
        if isinstance(node, dict):
            return {key: unshare(value) for key, value in node.items()}
        return node

    recipe_path = Path(_RECIPES.fetch(recipe))
    content = yaml.safe_load(recipe_path.read_text(encoding="utf-8"))
    return unshare(content)  # type: ignore[no-any-return]

prepare_climate_data(datasets, climate_data_dir) #

Symlink the input files from the Pandas dataframe into a directory tree.

This ensures that ESMValTool can find the data and only uses the requested data.

Parameters:

Name Type Description Default
datasets DataFrame

The pandas dataframe describing the input datasets.

required
climate_data_dir Path

The directory where ESMValTool should look for input data.

required
Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py
def prepare_climate_data(datasets: pd.DataFrame, climate_data_dir: Path) -> None:
    """Symlink the input files from the Pandas dataframe into a directory tree.

    This ensures that ESMValTool can find the data and only uses the
    requested data.

    For obs4MIPs datasets the link is placed in
    ``obs4MIPs/<source_id>/<version>``, where the version is the last
    component of the instance_id. For all other datasets the directory is
    built from the dot-separated components of the instance_id.

    Calling the function again with the same input, or passing a dataframe
    in which a file is listed more than once, is harmless: a link that
    already points to the requested file is left in place.

    Parameters
    ----------
    datasets
        The pandas dataframe describing the input datasets.
    climate_data_dir
        The directory where ESMValTool should look for input data.

    Raises
    ------
    ValueError
        If a row has an instance_id or a path that is not a string.
    FileExistsError
        If the link location is already taken by something other than a
        symlink to the requested file.
    """
    for row in datasets.itertuples():
        if not isinstance(row.instance_id, str):  # pragma: no branch
            msg = f"Invalid instance_id encountered in {row}"
            raise ValueError(msg)
        if not isinstance(row.path, str):  # pragma: no branch
            msg = f"Invalid path encountered in {row}"
            raise ValueError(msg)
        if row.instance_id.startswith("obs4MIPs."):
            version = row.instance_id.split(".")[-1]
            subdirs: list[str] = ["obs4MIPs", row.source_id, version]  # type: ignore[list-item]
        else:
            subdirs = row.instance_id.split(".")
        tgt = climate_data_dir.joinpath(*subdirs) / Path(row.path).name
        tgt.parent.mkdir(parents=True, exist_ok=True)
        # Skip links that are already correct instead of failing with
        # FileExistsError; conflicting entries still raise below.
        if tgt.is_symlink() and tgt.readlink() == Path(row.path):
            continue
        tgt.symlink_to(row.path)