`climate_ref_esmvaltool.recipe` #

`as_facets(group)` #

Convert a group from the datasets dataframe to ESMValTool facets.

Parameters:

Name	Type	Description	Default
`group`	`DataFrame`	A group of datasets representing a single instance_id.	required

Returns:

Type	Description
`dict[str, Any]`	A `dict` containing facet-value pairs.

Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py

def as_facets(
    group: pd.DataFrame,
) -> dict[str, Any]:
    """Translate one group of dataset rows into ESMValTool facets.

    The project is the first dot-separated component of the ``instance_id``
    of the first row in the group. It selects the mapping in ``FACETS`` that
    pairs each ESMValTool facet name with the dataframe column to read it from.

    Parameters
    ----------
    group:
        A group of datasets representing a single instance_id.

    Returns
    -------
        A :obj:`dict` containing facet-value pairs. A facet with a single
        unique value is stored as that value; a facet with several unique
        values is stored as a list. A ``"timerange"`` entry is included only
        when a timerange is available for the group.

    """
    first_instance_id = group.iloc[0].instance_id
    project = first_instance_id.split(".", 2)[0]
    facets = {"project": project}

    for facet_name, column in FACETS[project].items():
        unique_values = group[column].unique().tolist()
        if len(unique_values) > 1:
            facets[facet_name] = unique_values
        else:
            # Collapse a one-element list to its only value.
            facets[facet_name] = unique_values[0]

    # Groups without usable time information get no "timerange" facet.
    if (timerange := as_timerange(group)) is not None:
        facets["timerange"] = timerange
    return facets

`as_isodate(timestamp)` #

Format a timestamp as an ISO 8601 datetime.

For example, '2014-12-16 12:00:00' will be formatted as '20141216T120000'.

Parameters:

Name	Type	Description	Default
`timestamp`	`Timestamp`	The timestamp to format.	required

Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py

def as_isodate(timestamp: pd.Timestamp) -> str:
    """Render a timestamp as a compact ISO 8601 datetime string.

    The string representation of the timestamp is the starting point: the
    space between the date and the time becomes a ``T`` and every ``-`` and
    ``:`` is removed. For example, '2014-12-16 12:00:00' will be formatted
    as '20141216T120000'.

    Parameters
    ----------
    timestamp
        The timestamp to format.

    Returns
    -------
        The reformatted string representation of the timestamp.

    """
    # Single pass over the text: " " -> "T", while "-" and ":" are dropped.
    compact_form = str.maketrans({" ": "T", "-": None, ":": None})
    return str(timestamp).translate(compact_form)

`as_timerange(group)` #

Format the timeranges from a dataframe as an ESMValTool timerange.

Parameters:

Name	Type	Description	Default
`group`	`DataFrame`	The dataframe describing a single dataset.	required

Returns:

Type	Description
`str | None`	A timerange, or `None` when the dataframe has no start times or no end times.

Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py

def as_timerange(group: pd.DataFrame) -> str | None:
    """Format the timeranges from a dataframe as an ESMValTool timerange.

    Parameters
    ----------
    group
        The dataframe describing a single dataset.

    Returns
    -------
        A timerange.
    """
    # TODO: apply some rounding to avoid problems?
    # https://github.com/ESMValGroup/ESMValCore/issues/2048
    start_times = group.start_time.dropna()
    if start_times.empty:
        return None
    end_times = group.end_time.dropna()
    if end_times.empty:
        return None  # pragma: no cover
    return f"{as_isodate(start_times.min())}/{as_isodate(end_times.max())}"

`dataframe_to_recipe(files, group_by=('instance_id',), equalize_timerange=False)` #

Convert the datasets dataframe to a recipe "variables" section.

Parameters:

Name	Type	Description	Default
`files`	`DataFrame`	The pandas dataframe describing the input files.	required
`group_by`	`tuple[str, ...]`	The columns to group the input files by.	`('instance_id',)`
`equalize_timerange`	`bool`	If True, use the timerange that is covered by all datasets.	`False`

Returns:

Type	Description
`dict[str, Any]`	A "variables" section that can be used in an ESMValTool recipe.

Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py

def dataframe_to_recipe(
    files: pd.DataFrame,
    group_by: tuple[str, ...] = ("instance_id",),
    equalize_timerange: bool = False,
) -> dict[str, Any]:
    """Convert the datasets dataframe to a recipe "variables" section.

    The dataframe is grouped by ``group_by`` and every group is converted to
    a dictionary of facets. The ``short_name`` facet is removed from that
    dictionary and used as the variable name; the remaining facets are
    appended to the variable's ``additional_datasets`` list.

    Parameters
    ----------
    files
        The pandas dataframe describing the input files.
    group_by
        The columns to group the input files by.
    equalize_timerange
        If True, use the timerange that is covered by all datasets.
        Datasets without a ``timerange`` facet are left untouched, and
        nothing is changed when no dataset has a ``timerange`` facet.

    Returns
    -------
        A "variables" section that can be used in an ESMValTool recipe.
    """
    variables: dict[str, Any] = {}
    for _, group in files.groupby(list(group_by)):
        facets = as_facets(group)
        short_name = facets.pop("short_name")
        variable = variables.setdefault(short_name, {"additional_datasets": []})
        variable["additional_datasets"].append(facets)

    if equalize_timerange:
        timed_datasets = [
            dataset
            for variable in variables.values()
            for dataset in variable["additional_datasets"]
            if "timerange" in dataset
        ]
        # Without this guard, max()/min() would raise ValueError on an empty
        # sequence when no dataset carries a timerange (or `files` is empty).
        if timed_datasets:
            bounds = [dataset["timerange"].split("/") for dataset in timed_datasets]
            # Select a timerange covered by all datasets: latest start,
            # earliest end. The bounds are compared as strings, which is
            # chronological only while they share one fixed-width format.
            latest_start = max(start for start, _ in bounds)
            earliest_end = min(end for _, end in bounds)
            timerange = f"{latest_start}/{earliest_end}"
            for dataset in timed_datasets:
                dataset["timerange"] = timerange

    return variables

`load_recipe(recipe)` #

Load a recipe.

Parameters:

Name	Type	Description	Default
`recipe`	`str`	The name of an ESMValTool recipe.	required

Returns:

Type	Description
`Recipe`	The loaded recipe.

Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py

def load_recipe(recipe: str) -> Recipe:
    """Fetch an ESMValTool recipe by name and parse it from YAML.

    Parameters
    ----------
    recipe
        The name of an ESMValTool recipe.

    Returns
    -------
        The loaded recipe, in which every dict and list has been rebuilt so
        that no container object appears in more than one place.
    """

    def unshare(node: Any) -> Any:
        # Recursively rebuild lists and dicts so the same container is never
        # referenced twice in the result; all other values are returned as-is.
        # NOTE(review): sharing presumably comes from YAML anchors/aliases - confirm.
        if isinstance(node, list):
            return [unshare(element) for element in node]
        if isinstance(node, dict):
            return {key: unshare(value) for key, value in node.items()}
        return node

    recipe_path = Path(_RECIPES.fetch(recipe))
    raw_recipe = yaml.safe_load(recipe_path.read_text(encoding="utf-8"))
    return unshare(raw_recipe)  # type: ignore[no-any-return]

`prepare_climate_data(datasets, climate_data_dir)` #

Symlink the input files from the Pandas dataframe into a directory tree.

This ensures that ESMValTool can find the data and only uses the requested data.

Parameters:

Name	Type	Description	Default
`datasets`	`DataFrame`	The pandas dataframe describing the input datasets.	required
`climate_data_dir`	`Path`	The directory where ESMValTool should look for input data.	required

Source code in packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/recipe.py

def prepare_climate_data(datasets: pd.DataFrame, climate_data_dir: Path) -> None:
    """Create a directory tree of symlinks pointing at the input files.

    This ensures that ESMValTool can find the data and only uses the
    requested data.

    For rows whose ``instance_id`` starts with ``obs4MIPs.``, the link is
    placed under ``obs4MIPs/<source_id>/<version>``, where the version is the
    last dot-separated component of the ``instance_id``. For all other rows,
    every dot-separated component of the ``instance_id`` becomes one
    directory level.

    Parameters
    ----------
    datasets
        The pandas dataframe describing the input datasets.
    climate_data_dir
        The directory where ESMValTool should look for input data.

    Raises
    ------
    ValueError
        If the ``instance_id`` or ``path`` of a row is not a string.
    """
    for row in datasets.itertuples():
        dataset_id = row.instance_id
        if not isinstance(dataset_id, str):  # pragma: no branch
            msg = f"Invalid instance_id encountered in {row}"
            raise ValueError(msg)
        source_file = row.path
        if not isinstance(source_file, str):  # pragma: no branch
            msg = f"Invalid path encountered in {row}"
            raise ValueError(msg)

        id_parts = dataset_id.split(".")
        if dataset_id.startswith("obs4MIPs."):
            # Only the source and the version are kept for obs4MIPs data.
            subdirs: list[str] = ["obs4MIPs", row.source_id, id_parts[-1]]  # type: ignore[list-item]
        else:
            subdirs = id_parts

        # The link keeps the file name of the file it points to.
        link = climate_data_dir.joinpath(*subdirs, Path(source_file).name)
        link.parent.mkdir(parents=True, exist_ok=True)
        link.symlink_to(source_file)

climate_ref_esmvaltool.recipe #

as_facets(group) #

as_isodate(timestamp) #

as_timerange(group) #

dataframe_to_recipe(files, group_by=('instance_id',), equalize_timerange=False) #

load_recipe(recipe) #

prepare_climate_data(datasets, climate_data_dir) #

`climate_ref_esmvaltool.recipe` #

`as_facets(group)` #

`as_isodate(timestamp)` #

`as_timerange(group)` #

`dataframe_to_recipe(files, group_by=('instance_id',), equalize_timerange=False)` #

`load_recipe(recipe)` #

`prepare_climate_data(datasets, climate_data_dir)` #