Skip to content

climate_ref_pmp.pmp_driver #

build_glob_pattern(paths) #

Generate a glob pattern that matches files based on common path, prefix, and suffix.

Generate a glob pattern that matches all files in the given list of paths, based on their common directory, filename prefix, and suffix.

Parameters:

Name Type Description Default
paths list of str

A list of full file paths. The paths should point to actual files, and should have enough similarity in their structure and naming to extract common patterns.

required

Returns:

Type Description
str

A glob pattern string that can be used with glob.glob(pattern, recursive=True) to match all the provided files and others with the same structural pattern.

Examples:

>>> paths = [
...     "/home/user/data/folder1/file1.txt",
...     "/home/user/data/folder1/file2.txt",
...     "/home/user/data/folder2/file3.txt",
... ]
>>> pattern = build_glob_pattern(paths)
>>> print(pattern)
/home/user/data/**/file*.txt
Source code in packages/climate-ref-pmp/src/climate_ref_pmp/pmp_driver.py
def build_glob_pattern(paths: list[str]) -> str:
    """
    Generate a glob pattern that matches files based on common path, prefix, and suffix.

    Generate a glob pattern that matches all files in the given list of paths,
    based on their common directory, filename prefix, and suffix.

    The filename part of the pattern is ``<prefix>*<suffix>``, where the suffix
    is determined from what is left of each filename after the common prefix has
    been removed. This guarantees that prefix and suffix never overlap, so the
    pattern always matches every input filename. If all filenames are identical
    (including the single-path case) no wildcard is inserted and the filename is
    used verbatim.

    Parameters
    ----------
    paths : list of str
        A list of full file paths. The paths should point to actual files,
        and should have enough similarity in their structure and naming
        to extract common patterns.

    Returns
    -------
    str
        A glob pattern string that can be used with `glob.glob(pattern, recursive=True)`
        to match all the provided files and others with the same structural pattern.

    Raises
    ------
    ValueError
        If ``paths`` is empty, or if ``os.path.commonpath`` cannot determine
        a common path for files that live in different directories.

    Examples
    --------
    >>> paths = [
    ...     "/home/user/data/folder1/file1.txt",
    ...     "/home/user/data/folder1/file2.txt",
    ...     "/home/user/data/folder2/file3.txt",
    ... ]
    >>> pattern = build_glob_pattern(paths)
    >>> print(pattern)
    /home/user/data/**/file*.txt
    """
    if not paths:
        raise ValueError("The path list is empty.")

    filenames = [os.path.basename(path) for path in paths]
    dirnames = [os.path.dirname(path) for path in paths]

    # commonprefix works character by character, which is exactly what is
    # needed for bare filenames (as opposed to path components).
    prefix = os.path.commonprefix(filenames)

    # Only look for a suffix in the part of each name that the prefix has not
    # already claimed; otherwise e.g. a single "file1.txt" would produce
    # "file1.txt*file1.txt", which does not match the file itself.
    remainders = [name[len(prefix) :] for name in filenames]
    suffix = os.path.commonprefix([rest[::-1] for rest in remainders])[::-1]

    # Identical filenames leave nothing to vary, so no wildcard is required
    wildcard = "*" if any(remainders) else ""
    filename_pattern = f"{prefix}{wildcard}{suffix}"

    # Use simpler pattern if all files are in the same directory
    if all(d == dirnames[0] for d in dirnames):
        return os.path.join(dirnames[0], filename_pattern)

    # Files are spread over several directories: search recursively below
    # their deepest shared ancestor
    return os.path.join(os.path.commonpath(paths), "**", filename_pattern)

build_pmp_command(driver_file, parameter_file, **kwargs) #

Build the command used to run a PMP driver script via a conda environment

This function builds the argument list for running a PMP driver script; it does not execute the script itself. The resulting command is intended to be run inside the PMP conda environment. The driver script is responsible for running the PMP diagnostics and producing output. The output consists of a JSON file that contains the results of the PMP diagnostics, and a set of PNG and data files that are produced by the diagnostics.

The PMP driver scripts are installed in the PMP conda environment, but absolute paths should be used for non-PMP scripts.

Parameters:

Name Type Description Default
driver_file str

Filename of the PMP driver script to run

required
parameter_file str

Filename of the parameter file to use

required
kwargs str | int | float | list[str] | None

Additional arguments to pass to the driver script

{}
Source code in packages/climate-ref-pmp/src/climate_ref_pmp/pmp_driver.py
def build_pmp_command(
    driver_file: str,
    parameter_file: str,
    **kwargs: str | int | float | list[str] | None,
) -> list[str]:
    """
    Build the command used to run a PMP driver script

    This function only assembles the argument list; it does not execute anything.
    The resulting command is intended to be run inside the PMP conda environment,
    where the driver script is responsible for running the PMP diagnostics and
    producing output.
    That output consists of a JSON file that contains the results of the PMP diagnostics,
    and a set of PNG and data files that are produced by the diagnostics.

    The PMP driver scripts are installed in the PMP conda environment,
    but absolute paths should be used for non-PMP scripts.

    Parameters
    ----------
    driver_file
        Filename of the PMP driver script to run
    parameter_file
        Filename of the parameter file to use.
        It is resolved via ``_get_resource`` from ``climate_ref_pmp.params``.
    kwargs
        Additional arguments to pass to the driver script.
        Each key is emitted as ``--<key>``:

        * a list is expanded into one argument per element,
        * a number (including ``0``) or a non-empty string is passed as a single value,
        * ``None`` or an empty string results in a bare flag without a value.

    Returns
    -------
    list[str]
        The command as a list of arguments, starting with ``driver_file``
    """
    # The parameter file is looked up in the climate_ref_pmp.params resources
    _parameter_file = _get_resource("climate_ref_pmp.params", parameter_file, use_resources=True)

    # Command to be run inside the PMP conda environment
    cmd = [
        driver_file,
        "-p",
        _parameter_file,
    ]

    # Loop through additional arguments if they exist
    if kwargs:  # pragma: no cover
        for key, value in kwargs.items():
            # A numeric zero is a legitimate value and must not be mistaken
            # for "no value given" by a plain truthiness check.
            is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
            if isinstance(value, list):
                cmd.extend([f"--{key}"] + [str(v) for v in value])
            elif value or is_number:
                cmd.extend([f"--{key}", str(value)])
            else:
                cmd.extend([f"--{key}"])

    logger.info(f"PMP Command: {cmd}")

    return cmd

process_json_result(json_filename, png_files, data_files) #

Process a PMP JSON result into the appropriate CMEC bundles

Parameters:

Name Type Description Default
json_filename Path

Filename of the JSON file that is written out by PMP

required
png_files list[Path]

List of PNG files to be included in the output

required
data_files list[Path]

List of data files to be included in the output

required

Returns:

Type Description
tuple[CMECOutput, CMECMetric] Tuple of the CMEC output bundle and the CMEC metric bundle
Source code in packages/climate-ref-pmp/src/climate_ref_pmp/pmp_driver.py
def process_json_result(
    json_filename: pathlib.Path, png_files: list[pathlib.Path], data_files: list[pathlib.Path]
) -> tuple[CMECOutput, CMECMetric]:
    """
    Process a PMP JSON result into the appropriate CMEC bundles

    The JSON file must contain ``"DIMENSIONS"`` and ``"RESULTS"`` entries.
    A ``"provenance"`` entry is optional; if present it is merged into the
    provenance of the output bundle and copied to the metric bundle.

    Parameters
    ----------
    json_filename
        Filename of the JSON file that is written out by PMP
    png_files
        List of PNG files to be included in the output
    data_files
        List of data files to be included in the output

    Returns
    -------
    tuple[CMECOutput, CMECMetric]
        The CMEC output bundle and the CMEC metric bundle

    Raises
    ------
    KeyError
        If the JSON file has no ``"DIMENSIONS"`` or ``"RESULTS"`` entry
    """
    # JSON is UTF-8 by specification; do not depend on the platform default
    with open(json_filename, encoding="utf-8") as fh:
        json_result = json.load(fh)

    cmec_output = CMECOutput.create_template()
    # Provenance is optional (see the metric bundle below), so fall back to
    # the template's provenance alone when the file does not provide any.
    cmec_output["provenance"] = {**cmec_output["provenance"], **json_result.get("provenance", {})}

    # Add the plots and data files, keyed by their bare filename
    for fname in png_files:
        cmec_output["plots"][fname.name] = {
            "filename": str(fname),
            "long_name": "Plot",
            "description": "Plot produced by the diagnostic",
        }
    for fname in data_files:
        cmec_output["data"][fname.name] = {
            "filename": str(fname),
            "long_name": "Output data",
            "description": "Data produced by the diagnostic",
        }

    cmec_metric = CMECMetric.create_template()
    dimensions = json_result["DIMENSIONS"]

    if "dimensions" in dimensions:  # pragma: no branch
        # Merge the contents of inner "dimensions" into the parent "DIMENSIONS"
        dimensions.update(dimensions["dimensions"])
        del dimensions["dimensions"]

    cmec_metric["RESULTS"] = json_result["RESULTS"]
    cmec_metric["DIMENSIONS"] = dimensions

    if "provenance" in json_result:  # pragma: no branch
        cmec_metric["PROVENANCE"] = json_result["provenance"]

    logger.info(f"cmec_output: {pretty_repr(cmec_output)}")
    logger.info(f"cmec_metric: {pretty_repr(cmec_metric)}")

    return CMECOutput(**cmec_output), CMECMetric(**cmec_metric)