# Source code for subsettools.clm

"""Functions to configure CLM for coupled ParFlow-CLM simulations."""

import os
from datetime import timedelta
import numpy as np
import hf_hydrodata
from ._common import (
    get_utc_time,
    get_hf_gridded_data,
)
from ._error_checking import (
    _validate_dir,
    _validate_grid_bounds,
    _validate_date,
)


# Number of columns in one row of a vegm table: the two x/y index columns that
# _reshape_ndarray_to_vegm_format stacks onto the data, plus the data layers.
# It matches the 25 column labels/formats used by _write_land_cover.
_VEGM_COLUMNS = 25



# [docs]
def config_clm(ij_bounds, start, end, dataset, write_dir, time_zone="UTC"):
    """Modify template CLM driver files for a desired subdomain and run duration.

    This function will obtain template clm driver files (specifically vegm, vegp
    and drv_clmin) from the existing national simulations on HydroData and modify
    them to reflect the desired subdomain (indicated by the ij_bounds) and run
    duration (indicated by the start and end dates). The modified files will be
    written out to a user specified directory. These files are required if you
    are going to run a ParFlow-CLM simulation.

    Args:
        ij_bounds (tuple[int]): bounding box for subset. This should be given as
            i,j index values where 0,0 is the lower left hand corner of a domain.
            ij_bounds are given relative to whatever grid is being used for the
            subset.
        start (str): start date (inclusive), in the form 'yyyy-mm-dd'
        end (str): end date (exclusive), in the form 'yyyy-mm-dd'
        dataset (str): the name of the dataset that the files should be
            obtained from, e.g. "conus1_baseline_mod"
        write_dir (str): directory where the subset files will be written
        time_zone (str): timezone information for start and end dates.
            Defaults to "UTC".

    Returns:
        A dictionary mapping the CLM file types ("vegp", "vegm", "drv_clm") to
        the corresponding filepaths where the CLM files were written. If the
        vegp or drv_clm template cannot be fetched (``hf_hydrodata`` raises
        ``ValueError``), a message is printed and that key is omitted.

    Raises:
        TypeError: If ``dataset`` or ``time_zone`` is not a string.

    Example:

    .. code-block:: python

        filepaths = config_clm(
            ij_bounds=(375, 239, 487, 329),
            start="2005-10-01",
            end="2006-10-01",
            dataset="conus1_baseline_mod",
            write_dir="/path/to/your/chosen/directory"
        )
    """
    _validate_grid_bounds(ij_bounds)
    _validate_date(start)
    _validate_date(end)
    if not isinstance(dataset, str):
        raise TypeError("dataset name must be a string.")
    _validate_dir(write_dir)
    if not isinstance(time_zone, str):
        raise TypeError("time_zone must be a string.")

    file_paths = {}

    # vegp: copied verbatim from the dataset.
    vegp_path = os.path.join(write_dir, "drv_vegp.dat")
    if _fetch_static_clm_file(vegp_path, dataset, "vegp"):
        file_paths["vegp"] = vegp_path
        print("copied vegp")

    # vegm: fetch the pfb version of the vegm data for the subdomain and
    # rewrite it as a text vegm table.
    options = {
        "dataset": dataset,
        "file_type": "pfb",
        "variable": "clm_run",
        "temporal_resolution": "static",
        "grid_bounds": ij_bounds,
    }
    subset_data = get_hf_gridded_data(options)
    land_cover_data = _reshape_ndarray_to_vegm_format(subset_data)
    # Stored under "vegm" (the documented key), not the "pfb" source file type.
    file_paths["vegm"] = _write_land_cover(land_cover_data, write_dir)
    print("subset vegm")

    # drv_clmin: copied from the dataset, then edited for the requested dates.
    drv_clmin_path = os.path.join(write_dir, "drv_clmin.dat")
    if _fetch_static_clm_file(drv_clmin_path, dataset, "drv_clm"):
        print("copied drv_clmin")
        _edit_drvclmin(
            file_path=drv_clmin_path, start=start, end=end, time_zone=time_zone
        )
        file_paths["drv_clm"] = drv_clmin_path
        print("edited drv_clmin")

    return file_paths


def _fetch_static_clm_file(file_path, dataset, file_type):
    """Download a static ``clm_run`` raw file; return True on success.

    A ``ValueError`` from ``hf_hydrodata.get_raw_file`` is reported with
    ``print`` and turned into a ``False`` return so the caller can continue
    with the remaining files (best-effort download).
    """
    try:
        hf_hydrodata.get_raw_file(
            file_path,
            dataset=dataset,
            file_type=file_type,
            variable="clm_run",
            temporal_resolution="static",
        )
    except ValueError as err:
        print(f"Failed to get {file_type} file for dataset '{dataset}':", err)
        return False
    return True



def _reshape_ndarray_to_vegm_format(data):
    """Flatten gridded vegm data into a 2-D table with one row per grid cell.

    Args:
        data (ndarray): raw subset vegm data. The shape is unpacked as
            ``(layers, nj, ni)``, so a 3-D array is required.

    Returns:
        Ndarray of shape ``(nj * ni, _VEGM_COLUMNS)``. The first two columns
        are the 1-based x (fastest varying) and y indices of the cell; the
        remaining columns are that cell's values from each layer of ``data``.
    """
    _layers, n_rows, n_cols = data.shape
    # np.indices yields (row index, column index); shift to 1-based numbering
    # and flip the pair so that x (column) comes before y (row).
    yx_grid = np.indices((n_rows, n_cols)) + 1
    with_xy = np.concatenate((yx_grid[::-1], data), axis=0)
    # Move the layer axis last so each cell's values are contiguous, then
    # collapse the two spatial axes into rows of the vegm table.
    per_cell = np.moveaxis(with_xy, 0, -1)
    return per_cell.reshape(-1, _VEGM_COLUMNS)


def _write_land_cover(land_cover_data, write_dir):
    """Write vegm-formatted land cover data to ``drv_vegm.dat``.

    The file starts with two header lines (a title line and a units/class
    label line) followed by one space-delimited row per row of
    ``land_cover_data``. Columns are formatted as two integers, two floats
    with six decimals, two floats with two decimals and nineteen integers.

    Args:
        land_cover_data (ndarray): formatted vegm data (2d array with 25
            columns, matching the 25 column formats)
        write_dir (str): path to output directory

    Returns:
        str:
            path to output vegm file.
    """
    out_path = os.path.join(write_dir, "drv_vegm.dat")

    title_row = (
        "x y lat lon sand clay color fractional coverage of grid, by "
        "vegetation class (Must/Should Add to 1.0) "
    )
    # Labels for the leading columns; the empty strings keep the labels
    # positionally aligned with columns that have no unit.
    leading_labels = ["", "", "(Deg)", "(Deg)", "(%/100)", "", "index"]
    class_labels = [str(number) for number in range(1, 19)]
    label_row = " ".join(leading_labels + class_labels)

    column_formats = ["%d", "%d", "%.6f", "%.6f", "%.2f", "%.2f"] + ["%d"] * 19

    np.savetxt(
        out_path,
        land_cover_data,
        fmt=column_formats,
        delimiter=" ",
        header=f"{title_row}\n{label_row}",
        comments="",  # write the header lines without a comment prefix
    )
    return out_path


def _edit_drvclmin(
    file_path,
    start=None,
    end=None,
    time_zone="UTC",
    startcode=2,
    vegp_name="drv_vegp.dat",
    vegm_name="drv_vegm.dat",
):
    """Edit a template CLM driver for a new simulation.

    Update the start and end dates, timezone, restart type and vegm and vegp
    input file names for a new simulation. The file is rewritten in place.

    A line is rewritten only when its first whitespace-delimited token is
    exactly one of the known parameter names, so comments or descriptions
    that merely mention a parameter name are left untouched.
    NOTE(review): this assumes the parameter name is the first token on its
    line in the drv_clmin template -- confirm against the template format.

    Args:
        file_path (str): clm driver file path
        start (str): start date (inclusive), in the form 'yyyy-mm-dd'
        end (str): end date (exclusive), in the form 'yyyy-mm-dd'
        time_zone (str): time_zone used to calculate start/end dates. Defaults
            to "UTC".
        startcode (int): startcode for the parflow simulation; written to both
            the ``startcode`` and ``clm_ic`` entries
        vegp_name (str): vegp filename
        vegm_name (str): vegm filename

    Raises:
        AssertionError: If one of start or end is None and the other is not.
    """
    # Raised explicitly (instead of using ``assert``) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if (start is None) != (end is None):
        raise AssertionError("start and end must both be given or both be None.")

    restart_note = "1=restart file, 2=defined"
    # parameter name -> (new value, trailing description)
    updates = {
        "vegtf": (vegm_name, "Vegetation Tile Specification File"),
        "vegpf": (vegp_name, "Vegetation Type Parameter"),
        "startcode": (startcode, restart_note),
        "clm_ic": (startcode, restart_note),
    }

    if start is not None:
        start_date = get_utc_time(start, time_zone)
        # ``end`` is documented as exclusive, so the ending timestamp written
        # to the file is one hour before it.
        end_date = get_utc_time(end, time_zone) - timedelta(hours=1)
        updates.update(
            {
                "shr": (start_date.hour, "Starting Hour"),
                "sda": (start_date.day, "Starting Day"),
                "smo": (start_date.month, "Starting Month"),
                "syr": (start_date.year, "Starting Year"),
                "ehr": (end_date.hour, "Ending Hour"),
                "eda": (end_date.day, "Ending Day"),
                "emo": (end_date.month, "Ending Month"),
                "eyr": (end_date.year, "Ending Year"),
            }
        )

    with open(file_path, encoding="utf-8") as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        tokens = line.split(None, 1)
        if not tokens or tokens[0] not in updates:
            continue
        key = tokens[0]
        value, description = updates[key]
        lines[i] = f"{key:<15}{value:<37} {description}\n"

    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(lines)