Source code for supy._supy_module

# ###########################################################################
# SuPy: SUEWS for Python
#
# Authors:
# Ting Sun, ting.sun@reading.ac.uk
#
# History:
# 20 Jan 2018: first alpha release
# 01 Feb 2018: performance improvement
# 03 Feb 2018: improvement in output processing
# 08 Mar 2018: pypi packaging
# 04 Oct 2018: overhaul of structure
# 05 Oct 2018: added sample run data
# 28 Apr 2019: added support for parallel run
###########################################################################

import logging
import os
import sys
import time
import pandas
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd

from ._check import check_forcing, check_state
from ._env import logger_supy, trv_supy_module
from ._load import (
    load_InitialCond_grid_df,
    load_SUEWS_Forcing_met_df_raw,
    load_SUEWS_dict_ModConfig,
    load_df_state,
    resample_forcing_met,
)
from ._run import run_supy_par, run_supy_ser
from ._save import get_save_info, save_df_output, save_df_state, save_initcond_nml


# set up logging module
logger_supy.setLevel(logging.INFO)


##############################################################################
# 1. compact wrapper for loading SUEWS settings
# @functools.lru_cache(maxsize=16)
def init_supy(
    path_init: str,
    force_reload=True,
    check_input=False,
) -> pd.DataFrame:
    """Initialise supy by loading initial model states.

    Parameters
    ----------
    path_init : str
        Path to a file that can initialise SuPy, which can be either of the following:
            * SUEWS :ref:`RunControl.nml<suews:RunControl.nml>`: a namelist file for SUEWS configurations
            * SuPy `df_state.csv`: a CSV file including model states produced by a SuPy run via :py:func:`supy.save_supy`

        The kind of file is decided solely by its suffix (``.nml`` or ``.csv``).

    force_reload: boolean, optional
        Flag to force reload all initialisation files by clearing all cached states, with default value `True` (i.e., force reload all files).
        Only used when `path_init` is a ``.nml`` file.
        Note: If the number of simulation grids is large (e.g., > 100), `force_reload=False` is strongly recommended for better performance.

    check_input: boolean, optional
        flag for checking validity of the loaded `df_state_init`.
        If set to `True`, the loaded states are passed through `check_state`;
        any reported issues are logged at CRITICAL level (the states are still returned),
        and a failure of the check itself raises `RuntimeError`.
        *Note: such checking procedure may take some time if the input is large.*
        (the default is `False`, which bypasses the validation).

    Returns
    -------
    df_state_init: pandas.DataFrame
        Initial model states.
        See `df_state_var` for details.

    Raises
    ------
    FileNotFoundError
        If `path_init` cannot be resolved.
    RuntimeError
        If `path_init` has neither a ``.nml`` nor a ``.csv`` suffix,
        or if `check_input` is set and the state check cannot be completed.

    Examples
    --------
    1. Use :ref:`RunControl.nml<suews:RunControl.nml>` to initialise SuPy

    >>> path_init = "~/SUEWS_sims/RunControl.nml"
    >>> df_state_init = supy.init_supy(path_init)

    2. Use ``df_state.csv`` to initialise SuPy

    >>> path_init = "~/SuPy_res/df_state_test.csv"
    >>> df_state_init = supy.init_supy(path_init)

    """

    try:
        path_init_x = Path(path_init).expanduser().resolve()
    except FileNotFoundError:
        # `path_init_x` is not bound when resolution fails, so report the raw
        # input instead, then propagate: there is nothing sensible to return.
        logger_supy.exception(f"{path_init} does not exists!")
        raise

    if path_init_x.suffix == ".nml":
        # SUEWS `RunControl.nml`:
        df_state_init = load_InitialCond_grid_df(
            path_init_x,
            force_reload=force_reload,
        )
    elif path_init_x.suffix == ".csv":
        # SuPy `df_state.csv`:
        df_state_init = load_df_state(path_init_x)
    else:
        msg_invalid = f"{path_init_x} is NOT a valid file to initialise SuPy!"
        logger_supy.critical(msg_invalid)
        raise RuntimeError(msg_invalid)

    if check_input:
        try:
            list_issues = check_state(df_state_init)
        except Exception as err:
            # keep the original cause attached for easier debugging
            raise RuntimeError(
                f"{path_init_x} is NOT a valid file to initialise SuPy!"
            ) from err
        # a list result is treated as "issues found"; they are reported only
        if isinstance(list_issues, list):
            logger_supy.critical(
                f"`df_state_init` loaded from {path_init_x} is NOT valid to initialise SuPy!"
            )

    return df_state_init


# # TODO:
# def load_forcing(path_pattern: str, grid: int = 0) -> pd.DataFrame:
#     pass


# TODO:
# to be superseded by a more generic wrapper: load_forcing
def load_forcing_grid(
    path_runcontrol: str,
    grid: int,
    check_input=False,
    force_reload=True,
) -> pd.DataFrame:
    """Load forcing data for a specific grid included in the index of `df_state_init </data-structure/supy-io.ipynb#df_state_init:-model-initial-states>`.

    Parameters
    ----------

    path_runcontrol : str
        Path to SUEWS :ref:`RunControl.nml <suews:RunControl.nml>`
    grid : int
        Grid number
    check_input : bool, optional
        flag for checking validity of the loaded `df_forcing`.
        If set to `True`, the forcing is passed through `check_forcing`;
        any reported issues are logged at CRITICAL level (the forcing is still returned),
        and a failure of the check itself raises `RuntimeError`.
        *Note: such checking procedure may take some time if the input is large.*
        (the default is `False`, which bypasses the validation).
    force_reload : bool, optional
        Passed on to :py:func:`init_supy` when loading the initial model states
        needed to resample the forcing (the default is `True`).

    Returns
    -------
    df_forcing: pandas.DataFrame
        Forcing data. See `df_forcing_var` for details.

    Raises
    ------
    FileNotFoundError
        If `path_runcontrol` cannot be resolved.
    RuntimeError
        If `check_input` is set and the forcing check cannot be completed.

    Examples
    --------
    >>> path_runcontrol = "~/SUEWS_sims/RunControl.nml"  # a valid path to `RunControl.nml`
    >>> df_state_init = supy.init_supy(path_runcontrol) # get `df_state_init`
    >>> grid = df_state_init.index[0] # first grid number included in `df_state_init`
    >>> df_forcing = supy.load_forcing_grid(path_runcontrol, grid) # get df_forcing


    """

    try:
        path_rc = Path(path_runcontrol).expanduser().resolve()
    except FileNotFoundError:
        # nothing can be loaded without a usable path: log and propagate
        logger_supy.exception(f"{path_runcontrol} does not exists!")
        raise

    dict_mod_cfg = load_SUEWS_dict_ModConfig(path_rc)
    df_state_init = init_supy(path_rc, force_reload=force_reload)

    # model settings needed to locate and resample the met forcing files
    filecode = dict_mod_cfg["filecode"]
    kdownzen = dict_mod_cfg["kdownzen"]
    tstep_met_in = dict_mod_cfg["resolutionfilesin"]
    multiplemetfiles = dict_mod_cfg["multiplemetfiles"]

    # grid-specific settings taken from the initial model states
    tstep_mod, lat, lon, alt, timezone = df_state_init.loc[
        grid, [(x, "0") for x in ["tstep", "lat", "lng", "alt", "timezone"]]
    ].values

    # the input directory is taken relative to the folder holding RunControl.nml
    path_input = path_rc.parent / dict_mod_cfg["fileinputpath"]

    # load raw data
    # met forcing
    df_forcing_met = load_SUEWS_Forcing_met_df_raw(
        path_input, filecode, grid, tstep_met_in, multiplemetfiles
    )

    # resample raw data from tstep_in to tstep_mod
    df_forcing_met_tstep = resample_forcing_met(
        df_forcing_met, tstep_met_in, tstep_mod, lat, lon, alt, timezone, kdownzen
    )

    # coerced precision here to prevent numerical errors inside Fortran
    df_forcing = df_forcing_met_tstep.round(10)

    # cast the datetime component columns to integers for later use in main calculation
    cols_time = ["iy", "id", "it", "imin"]
    df_forcing[cols_time] = df_forcing[cols_time].astype(np.int64)

    if check_input:
        try:
            list_issues = check_forcing(df_forcing)
        except Exception as err:
            # raise a descriptive error instead of silently exiting the interpreter
            raise RuntimeError(
                f"`df_forcing` loaded from {path_input} could not be validated!"
            ) from err
        # a list result is treated as "issues found"; they are reported only
        if isinstance(list_issues, list):
            logger_supy.critical(
                f"`df_forcing` loaded from {path_input} is NOT valid to drive SuPy!"
            )

    return df_forcing


# load sample data for quickly starting a demo run
# TODO: to deprecate this by renaming for case consistency: load_SampleData-->load_sample_data
def load_SampleData() -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    """Load sample data for quickly starting a demo run.

    The sample run is read from the ``sample_run`` folder under
    `trv_supy_module`; the forcing is loaded for the first grid found in
    the index of the sample initial states.

    Returns
    -------
    df_state_init, df_forcing: Tuple[pandas.DataFrame, pandas.DataFrame]
        - df_state_init: `initial model states <df_state_var>`
        - df_forcing: `forcing data <df_forcing_var>`

    Examples
    --------

    >>> df_state_init, df_forcing = supy.load_SampleData()

    """
    path_runcontrol = trv_supy_module / "sample_run" / "RunControl.nml"
    # cached states are reused here (`force_reload=False`)
    df_state_init = init_supy(path_runcontrol, force_reload=False)
    df_forcing = load_forcing_grid(path_runcontrol, df_state_init.index[0])
    return df_state_init, df_forcing


# input processing code ends here
##############################################################################


##############################################################################
# 2. compact wrapper for running a whole simulation
# # main calculation
# input as DataFrame
def run_supy(
    df_forcing: pandas.DataFrame,
    df_state_init: pandas.DataFrame,
    save_state=False,
    chunk_day=3660,
    logging_level=logging.INFO,
    check_input=False,
    serial_mode=False,
) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    """Perform supy simulation.

    Parameters
    ----------
    df_forcing : pandas.DataFrame
        forcing data for all grids in `df_state_init`.
    df_state_init : pandas.DataFrame
        initial model states;
        or a collection of model states with multiple timestamps, whose last temporal record will be used as the initial model states.
    save_state : bool, optional
        flag for saving model states at each time step, which can be useful in diagnosing model runtime performance or performing a restart run.
        (the default is False, which instructs supy not to save runtime model states).
    chunk_day : int, optional
        chunk size (`chunk_day` days) to split simulation periods so memory usage can be reduced.
        (the default is 3660, which implies ~10-year forcing chunks used in simulations).
    logging_level: logging level
        one of these values [50 (CRITICAL), 40 (ERROR), 30 (WARNING), 20 (INFO), 10 (DEBUG)].
        A lower value informs SuPy for more verbose logging info.
        The level is applied to the SuPy logger after input validation.
    check_input : bool, optional
        flag for checking validity of input: `df_forcing` and `df_state_init`.
        If set to `True`, any detected invalid input will stop SuPy simulation;
        a `False` flag will bypass such validation and may incur kernel error if any invalid input.
        *Note: such checking procedure may take some time if the input is large.*
        (the default is `False`, which bypasses the validation).
    serial_mode : bool, optional
        If set to `True`, SuPy simulation will be conducted in serial mode;
        a `False` flag will try parallel simulation if possible (Windows not supported, i.e., always serial).
        A single-grid run is always serial.
        (the default is `False`).


    Returns
    -------
    df_output, df_state_final : Tuple[pandas.DataFrame, pandas.DataFrame]
        - df_output: `output results <df_output_var>`
        - df_state_final: `final model states <df_state_var>`

    Raises
    ------
    RuntimeError
        If `check_input` is set and either the forcing or the initial states
        are reported as invalid (forcing is checked first).

    Examples
    --------

    >>> df_output, df_state_final = supy.run_supy(df_forcing, df_state_init)


    """
    # optional validation: a list returned by a checker signals invalid input
    if check_input:
        if isinstance(check_forcing(df_forcing), list):
            logger_supy.critical("`df_forcing` is NOT valid to drive SuPy!")
            raise RuntimeError(
                "SuPy stopped entering simulation due to invalid forcing!"
            )
        if isinstance(check_state(df_state_init), list):
            logger_supy.critical("`df_state_init` is NOT valid to initialise SuPy!")
            raise RuntimeError(
                "SuPy stopped entering simulation due to invalid initial states!"
            )

    # wall-clock reference for the execution-time report
    t_begin = time.time()

    # apply the requested verbosity
    logger_supy.setLevel(logging_level)

    # summary of the simulation period
    logger_supy.info("====================")
    logger_supy.info("Simulation period:")
    logger_supy.info(f"  Start: {df_forcing.index[0]}")
    logger_supy.info(f"  End: {df_forcing.index[-1]}")
    logger_supy.info("")

    # distinct grids present in the initial states
    grids = df_state_init.index.get_level_values("grid").unique()
    n_grid = grids.size
    logger_supy.info(f"No. of grids: {n_grid}")

    # parallel run needs several grids, a non-Windows OS and no serial override
    go_parallel = n_grid > 1 and os.name != "nt" and not serial_mode
    if go_parallel:
        msg_mode, runner = "SuPy is running in parallel mode", run_supy_par
    else:
        msg_mode, runner = "SuPy is running in serial mode", run_supy_ser
    logger_supy.info(msg_mode)
    df_output, df_state_final = runner(
        df_forcing, df_state_init, save_state, chunk_day
    )

    # report elapsed time
    elapsed = time.time() - t_begin
    logger_supy.info(f"Execution time: {elapsed:.1f} s")
    logger_supy.info("====================\n")

    return df_output, df_state_final


##############################################################################
# 3. save results of a supy run
def save_supy(
    df_output: pandas.DataFrame,
    df_state_final: pandas.DataFrame,
    freq_s: int = 3600,
    site: str = "",
    path_dir_save: str = Path("."),
    path_runcontrol: str = None,
    save_tstep=False,
    logging_level=50,
    output_level=1,
    debug=False,
) -> list:
    """Save SuPy run results to files

    Parameters
    ----------
    df_output : pandas.DataFrame
        DataFrame of output
    df_state_final : pandas.DataFrame
        DataFrame of final model states
    freq_s : int, optional
        Output frequency in seconds (the default is 3600, which indicates hourly output)
    site : str, optional
        Site identifier (the default is '', which indicates site identifier will be left empty)
    path_dir_save : str, optional
        Path to directory to saving the files (the default is Path('.'), which indicates the current working directory).
        The directory, including any missing parents, is created if it does not exist.
    path_runcontrol : str, optional
        Path to SUEWS :ref:`RunControl.nml <suews:RunControl.nml>`, which, if set, overrides
        `freq_s`, `path_dir_save`, `site`, `save_tstep` and `output_level` with the values derived from that file,
        and makes the final states be saved as namelist files instead of a SuPy CSV file.
        (the default is None, which is unset)
    save_tstep : bool, optional
        whether to save results in temporal resolution as in simulation (which may result very large files and slow progress), by default False.
    logging_level: logging level
        one of these values [50 (CRITICAL), 40 (ERROR), 30 (WARNING), 20 (INFO), 10 (DEBUG)].
        A lower value informs SuPy for more verbose logging info.
    output_level : integer, optional
        option to determine selection of output variables, by default 1.
        Notes: 0 for all but snow-related; 1 for all; 2 for a minimal set without land cover specific information.
    debug : bool, optional
        whether to enable debug mode (e.g., writing out in serial mode, and other debug uses), by default False.


    Returns
    -------
    list
        a list of paths of saved files

    Examples
    --------
    1. save results of a supy run to the current working directory with default settings

    >>> list_path_save = supy.save_supy(df_output, df_state_final)


    2. save results according to settings in :ref:`RunControl.nml <suews:RunControl.nml>`

    >>> list_path_save = supy.save_supy(df_output, df_state_final, path_runcontrol='path/to/RunControl.nml')


    3. save results of a supy run at resampling frequency of 1800 s (i.e., half-hourly results) under the site code ``Test`` to a customised location 'path/to/some/dir'

    >>> list_path_save = supy.save_supy(df_output, df_state_final, freq_s=1800, site='Test', path_dir_save='path/to/some/dir')
    """
    # adjust logging level
    logger_supy.setLevel(logging_level)

    # settings from RunControl.nml, when given, take precedence over the arguments
    use_runcontrol = path_runcontrol is not None
    if use_runcontrol:
        freq_s, path_dir_save, site, save_tstep, output_level = get_save_info(
            path_runcontrol
        )

    # determine `save_snow` option from the last record of the final states
    snowuse = df_state_final.iloc[-1].loc["snowuse"].values.item()
    save_snow = snowuse == 1

    # make sure the target directory exists; `exist_ok` avoids the
    # check-then-create race of a separate `exists()` test
    path_dir_save = Path(path_dir_save)
    path_dir_save.mkdir(parents=True, exist_ok=True)

    # save df_output to several files
    list_path_save = save_df_output(
        df_output,
        freq_s,
        site,
        path_dir_save,
        save_tstep,
        output_level,
        save_snow,
        debug,
    )

    # save df_state
    if use_runcontrol:
        # write namelist files, as the SUEWS binary would (per the original note)
        list_path_nml = save_initcond_nml(df_state_final, site, path_dir_save)
        list_path_save = list_path_save + list_path_nml
    else:
        # save as supy csv for later use
        path_state_save = save_df_state(df_state_final, site, path_dir_save)
        # update list_path_save
        list_path_save.append(path_state_save)

    return list_path_save