Source code for pygypsy.forward_simulation

#pylint: disable=invalid-name
"""Simulation"""
# TODO: change module/funct name - 'forward' is not necessary or strictly true
from __future__ import division
import logging
import datetime
from collections import defaultdict

import pandas as pd

from pygypsy.utils import _log_loop_progress, estimate_species_composition
from pygypsy.basal_area_factor import get_basal_area_factors_for_all_species
from pygypsy.basal_area_simulate import (
    sim_basal_area_aw,
    sim_basal_area_sw,
    sim_basal_area_sb,
    sim_basal_area_pl,
)
from pygypsy.volume import(
    merchantable_volume,
    gross_total_volume,
    DEFAULT_UTILIZATIONS
)
from pygypsy.density import (
    estimate_density_aw,
    estimate_density_pl,
    estimate_density_sb,
    estimate_density_sw,
)
from pygypsy.asaCompileAgeGivenSpSiHt \
    import ComputeGypsyTreeHeightGivenSiteIndexAndTotalAge


LOGGER = logging.getLogger(__name__)
SPECIES = ('Aw', 'Sw', 'Sb', 'Pl')


def _get_initial_basal_area(current_basal_area):
    initial_basal_area = 0.001

    if initial_basal_area > current_basal_area * 0.5:
        initial_basal_area /= 2

    return initial_basal_area


# note: the complexity from basal area/factor finder simulation could be moved
# here. simulation supports using the values from the year of observation for
# all years before the year of observation. that could be enforced here, or
# with an intermediate function. it would be key to get that complexity out of
# the basal area simulation functions to vectorize them
# TODO: this could also be vectorized instead of in a loop
# DESIGN: could use callbacks here instead of calling each of the functions
# DESIGN: should the responsibility to return 0 be in the called functions or
#         in if clauses here?
[docs]def simulate_densities_speciescomp_topheight(n_years=250, start_at_data_year=False, **kwargs): '''Estimate, species composition, top height for all species along time Time is counted independently for each species. :param in duration: :param float startTage: It uses the oldest species as a reference to become the stand age :param float startTageAw, startTageSw, startTageSb, and startTagePl: age for respective species :param float SDF_Pl0: Stand Density Factor of species Pl :param float SDF_Aw0: Stand Density Factor of species Aw :param float SDF_Sb0: Stand Density Factor of species Sb :param float SDF_Sw0: Stand Density Factor of species Sw :param float SI_bh_Sw: site index of species Sw :param float SI_bh_Aw: site index of species Aw :param float SI_bh_Sb: site index of species Sb :param float SI_bh_Pl: site index of species Pl :param float y2bh_Aw: time elapseed in years from zero to breast height age of sp Aw :param float y2bh_Sw: time elapseed in years from zero to breast height age of sp Sw :param float y2bh_Sb: time elapseed in years from zero to breast height age of sp Sb :param float y2bh_Pl: time elapseed in years from zero to breast height age of sp Pl ''' starttage = kwargs['startTage'] starttageaw = kwargs['startTageAw'] y2bh_aw = kwargs['y2bh_Aw'] starttagesw = kwargs['startTageSw'] y2bh_sw = kwargs['y2bh_Sw'] starttagesb = kwargs['startTageSb'] y2bh_sb = kwargs['y2bh_Sb'] starttagepl = kwargs['startTagePl'] y2bh_pl = kwargs['y2bh_Pl'] sdf_aw0 = kwargs['SDF_Aw0'] sdf_sw0 = kwargs['SDF_Sw0'] sdf_pl0 = kwargs['SDF_Pl0'] sdf_sb0 = kwargs['SDF_Sb0'] si_bh_aw = kwargs['SI_bh_Aw'] si_bh_sw = kwargs['SI_bh_Sw'] si_bh_sb = kwargs['SI_bh_Sb'] si_bh_pl = kwargs['SI_bh_Pl'] densities_along_time = [] end_year = n_years if not start_at_data_year else n_years + starttage year = 0 if not start_at_data_year else starttage tage_aw = starttageaw - starttage if not start_at_data_year else starttageaw tage_sw = starttagesw - starttage if not start_at_data_year else starttagesw tage_pl = starttagepl - starttage if not start_at_data_year else starttagepl tage_sb = starttagesb - starttage if not start_at_data_year else starttagesb while year < end_year: bhage_aw = tage_aw - y2bh_aw bhage_sw = tage_sw - y2bh_sw bhage_pl = tage_pl - y2bh_pl bhage_sb = tage_sb - y2bh_sb if bhage_aw < 0: n_bh_awt = 0 else: # bhage instead of tage, as specified by the GYPSY model itself n_bh_awt = estimate_density_aw(sdf_aw0, bhage_aw, si_bh_aw) if tage_sb < 0: n_bh_sbt = 0 else: n_bh_sbt = estimate_density_sb(sdf_sb0, tage_sb, si_bh_sb) if tage_sw < 0: n_bh_swt = 0 else: n_bh_swt = estimate_density_sw(sdf_sw0, sdf_aw0, tage_sw, si_bh_sw) if tage_pl < 0: n_bh_plt = 0 else: n_bh_plt = estimate_density_pl(sdf_aw0, sdf_sw0, sdf_sb0, sdf_pl0, tage_pl, si_bh_pl) if n_bh_awt <= 0: topheight_aw = 0 else: topheight_aw = ComputeGypsyTreeHeightGivenSiteIndexAndTotalAge( 'Aw', si_bh_aw, tage_aw ) if n_bh_sbt <= 0: topheight_sb = 0 else: topheight_sb = ComputeGypsyTreeHeightGivenSiteIndexAndTotalAge( 'Sb', si_bh_sb, tage_sb ) if n_bh_swt <= 0: topheight_sw = 0 else: topheight_sw = ComputeGypsyTreeHeightGivenSiteIndexAndTotalAge( 'Sw', si_bh_sw, tage_sw ) if n_bh_plt <= 0: topheight_pl = 0 else: topheight_pl = ComputeGypsyTreeHeightGivenSiteIndexAndTotalAge( 'Pl', si_bh_pl, tage_pl ) sc_aw, sc_sw, sc_sb, sc_pl = estimate_species_composition( n_bh_awt, n_bh_sbt, n_bh_swt, n_bh_plt ) densities_along_time.append({ 'N_bh_AwT': n_bh_awt, 'N_bh_SwT': n_bh_swt, 'N_bh_SbT': n_bh_sbt, 'N_bh_PlT': n_bh_plt, 'SC_Aw': sc_aw, 'SC_Sw': sc_sw, 'SC_Sb':sc_sb, 'SC_Pl': sc_pl, 'tage_Aw': tage_aw, 'tage_Sw': tage_sw, 'tage_Sb': tage_sb, 'tage_Pl': tage_pl, 'bhage_Aw': bhage_aw, 'bhage_Sw': bhage_sw, 'bhage_Sb': bhage_sb, 'bhage_Pl': bhage_pl, 'topHeight_Aw': topheight_aw, 'topHeight_Sw': topheight_sw, 'topHeight_Sb': topheight_sb, 'topHeight_Pl': topheight_pl }) year += 1 tage_aw += 1 tage_sw += 1 tage_pl += 1 tage_sb += 1 return densities_along_time
def _simulate_row(row, utiliz_params=None, n_years=250, backwards=True, year_of_data_acquisition=0): """ Simulate a single plot :param row: row of pandas data frame :param utliz_params: dictionary of utilization parameters :param int n_years: number of years to simulate :param bool backwards: whether the simulation from year zero to the year of the data should be done :param year_of_data_acquisition: year in which data was acquired, used to set year index .. note: access a single row like df.ix[row, ] """ if utiliz_params is None: utiliz_params = DEFAULT_UTILIZATIONS SI_bh_Aw = row.at['SI_Aw'] SI_bh_Sw = row.at['SI_Sw'] SI_bh_Pl = row.at['SI_Pl'] SI_bh_Sb = row.at['SI_Sb'] N_bh_AwT = row.at['N_Aw'] N_bh_SwT = row.at['N_Sw'] N_bh_PlT = row.at['N_Pl'] N_bh_SbT = row.at['N_Sb'] y2bh_Aw = row.at['y2bh_Aw'] y2bh_Sw = row.at['y2bh_Sw'] y2bh_Sb = row.at['y2bh_Sb'] y2bh_Pl = row.at['y2bh_Pl'] tage_AwT = row.at['tage_Aw'] tage_SwT = row.at['tage_Sw'] tage_PlT = row.at['tage_Pl'] tage_SbT = row.at['tage_Sb'] BA_AwT = row.at['BA_Aw'] BA_SwT = row.at['BA_Sw'] BA_PlT = row.at['BA_Pl'] BA_SbT = row.at['BA_Sb'] SDF_Aw0 = row.at['SDF_Aw'] SDF_Sw0 = row.at['SDF_Sw'] SDF_Pl0 = row.at['SDF_Pl'] SDF_Sb0 = row.at['SDF_Sb'] N0_Aw = row.at['N0_Aw'] N0_Sw = row.at['N0_Sw'] N0_Pl = row.at['N0_Pl'] N0_Sb = row.at['N0_Sb'] BA_Aw0 = _get_initial_basal_area(BA_AwT) BA_Sw0 = _get_initial_basal_area(BA_SwT) BA_Sb0 = _get_initial_basal_area(BA_SbT) BA_Pl0 = _get_initial_basal_area(BA_PlT) SC_Aw, SC_Sw, SC_Sb, SC_Pl = estimate_species_composition( N0_Aw, N0_Sb, N0_Sw, N0_Pl ) tageData = [tage_AwT, tage_SwT, tage_PlT, tage_SbT] startTageAw = tageData[0] startTageSw = tageData[1] startTagePl = tageData[2] startTageSb = tageData[3] tageData = sorted(tageData, reverse=True) startTage = int(tageData[0]) densities = simulate_densities_speciescomp_topheight( n_years=n_years, start_at_data_year=False if backwards else True, startTage=startTage, startTageAw=startTageAw, y2bh_Aw=y2bh_Aw, startTageSw=startTageSw, y2bh_Sw=y2bh_Sw, startTageSb=startTageSb, y2bh_Sb=y2bh_Sb, startTagePl=startTagePl, y2bh_Pl=y2bh_Pl, SDF_Aw0=SDF_Aw0, SDF_Sw0=SDF_Sw0, SDF_Pl0=SDF_Pl0, SDF_Sb0=SDF_Sb0, SI_bh_Aw=SI_bh_Aw, SI_bh_Sw=SI_bh_Sw, SI_bh_Sb=SI_bh_Sb, SI_bh_Pl=SI_bh_Pl ) species_factors = defaultdict(lambda: 1) if not backwards else \ get_basal_area_factors_for_all_species( startTage=startTage, startTageAw=startTageAw, startTageSb=startTageSb, startTageSw=startTageSw, startTagePl=startTagePl, y2bh_Aw=y2bh_Aw, y2bh_Sb=y2bh_Sb, y2bh_Sw=y2bh_Sw, y2bh_Pl=y2bh_Pl, SC_Aw=SC_Aw, SC_Sb=SC_Sb, SC_Sw=SC_Sw, SC_Pl=SC_Pl, SI_bh_Aw=SI_bh_Aw, SI_bh_Sb=SI_bh_Sb, SI_bh_Sw=SI_bh_Sw, SI_bh_Pl=SI_bh_Pl, N_bh_AwT=N_bh_AwT, N_bh_SbT=N_bh_SbT, N_bh_SwT=N_bh_SwT, N_bh_PlT=N_bh_PlT, N0_Aw=N0_Aw, N0_Sb=N0_Sb, N0_Sw=N0_Sw, N0_Pl=N0_Pl, BA_Aw0=BA_Aw0, BA_Sb0=BA_Sb0, BA_Sw0=BA_Sw0, BA_Pl0=BA_Pl0, BA_AwT=BA_AwT, BA_SbT=BA_SbT, BA_SwT=BA_SwT, BA_PlT=BA_PlT, SDF_Pl0=SDF_Pl0, SDF_Sb0=SDF_Sb0, SDF_Aw0=SDF_Aw0, SDF_Sw0=SDF_Sw0, densities=densities ) # use_correction_factor_future here is True because aspen was peculiar, # empirically demonstrated that using the factor for the whole # simulation yielded better results, probably because of variability in # density and basal area unique to aspen basal_area_aw_arr = sim_basal_area_aw( startTage, SI_bh_Aw, N0_Aw, BA_Aw0, SDF_Aw0, species_factors['f_Aw'], densities, use_correction_factor_future=True if backwards else False, stop_at_initial_age=False, force_use_densities=False if backwards else True ) basal_area_sb_arr = sim_basal_area_sb( startTage, SI_bh_Sb, N0_Sb, BA_Sb0, species_factors['f_Sb'], densities, use_correction_factor_future=False, stop_at_initial_age=False, fix_proportion_and_density_to_initial_age=False, species_proportion_at_bh_age=SC_Sb, present_density=N_bh_SbT, force_use_densities=False if backwards else True ) basal_area_sw_arr = sim_basal_area_sw( startTage, SI_bh_Sw, N0_Sw, SDF_Aw0, SDF_Pl0, SDF_Sb0, BA_Sw0, species_factors['f_Sw'], densities, use_correction_factor_future=False, stop_at_initial_age=False, fix_proportion_and_density_to_initial_age=False, species_proportion_at_bh_age=SC_Sw, present_density=N_bh_SwT, force_use_densities=False if backwards else True ) basal_area_pl_arr = sim_basal_area_pl( startTage, SI_bh_Pl, N0_Pl, SDF_Aw0, SDF_Sw0, SDF_Sb0, BA_Pl0, species_factors['f_Pl'], densities, use_correction_factor_future=False, stop_at_initial_age=False, fix_proportion_and_density_to_initial_age=False, species_proportion_at_bh_age=SC_Pl, present_density=N_bh_PlT, force_use_densities=False if backwards else True ) output_df_aw = pd.DataFrame(basal_area_aw_arr, columns=['BA_Aw']) output_df_sw = pd.DataFrame(basal_area_sw_arr, columns=['BA_Sw']) output_df_sb = pd.DataFrame(basal_area_sb_arr, columns=['BA_Sb']) output_df_pl = pd.DataFrame(basal_area_pl_arr, columns=['BA_Pl']) densities_df = pd.DataFrame(densities) output_df = pd.concat( [densities_df, output_df_aw, output_df_sw, output_df_sb, output_df_pl], axis=1 ) for spec in SPECIES: output_df['Gross_Total_Volume_%s' % spec] = gross_total_volume( spec, output_df['BA_%s' % spec], output_df['topHeight_%s' % spec] ) output_df['MerchantableVolume%s' % spec] = merchantable_volume( spec, output_df['N_bh_%sT' % spec], output_df['BA_%s' % spec], output_df['topHeight_%s' % spec], output_df['Gross_Total_Volume_%s' % spec], top_dib=utiliz_params[spec.lower()]['topDiamInsideBark'], stump_dob=utiliz_params[spec.lower()]['stumpDiamOutsideBark'], stump_height=utiliz_params[spec.lower()]['stumpHeight'] ) output_df['Gross_Total_Volume_Con'] = output_df['Gross_Total_Volume_Sw'] \ + output_df['Gross_Total_Volume_Sb'] \ + output_df['Gross_Total_Volume_Pl'] output_df['Gross_Total_Volume_Dec'] = output_df['Gross_Total_Volume_Aw'] output_df['Gross_Total_Volume_Tot'] = output_df['Gross_Total_Volume_Con'] \ + output_df['Gross_Total_Volume_Dec'] output_df['MerchantableVolume_Con'] = output_df['MerchantableVolumeSw'] \ + output_df['MerchantableVolumeSb'] \ + output_df['MerchantableVolumePl'] output_df['MerchantableVolume_Dec'] = output_df['MerchantableVolumeAw'] output_df['MerchantableVolume_Tot'] = output_df['MerchantableVolume_Con'] \ + output_df['MerchantableVolume_Dec'] output_df.reset_index(inplace=True) output_df = output_df.rename(columns={'index': 'year'}) output_df['year'] = output_df['year'] + year_of_data_acquisition + 1 output_df['id_l1'] = int(row['id_l1']) return output_df
[docs]def simulate_forwards_df(plot_df, **kwargs): """Simulate the evolution of plot characteristics through time This begins with the simulation of densities, species, and top height. With the time series of those items in hand, the simulation from time zero (primary succession) to the time of observation can be skipped. If it is not skipped, an optimization routine is used to find factors for each species. The factors are multiplied with a parameter of the basal area increment to ensure that the simulation time timer zero to the time of observation passes through a basal area of 0 and the observed basal area. Between time zero and the time of observation, fixed values may optionally be used for density, species compositon. This optimization is expensive. Once the simulation reaches the year of observation, the simulated densities, species composition, and top heights are used in the increment functions, instead of using fixed values. :param plot_df: pandas.DataFrame with plot data :param kwargs: keyword arguments to func:`_simulate_row` :return: multi-index data frame with plot id and year as index and values of the simulated plot characteristics .. warning:: the backwards parameter must be set to True in order to utilize the correction factors, which ensure the data passes through 0 and the observation. without the backwards simulation, this is already ensured. However, for Aspen, the factor found is also used for the forward simulation """ output_dict = {} n_rows = plot_df.shape[0] for _, row in plot_df.iterrows(): start = datetime.datetime.now() _log_loop_progress(_, n_rows) plot_id = str(int(row['id_l1'])) LOGGER.debug('Starting simulation for plot: %s', plot_id) try: output_df = _simulate_row(row, **kwargs) output_dict[plot_id] = output_df except Exception as err: #pylint: disable=broad-except LOGGER.critical('Unhandled error: %s', err.message) end = datetime.datetime.now() duration = end - start LOGGER.debug('plot %s took %f seconds', plot_id, duration.total_seconds()) complete_output_df = pd.concat(output_dict.values(), copy=False) complete_output_df.set_index(['id_l1', 'year'], inplace=True) return complete_output_df