Source code for farmingpy.apsim.optimizer

"""
Optimizers for APSIM simulation models.
"""

import numpy as np
import pandas as pd
import nlopt
class OptimizerBase(object):

    def __init__(self, model, params, obs_yield, obs_lai, harvest_date, zone_names = None, multithread = True):

        obs_yield.rename(str.lower, axis=1, inplace=True)
        obs_lai.rename(str.lower, axis=1, inplace=True)

        obs_lai.columns = obs_lai.columns.str.replace(r".*lai.*", "obs_lai", regex=True)
        obs_yield.columns = obs_yield.columns.str.replace(r".*yield.*", "obs_yield", regex=True)

        if zone_names is not None:
            if type(zone_names) == str:
                zone_names = [zone_names]
            obs_yield = obs_yield[obs_yield.zone.apply(lambda x : x in zone_names)]
            obs_lai = obs_lai[obs_lai.zone.apply(lambda x : x in zone_names)]

        self.zone_names = zone_names
        self.model = model
        self.obs_lai = obs_lai
        self.obs_yield = obs_yield
        self.harvest_date = harvest_date
        self.multithread = multithread

        self.optvars = sorted(params.keys())
        self.params = params.copy()
        self.N = len(self.optvars)

        self._iter = 0
        self.print_interval = 10
        self.log_level = 1

    def _print(self, clai, cyield, charvest, cost):
        if (self._iter % self.print_interval) == 0:
            print(f"\tIteration {self._iter}, cost: LAI {clai:.3f} Yield {cyield:.3f}, Harvest {charvest:.3f}, Total {cost:.3f}")

    def _merge_sim_obs(self):
        df = self.model.results.copy()
        df.rename(str.lower, axis=1, inplace=True)
        df["sim_lai"] = df["lai"]
        sim_lai = df[["clocktoday", "zone", "sim_lai"]].copy()
        sim_lai["clocktoday"] = sim_lai.clocktoday.dt.floor("D")
        sim_lai = self.obs_lai.merge(sim_lai, how="left")

        sim_yield = df[["zone", "yield", "lai"]].groupby("zone", as_index=False).agg(np.max)
        sim_yield.rename({"yield" : "sim_yield", "lai" : "sim_lai"}, axis=1, inplace=True)
        sim_yield =  sim_yield.merge(self.obs_yield)

        htime = self.model.harvest_date
        #print(htime)
        if htime is None or htime.shape[0] == 0:
            print("Out of range")
            harvest_time = df["clocktoday"].max()
        else:
            harvest_time = htime.iloc[0]["ClockToday"]
        return sim_yield, sim_lai, harvest_time

    def _calculate_cost(self, p, grad = []):
        self._step(p, grad)
        self._run()
        self._iter += 1
        sim_yield, lai_df, harvest_time = self._merge_sim_obs()
        # Calculate errors
        e_lai = (lai_df.sim_lai - lai_df.obs_lai)
        e_yield = (sim_yield.sim_yield - sim_yield.obs_yield)
        e_harvest = (harvest_time - self.harvest_date).total_seconds() /(24*60*60)
        # Apply cost function
        return self._scaled_lsq(e_lai, e_yield, e_harvest)

    def _scaled_lsq(self, e_lai, e_yield, e_harvest):
        cost_lai = np.mean((e_lai)**2)/np.max(self.obs_lai["obs_lai"])
        cost_yield = np.mean(np.abs(e_yield))/np.max(self.obs_yield["obs_yield"].max())
        cost_harvest = abs(e_harvest) / 7 # Error or 1 week in same scale with other variables
        total_cost =  cost_lai + cost_yield + cost_harvest
        if self.log_level > 0:
            self._print(cost_lai, cost_yield, cost_harvest, total_cost)
        return total_cost

    def _rand_start(self, r):
        l, h = r
        vs = np.linspace(l,h, 10)
        idx = np.random.randint(0, len(vs))
        return vs[idx]

    @property
    def optimized_data(self):
        return self._merge_sim_obs()

    @property
    def optimized_parameters(self):
        return pd.DataFrame({self.optvars[i] :self.opt_values[i] for i in range(self.N)}, index=[0])


    def _print_optimized(self):
        pass

    def _step(self, p, grad=[]):
        pass

    def _run(self):
        self.model.run(self.zone_names, clean=False, multithread = self.multithread)

    def optimize(self, alg = nlopt.GN_DIRECT_L, maxeval = 5):
        """Run the optimizer

        Parameters
        ----------
        alg, optional
            nlopt algorithm to use, by default nlopt.GN_DIRECT_L
        maxeval, optional
            Maximum number of iterations, by default 5
        """
        if self.log_level > 0:
            print(f"Optimizing {self.N} parameters, max {maxeval} iterations")
        opt = nlopt.opt(alg, self.N)
        opt.set_min_objective(self._calculate_cost)

        opt.set_lower_bounds([self.params[v][0] for v in self.optvars])
        opt.set_upper_bounds([self.params[v][1] for v in self.optvars])

        opt.set_maxeval(maxeval)
        init = [self._rand_start(self.params[v]) for v in self.optvars]

        self.opt = opt
        self.opt_values = opt.optimize(init)

        self._step(self.opt_values)
        if self.log_level > 0:
            print(f"Done after {self._iter} iterations")
            self._print_optimized()


[docs] class SoilOptimizer(OptimizerBase): """Optimize soil paramters."""
[docs] def __init__(self, model, params, obs_yield, obs_lai, harvest_date, zone_names=None, multithread=True): """Optimize model parameters Parameters ---------- model APSIMX object params Dictionary of soil parameters to optimize with allowed ranges. The range indicates difference to original values. e.g. ``{"ll15" : (-0.01, 0.05), "dul" : (0.0, -0.05)}``. Supported parameters are: ``ll15, dul, no3, nh4, urea``. The same change is applied to all soil layers. obs_yield Dataframe with observed yield for each zone. obs_lai Dataframe with observed LAI for each zone. harvest_date Harvest date as datetime zone_names, optional Name of zones to optimize. multithread, optional Allow APSIM to use multiple threads, by default True """ super(SoilOptimizer, self).__init__(model, params, obs_yield, obs_lai, harvest_date, zone_names, multithread) #Save starting values self.ll15 = self.model.get_ll15().copy() self.dul = self.model.get_dul().copy() self.sat = self.model.get_sat().copy() self.cll = self.model.get_crop_ll().copy() self.initial_no3 = self.model.get_initial_no3().copy() self.initial_nh4 = self.model.get_initial_nh4().copy() self.initial_urea = self.model.get_initial_urea().copy()
def _print_optimized(self): print(str.join(", ", [f"{self.optvars[i]} = {self.opt_values[i]:.2f}" for i in range(self.N)])) def _step(self, p, grad=[]): #for zone in self.zone_names: zones = self.zone_names for i in range(self.N): v = self.optvars[i] if v == "ll15": self.model.set_ll15(self.ll15 + p[i], zones) if v == "dul": new_dul = self.dul + p[i] self.model.set_dul(new_dul, zones) self.model.set_sat(self.sat + p[i], zones) if v.lower() == "no3": self.model.set_initial_no3(self.initial_no3 + p[i]) if v.lower() == "nh4": self.model.set_initial_nh4(self.initial_nh4 + p[i]) if v.lower() == "urea": self.model.set_initial_urea(self.initial_urea + p[i])
[docs] class PhenologyOptimizer(OptimizerBase): """Optimize cultivar parameters."""
[docs] def __init__(self, model, params, obs_yield, obs_lai, harvest_date, zone_names=None, multithread=True): """Optimize model parameters Parameters ---------- model APSIMX object params Dictionary of cultivar parameters to optimize with allowed ranges. obs_yield Dataframe with observed yield for each zone. obs_lai Dataframe with observed LAI for each zone. harvest_date Harvest date as datetime zone_names, optional Name of zones to optimize. multithread, optional Allow APSIM to use multiple threads, by default True """ super(PhenologyOptimizer, self).__init__(model, params, obs_yield, obs_lai, harvest_date, zone_names, multithread)
def _step(self, p, grad=[]): c = {self.optvars[i] : p[i] for i in range(self.N)} self.model.update_cultivar(c, self.zone_names) def _print_optimized(self): print(str.join("\n", [f"{self.optvars[i]} = {self.opt_values[i]:.2f}" for i in range(self.N)]))