Source code for irlc.ex02.dp_model

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np

class DPModel:
    """
    Base class for a dynamic programming (DP) model.

    The lecture notes contain further discussion about the distinction between
    models/environments/agents, which I recommend reading first. Briefly, the role of the
    DP model is to define (in detail) the basic N-step decision problem corresponding to
    the equations:

        x_{k+1}       = f_k(x_k, u_k, w_k)
        cost          = g_k(x_k, u_k, w_k)
        terminal cost = g_N(x_N)

    The above corresponds to the first 3 methods below (``f``, ``g`` and ``gN``). The rest
    of the functions specify the available states and actions (see the definition of the
    basic problem in the lecture notes).

    In this base class ``f``, ``g``, ``gN``, ``S`` and ``A`` only raise
    ``NotImplementedError``; ``Pw`` returns a fixed placeholder distribution.
    """

    def __init__(self, N):
        """Store the planning horizon ``N`` (the N of the N-step problem) as ``self.N``."""
        self.N = N

    def f(self, x, u, w, k):
        """Transition function: return x_{k+1} = f_k(x, u, w). Not implemented here."""
        raise NotImplementedError("Return f_k(x,u,w)")

    def g(self, x, u, w, k):
        """Stage cost: return g_k(x, u, w). Not implemented here."""
        raise NotImplementedError("Return g_k(x,u,w)")

    def gN(self, x):
        """Terminal cost: return g_N(x). Not implemented here."""
        raise NotImplementedError("Return g_N(x)")

    def S(self, k):
        """State space at step k, as a set S_k = {x_1, x_2, ...}. Not implemented here."""
        raise NotImplementedError("Return state space as set S_k = {x_1, x_2, ...}")

    def A(self, x, k):
        """Action space in state x at step k, as a set {u_1, u_2, ...}. Not implemented here."""
        raise NotImplementedError("Return action space as set A(x_k) = {u_1, u_2, ...}")

    def Pw(self, x, u, k):
        """
        At step k, given x_k, u_k, compute the set of random noise disturbances w
        and their probabilities as a dict {..., w_i: pw_i, ...} such that

            pw_i = P_k(w_i | x, u)

        This base implementation ignores its arguments and returns a dummy distribution.
        """
        return {'w_dummy': 1/3, 42: 2/3}  # P(w_k="w_dummy") = 1/3, P(w_k =42)=2/3.

    def w_rnd(self, x, u, k):
        """
        Generate a random disturbance w ~ P_k(x, u) (useful for simulation).

        The distribution is obtained from ``self.Pw(x, u, k)``. The returned value is one
        of the keys of that dict, returned as the original object (no type conversion).

        Raises:
            ValueError: if ``Pw`` returns an empty distribution, or if NumPy rejects the
                probabilities (e.g. they do not sum to 1).
        """
        pW = self.Pw(x, u, k)
        if not pW:
            raise ValueError("Pw(x, u, k) returned an empty distribution; cannot sample w")
        w, pw = zip(*pW.items())  # separate w and p(w)
        # Sample an *index* rather than passing `w` itself to np.random.choice: NumPy would
        # first convert `w` to an array, which turns mixed-type outcomes into strings
        # (42 -> '42') and fails for tuple-valued disturbances ("a must be 1-dimensional").
        idx = np.random.choice(len(w), p=pw)
        return w[idx]