Source code for pomdp_py.algorithms.bsp.blqr

"""Implementation of B-LQR algorithm described in "Belief space planning
assuming maximum likelihood observations" :cite:`platt2010belief`
"""

import pomdp_py
import numpy as np
from scipy import optimize



[docs]
class BLQR(pomdp_py.Planner):
    def __init__(
        self,
        func_sysd,
        func_obs,
        jac_sysd,
        jac_obs,
        jac_sysd_u,
        noise_obs,
        noise_sysd,
        Qlarge,
        L,
        Q,
        R,
        planning_horizon=15,
    ):
        """To initialize the planenr of B-LQR, one needs to supply parameters
        that describe the underlying linear gaussian system, and also
        matrices that describe the cost function. Note that math symbols
        (e.g. xt, ut) are vectors or matrices represented as np.arrays.
        The B-LQR cost function (Eq.14 in the paper):

        :math:`J(b_{t:T},u_{t:T}) = \bar{m}_T^TQ_{large}\bar{m}_T+\bar{s}_T^T\Lambda\bar{s}_T + \sum_{k=t}^{T-1}\bar{m}_k^TQ\bar{m}_k+\bar{u}_t^TR\bar{u}_t`

        Args:
            func_sysd (function): f: (xt, ut) -> xt+1
            func_obs (function): g: (xt) -> zt
            jac_sysd (function): dfdx: (mt, ut) -> At
            jac_obs (function): dgdx: (mt) -> Ct
            jac_sysd_u (function): dfdu (mt, ut) -> Bt
            noise_obs (function): (xt) -> pomdp_py.Gaussian. Potentially
                state-dependent observation noise
            noise_sysd (function): (xt) -> pomdp_py.Gaussian. Potentially
                state-dependent system dynamics noise
            Qlarge (np.array) matrix of shape :math:`d \times d` where :math:`d`
                is the dimensionality of the state vector.
            L (np.array) matrix of shape :math:`d \times d` where :math:`d`
                is the dimensionality of the state vector.
                (Same as :math:`\Lambda` in the equation)
            Q (np.array) matrix of shape :math:`d \times d` where :math:`d`
                is the dimensionality of the state vector.
            R (np.array) matrix of shape :math:`c \times c` where :math:`c`
                is the dimensionality of the control vector.
            planning_horizon (int): This is the :math:`T`, the planning horizon.

        """
        self._func_sysd = func_sysd
        self._func_obs = func_obs
        self._jac_sysd = jac_sysd
        self._jac_obs = jac_obs
        self._jac_sysd = jac_sysd_u
        self._noise_obs = noise_obs
        self._noise_sysd = noise_sysd
        self._Qlarge = Qlarge
        self._L = L  # Lambda
        self._Q = Q
        self._R = R
        self._planning_horizon = planning_horizon
        self._dim_state = self._Q.shape[0]
        self._dim_control = self._R.shape[0]


[docs]
    def ekf_update_mlo(self, bt, ut):
        """
        Performs the ekf belief update assuming maximum likelihood observation.
        This follows equations 12, 13 in the paper. It's the function :math:`F`.

        Args:
            bt (tuple): a belief point bt = (mt, Cov_t) representing the belief state.
                where mt is np.array of shape (d,) and Cov_t is np.array of shape (d,d).
                The cost function equation needs to turn Cov_t into a long vector consist
                of the columns of Cov_t stiched together.
            ut (np.array): control vector
            noise_t (pomdp_py.Gaussian): The Gaussian noise with "possibly state-dependent"
                covariance matrix Wt.
            noise_sysd (np.array): A noise term (e.g. Gaussian noise) added to the
                system dynamics (:math:`v` in Eq.12). This array should have the sam
                dimensionality as mt.
            noise_obs_cov (np.array): The covariance matrix of the Gaussian noise of the
                observation function. This corresponds to Wt in equation 13.
        """
        # TODO: FIX
        mt, Cov_t = bt
        At = self._jac_sysd(mt, ut)
        Ct = self._jac_obs(mt)  # based on maximum likelihood observation
        Wt = self._noise_obs(mt).cov
        Gat = np.dot(np.dot(At, Cov_t), At.transpose())  # Gamma_t = At*Cov_t*At^T
        CGC_W_inv = np.linalg.inv(np.dot(np.dot(Ct, Gat), Ct.transpose()) + Wt)
        Cov_next = Gat - np.dot(
            np.dot(np.dot(np.dot(Gat, Ct.transpose()), CGC_W_inv), Ct), Gat
        )
        m_next = self._func_sysd(mt, ut) + self._noise_sysd(mt).random()
        return (m_next, Cov_next)


    def _b_u_seg(self, x, i):
        """Returns the elements of x that corresponds to the belief and controls,
        as a tuple (m_i, Covi, u_i)"""
        d = self._dim_state
        c = self._dim_control
        start = i * (d + (d * d) + c)
        end = (i + 1) * (d + (d * d) + c)
        m_i_end = i * (d + (d * d) + c) + d
        Covi_end = i * (d + (d * d) + c) + d + d * d
        u_end = i * (d + (d * d) + c) + d + d * d + c
        return x[start:m_i_end], x[m_i_end:Covi_end], x[Covi_end:u_end]

    def _control_max_constraint(self, x, i, max_val):
        _, _, u_i = self._b_u_seg(x, i)
        return max_val - u_i

    def _control_min_constraint(self, x, i, min_val):
        _, _, u_i = self._b_u_seg(x, i)
        return u_i - min_val

    def _mean_final_constraint(self, x, m_des, num_segments):
        m_k, _, _ = self._b_u_seg(x, num_segments - 1)
        return m_k - m_des

    def _mean_start_constraint(self, x, m_0):
        m_k, _, _ = self._b_u_seg(x, 0)
        return m_k - m_0

    def _belief_constraint(self, x, i, num_segments):
        m_i, s_i, u_i = self._b_u_seg(x, i)
        Cov_i = s_i.reshape(self._dim_state, self._dim_state).transpose()
        m_ip1, Cov_ip1 = self.integrate_belief_segment((m_i, Cov_i), u_i, num_segments)
        s_ip1 = Cov_ip1.transpose().reshape(
            -1,
        )
        b_i_vec = np.append(m_i, s_i)
        b_ip1_vec = np.append(m_ip1, s_ip1)
        return b_i_vec - b_ip1_vec


[docs]
    def integrate_belief_segment(self, b_i, u_i, num_segments):
        """This is to represent equation 18.

        phi(b_i', u_i') = F(b_i', u_i') +   sum    F(b_{t+1}, u_i) - F(b_t, u_i)
                                          t in seg

        This essentially returns b_{i+1}'
        """
        m_i, Cov_i = b_i
        b_seg = [self.ekf_update_mlo((m_i, Cov_i), u_i)]
        for t in range(1, self._planning_horizon // num_segments):
            b_seg.append(self.ekf_update_mlo(b_seg[t - 1], u_i))
        sum_mean = b_seg[0][0]
        sum_Cov = b_seg[0][1]
        for t in range(len(b_seg)):
            if t + 1 < len(b_seg) - 1:
                sum_mean += b_seg[t + 1][0] - b_seg[t][0]
                sum_Cov += b_seg[t + 1][1] - b_seg[t][1]
        return sum_mean, sum_Cov


    def _opt_cost_func_seg(self, x, b_des, u_des, num_segments):
        """This will be used as part of the objective function for scipy
        optimizer. Therefore we only take in one input vector, x.  We require
        the structure of x to be:

        [ m1 | Cov 1 | u1 ] ... [ m_i | Cov i | u_i ]

        Use the _b_u_seg function to obtain the individual parts.
        """
        bu_traj = []
        for i in range(num_segments):
            m_i, s_i, u_i = self._b_u_seg(x, i)
            Cov_i = s_i.reshape(self._dim_state, self._dim_state).transpose()
            bu_traj.append(((m_i, Cov_i), u_i))
        return self.segmented_cost_function(bu_traj, b_des, u_des, num_segments)


[docs]
    def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments):
        """The cost function in eq 17.

        Args:
            b_des (tuple): The desired belief (mT, CovT). This is needed to compute the cost function.
            u_des (list): A list of desired controls at the beginning of each segment.
                If this information is not available, pass in an empty list.
        """
        if len(u_des) > 0 and len(u_des) != num_segments:
            raise ValueError(
                "The list of desired controls, if provided, must have one control"
                "per segment"
            )

        b_T, u_T = bu_traj[-1]
        m_T, Cov_T = b_T
        s_T = Cov_T.transpose().reshape(
            -1,
        )

        sLs = np.dot(np.dot(s_T.transpose(), self._L), s_T)
        Summation = 0
        for i in range(num_segments):
            b_i, u_i = bu_traj[i]
            m_i, _ = b_i
            m_i_diff = m_i - b_des[0]
            if len(u_des) > 0:
                u_i_des = u_des[i]
            else:
                u_i_des = np.zeros(self._dim_control)
            u_i_diff = u_i - u_i_des
            Summation += np.dot(
                np.dot(m_i_diff.transpose(), self._Q), m_i_diff
            ) + np.dot(np.dot(u_i_diff.transpose(), self._R), u_i_diff)
        return sLs + Summation



[docs]
    def create_plan(
        self,
        b_0,
        b_des,
        u_init,
        num_segments=5,
        control_bounds=None,
        opt_options={},
        opt_callback=None,
    ):
        """Solves the SQP problem using direct transcription, and produce belief points
        and controls at segments.
        Reference: https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html"""
        # build initial guess from initial belief and the given trajectory.
        x_0 = []
        b_i = b_0
        for i in range(num_segments):
            m_i, Cov_i = b_i
            s_i = Cov_i.transpose().reshape(
                -1,
            )
            u_i = u_init[i]
            x_0.extend(m_i)
            x_0.extend(s_i)
            x_0.extend(u_i)
            b_i = self.integrate_belief_segment(b_i, u_i, num_segments)

        # constraints
        constraints = []
        ## initial belief constraint
        cons_start = {
            "type": "eq",
            "fun": self._mean_start_constraint,
            "args": [b_0[0]],
        }
        constraints.append(cons_start)
        ## belief dynamics constraints and control bounds
        for i in range(num_segments):
            if i + 1 < num_segments - 1:
                cons_belief = {
                    "type": "eq",
                    "fun": self._belief_constraint,
                    "args": [i, num_segments],
                }
                constraints.append(cons_belief)

                # control bounds
                if control_bounds is not None:
                    cons_control_min = {
                        "type": "ineq",
                        "fun": self._control_min_constraint,
                        "args": [i, control_bounds[0]],
                    }
                    cons_control_max = {
                        "type": "ineq",
                        "fun": self._control_max_constraint,
                        "args": [i, control_bounds[1]],
                    }
                    constraints.append(cons_control_min)
                    constraints.append(cons_control_max)

        # final belief constraint
        cons_final = {
            "type": "eq",
            "fun": self._mean_final_constraint,
            "args": [b_des[0], num_segments],
        }
        constraints.append(cons_final)

        opt_res = optimize.minimize(
            self._opt_cost_func_seg,
            x_0,
            method="SLSQP",
            args=(b_des, u_init, num_segments),
            constraints=constraints,
            options=opt_options,
            callback=opt_callback,
        )
        return opt_res



[docs]
    def interpret_sqp_plan(self, opt_res, num_segments):
        x_res = opt_res.x
        plan = []
        for i in range(num_segments):
            m_i, s_i, u_i = self._b_u_seg(x_res, i)
            Cov_i = s_i.reshape(self._dim_state, self._dim_state).transpose()
            plan.append((m_i, Cov_i, u_i))
        return plan
Navigation

Related Topics

Donate/support

Source code for pomdp_py.algorithms.bsp.blqr