Source code for pomdp_py.problems.tag.models.transition_model

"""The Tag problem. Implemented according to the paper `Anytime Point-Based
Approximations for Large POMDPs <https://arxiv.org/pdf/1110.0027.pdf>`_.

Transition model: the robot moves deterministically. The target's movement
    depends on the robot; With Pr=0.8 the target moves away from the robot,
    and with Pr=0.2, the target stays at the same place. The target never
    moves closer to the robot.
"""

import copy
import pomdp_py
import pomdp_py.problems.tag.constants as constants
from pomdp_py.problems.tag.domain.action import *


[docs] class TagTransitionModel(pomdp_py.TransitionModel): def __init__(self, grid_map, target_motion_policy): self._grid_map = grid_map self.target_motion_policy = target_motion_policy
[docs] @classmethod def if_move_by(cls, grid_map, position, action): if isinstance(action, MotionAction): dx, dy = action.motion next_position = (position[0] + dx, position[1] + dy) if grid_map.valid_pose(next_position): return next_position return position
[docs] def probability(self, next_state, state, action, **kwargs): # Robot motion expected_robot_position = TagTransitionModel.if_move_by( self._grid_map, state.robot_position, action ) if expected_robot_position != next_state.robot_position: return constants.EPSILON if isinstance(action, TagAction): if next_state.target_position == next_state.robot_position: if next_state.target_found: return 1.0 - constants.EPSILON else: return constants.EPSILON else: if next_state.target_found: return constants.EPSILON else: return 1.0 - constants.EPSILON # Target motion valid_target_motion_actions = self._grid_map.valid_motions( state.target_position ) return self.target_motion_policy.probability( next_state.target_position, state.target_position, state.robot_position, valid_target_motion_actions, )
[docs] def sample(self, state, action, argmax=False): # Robot motion next_state = copy.deepcopy(state) next_state.robot_position = TagTransitionModel.if_move_by( self._grid_map, state.robot_position, action ) # If Tag action if isinstance(action, TagAction): if not state.target_found: if state.robot_position == state.target_position: next_state.target_found = True return next_state # Target motion valid_target_motion_actions = self._grid_map.valid_motions( state.target_position ) if not argmax: next_state.target_position = self.target_motion_policy.random( state.robot_position, state.target_position, valid_target_motion_actions ) else: next_state.target_position = self.target_motion_policy.mpe( state.robot_position, state.target_position, valid_target_motion_actions ) return next_state
[docs] def argmax(self, state, action, **kwargs): return self.sample(state, action, argmax=True)