Source code for problems.multi_object_search.models.policy_model

"""Policy model for 2D Multi-Object Search domain.
It is optional for the agent to be equipped with an occupancy
grid map of the environment.
"""

import pomdp_py
import random
from pomdp_py.problems.multi_object_search.domain.action import *


class PolicyModel(pomdp_py.RolloutPolicy):
    """Uniform-random policy model for the 2D Multi-Object Search domain.

    Motion actions and ``Look`` are always candidate actions. ``Find`` is
    only offered when the most recent action recorded in the history is a
    ``LookAction``. When a grid map was supplied and a state is given, the
    motion actions are restricted to those the map reports as valid.
    """

    def __init__(self, robot_id, grid_map=None):
        """Create the policy model.

        Args:
            robot_id: Identifier of the robot this policy acts for; used to
                look up the robot's pose in a state.
            grid_map: Optional object exposing
                ``valid_motions(robot_id, pose, motions)``. When ``None``,
                every motion action is treated as valid.
        """
        self.robot_id = robot_id
        self._grid_map = grid_map

    def sample(self, state, **kwargs):
        """Return one action drawn uniformly from the available actions.

        Args:
            state: Current state; forwarded to :meth:`get_all_actions` so
                that grid-map filtering of motions applies.
            **kwargs: Extra keyword arguments (e.g. ``history``) forwarded
                unchanged to :meth:`get_all_actions`.

        Returns:
            A single action.
        """
        # Delegate to get_all_actions so sampling honors the same
        # Find-after-Look and valid-motion restrictions as rollout().
        return random.choice(self.get_all_actions(state=state, **kwargs))

    def probability(self, action, state, **kwargs):
        """Not supported by this policy model.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def argmax(self, state, **kwargs):
        """Return the most likely action (not supported).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_all_actions(self, state=None, history=None):
        """Return the list of actions available to the robot.

        Note: ``Find`` can only happen directly after ``Look``.

        Args:
            state: Optional state. When given together with a grid map, the
                motion actions are filtered through
                ``grid_map.valid_motions`` using the robot's pose.
            history: Optional sequence of steps whose first element is the
                action taken (``history[-1][0]`` is the last action).

        Returns:
            A new list holding the (valid) motion actions, ``Look`` and,
            if the last action was a ``LookAction``, ``Find``.
        """
        # A single recorded step is enough to know the last action, so any
        # non-empty history qualifies.
        can_find = (
            history is not None
            and len(history) > 0
            and isinstance(history[-1][0], LookAction)
        )
        find_action = [Find] if can_find else []

        motions = ALL_MOTION_ACTIONS
        if state is not None and self._grid_map is not None:
            motions = self._grid_map.valid_motions(
                self.robot_id, state.pose(self.robot_id), ALL_MOTION_ACTIONS
            )

        # Unpacking builds a fresh list, so callers can mutate the result
        # without touching the shared ALL_MOTION_ACTIONS collection.
        return [*motions, Look, *find_action]

    def rollout(self, state, history=None):
        """Return a uniformly random available action for a rollout step.

        Args:
            state: Current state; forwarded to :meth:`get_all_actions`.
            history: Optional history; forwarded to :meth:`get_all_actions`.

        Returns:
            A single action.
        """
        return random.choice(self.get_all_actions(state=state, history=history))