# Source code for pomdp_py.problems.multi_object_search.models.reward_model
"""Reward model for 2D Multi-object Search domain"""
import pomdp_py
from pomdp_py.problems.multi_object_search.domain.action import *
class MosRewardModel(pomdp_py.RewardModel):
    """Base reward model for the 2D Multi-Object Search domain.

    The reward is deterministic: `probability` returns 1.0 for the exact
    reward produced by `_reward_func` (implemented by subclasses) and 0.0
    otherwise; `sample` and `argmax` both return that deterministic value.
    """

    def __init__(self, target_objects, big=1000, small=1, robot_id=None):
        """
        Args:
            target_objects (set): a set of objids for target objects.
            big (int): magnitude of the large reward/penalty (e.g. for a
                successful or wasted Find action).
            small (int): per-step cost magnitude (motion/look actions).
            robot_id (int): This model is the reward for one agent (i.e. robot).
                If None, then this model could be for the environment, and
                robot_id must then be passed to sample/argmax per call.
        """
        self._robot_id = robot_id
        self.big = big
        self.small = small
        self._target_objects = target_objects

    def probability(
        self, reward, state, action, next_state, normalized=False, **kwargs
    ):
        """Deterministic reward: 1.0 iff `reward` equals the computed reward.

        Bug fix: the original called ``self._reward_func(state, action)``,
        omitting ``next_state`` which ``_reward_func`` requires — that call
        raised TypeError whenever `probability` was invoked.
        """
        if reward == self._reward_func(state, action, next_state):
            return 1.0
        else:
            return 0.0

    def sample(self, state, action, next_state, normalized=False, robot_id=None):
        # deterministic
        return self._reward_func(state, action, next_state, robot_id=robot_id)

    def argmax(self, state, action, next_state, normalized=False, robot_id=None):
        """Returns the most likely reward"""
        return self._reward_func(state, action, next_state, robot_id=robot_id)
class GoalRewardModel(MosRewardModel):
    """
    This is a reward where the agent gets reward only for detect-related actions.

    Motion and Look actions incur a small step cost; a Find action yields a
    big reward when it detects at least one new target, and a big penalty
    when it detects nothing (or was issued without a prior Look).
    """

    def _reward_func(self, state, action, next_state, robot_id=None):
        # Resolve which robot this reward is computed for.
        if robot_id is None:
            assert (
                self._robot_id is not None
            ), "Reward must be computed with respect to one robot."
            robot_id = self._robot_id

        robot_state = state.object_states[robot_id]

        # All targets already detected: the task is finished, so the reward
        # is neutral regardless of the action taken.
        if len(robot_state["objects_found"]) == len(self._target_objects):
            return 0

        if isinstance(action, MotionAction):
            # Step cost plus the motion's own distance cost.
            return -self.small - action.distance_cost

        if isinstance(action, LookAction):
            return -self.small

        if isinstance(action, FindAction):
            if robot_state["camera_direction"] is None:
                # The robot didn't look before detect, so nothing can be in
                # the field of view — a wasted Find.
                return -self.big
            # The transition function has already applied the detection;
            # compare found-object sets before and after.
            newly_found = set(next_state.object_states[robot_id].objects_found) - set(
                robot_state.objects_found
            )
            # Reward a productive Find; penalize a fruitless one.
            return self.big if newly_found else -self.big

        # Any other action type: no reward or penalty.
        return 0