Source code for pomdp_py.problems.tag.models.reward_model
import pomdp_py
from pomdp_py.problems.tag.domain.action import *
[docs]
class TagRewardModel(pomdp_py.RewardModel):
    """Deterministic reward model for the Tag problem.

    The reward depends only on the action and the resulting state:

    * any ``MotionAction`` yields ``-small``;
    * a ``TagAction`` yields ``+big`` when, in ``next_state``, the robot and
      target positions are equal and ``target_found`` is truthy;
    * any other ``TagAction`` yields ``-big``.

    Args:
        small: Magnitude of the penalty returned for a motion action.
        big: Magnitude of the reward / penalty returned for a tag action.
    """

    def __init__(self, small=1, big=10):
        self.small = small
        self.big = big

    def probability(
        self, reward, state, action, next_state, normalized=False, **kwargs
    ):
        """Return the probability of observing ``reward`` for a transition.

        Because the reward is deterministic, the result is ``1.0`` when
        ``reward`` equals the value computed by :meth:`_reward_func` and
        ``0.0`` otherwise.  ``normalized`` and ``kwargs`` are accepted but
        not used here.
        """
        # next_state must be forwarded: _reward_func takes three arguments
        # and the Tag outcome is decided from next_state alone.
        expected = self._reward_func(state, action, next_state)
        return 1.0 if reward == expected else 0.0

    def sample(self, state, action, next_state):
        """Return the (deterministic) reward for the given transition."""
        return self._reward_func(state, action, next_state)

    def _reward_func(self, state, action, next_state):
        """Compute the reward for taking ``action`` and reaching ``next_state``.

        ``state`` is part of the signature but is not read by this function.

        Raises:
            AssertionError: if ``action`` is neither a ``MotionAction`` nor a
                ``TagAction``.
        """
        if isinstance(action, MotionAction):
            return -self.small

        # Anything that is not a motion must be a Tag action.
        assert isinstance(action, TagAction)
        tagged = (
            next_state.target_position == next_state.robot_position
            and next_state.target_found
        )
        if tagged:
            return self.big
        # Tag attempted without a successful catch.
        return -self.big