Source code for tensortrade.env.default.rewards


from abc import abstractmethod

import numpy as np
import pandas as pd

from tensortrade.env.generic import RewardScheme, TradingEnv
from tensortrade.feed.core import Stream, DataFeed


class TensorTradeRewardScheme(RewardScheme):
    """An abstract base class for reward schemes for the default environment."""

    def reward(self, env: 'TradingEnv') -> float:
        return self.get_reward(env.action_scheme.portfolio)

    @abstractmethod
    def get_reward(self, portfolio) -> float:
        """Gets the reward associated with the current step of the episode.

        Parameters
        ----------
        portfolio : `Portfolio`
            The portfolio associated with the `TensorTradeActionScheme`.

        Returns
        -------
        float
            The reward for the current step of the episode.
        """
        raise NotImplementedError()


class SimpleProfit(TensorTradeRewardScheme):
    """A simple reward scheme that rewards the agent for incremental increases
    in net worth.

    Parameters
    ----------
    window_size : int
        The size of the look-back window for computing the reward.

    Attributes
    ----------
    window_size : int
        The size of the look-back window for computing the reward.
    """

    def __init__(self, window_size: int = 1):
        self._window_size = self.default('window_size', window_size)

    def get_reward(self, portfolio: 'Portfolio') -> float:
        """Rewards the agent for incremental increases in net worth over a
        sliding window.

        Parameters
        ----------
        portfolio : `Portfolio`
            The portfolio being used by the environment.

        Returns
        -------
        float
            The cumulative percentage change in net worth over the previous
            `window_size` time steps.
        """
        net_worths = [nw['net_worth'] for nw in portfolio.performance.values()]
        if len(net_worths) > 1:
            return net_worths[-1] / net_worths[-min(len(net_worths), self._window_size + 1)] - 1.0
        return 0.0
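

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the library source: this reproduces the
# `SimpleProfit` reward computation on a plain list of net worths, so the
# windowing logic can be checked without constructing a `Portfolio`. The
# numbers are made up for demonstration.
# ---------------------------------------------------------------------------
def _demo_simple_profit() -> None:
    net_worths = [100.0, 101.0, 99.0, 103.0]  # hypothetical net worth history
    window_size = 2
    # Cumulative percentage change over the previous `window_size` steps:
    # 103.0 / 101.0 - 1.0 ≈ 0.0198
    reward = net_worths[-1] / net_worths[-min(len(net_worths), window_size + 1)] - 1.0
    assert abs(reward - (103.0 / 101.0 - 1.0)) < 1e-12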


class RiskAdjustedReturns(TensorTradeRewardScheme):
    """A reward scheme that rewards the agent for increasing its net worth,
    while penalizing more volatile strategies.

    Parameters
    ----------
    return_algorithm : {'sharpe', 'sortino'}, Default 'sharpe'.
        The risk-adjusted return metric to use.
    risk_free_rate : float, Default 0.
        The risk-free rate of return to use for calculating metrics.
    target_returns : float, Default 0.
        The target return per period for use in calculating the Sortino ratio.
    window_size : int
        The size of the look-back window for computing the reward.
    """

    def __init__(self,
                 return_algorithm: str = 'sharpe',
                 risk_free_rate: float = 0.,
                 target_returns: float = 0.,
                 window_size: int = 1) -> None:
        algorithm = self.default('return_algorithm', return_algorithm)

        assert algorithm in ['sharpe', 'sortino']

        if algorithm == 'sharpe':
            return_algorithm = self._sharpe_ratio
        elif algorithm == 'sortino':
            return_algorithm = self._sortino_ratio

        self._return_algorithm = return_algorithm
        self._risk_free_rate = self.default('risk_free_rate', risk_free_rate)
        self._target_returns = self.default('target_returns', target_returns)
        self._window_size = self.default('window_size', window_size)

    def _sharpe_ratio(self, returns: 'pd.Series') -> float:
        """Computes the Sharpe ratio for a given series of returns.

        Parameters
        ----------
        returns : `pd.Series`
            The returns for the `portfolio`.

        Returns
        -------
        float
            The Sharpe ratio for the given series of `returns`.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Sharpe_ratio
        """
        return (np.mean(returns) - self._risk_free_rate + 1e-9) / (np.std(returns) + 1e-9)

    def _sortino_ratio(self, returns: 'pd.Series') -> float:
        """Computes the Sortino ratio for a given series of returns.

        Parameters
        ----------
        returns : `pd.Series`
            The returns for the `portfolio`.

        Returns
        -------
        float
            The Sortino ratio for the given series of `returns`.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Sortino_ratio
        """
        # Downside deviation: the root mean square of below-target returns,
        # with above-target returns contributing zero.
        downside_returns = returns.copy()
        downside_returns[returns >= self._target_returns] = 0

        expected_return = np.mean(returns)
        downside_std = np.sqrt(np.mean(downside_returns ** 2))

        return (expected_return - self._risk_free_rate + 1e-9) / (downside_std + 1e-9)

    def get_reward(self, portfolio: 'Portfolio') -> float:
        """Computes the reward corresponding to the selected risk-adjusted
        return metric.

        Parameters
        ----------
        portfolio : `Portfolio`
            The current portfolio being used by the environment.

        Returns
        -------
        float
            The reward corresponding to the selected risk-adjusted return
            metric.
        """
        net_worths = [nw['net_worth'] for nw in portfolio.performance.values()][-(self._window_size + 1):]
        returns = pd.Series(net_worths).pct_change().dropna()
        return self._return_algorithm(returns)
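

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the library source: the Sharpe and Sortino
# formulas used by `RiskAdjustedReturns`, applied to a hypothetical returns
# series. The values are made up for demonstration.
# ---------------------------------------------------------------------------
def _demo_risk_adjusted_returns() -> None:
    returns = pd.Series([0.01, -0.02, 0.015, 0.005, -0.01])
    risk_free_rate = 0.0
    target_returns = 0.0

    sharpe = (np.mean(returns) - risk_free_rate + 1e-9) / (np.std(returns) + 1e-9)

    # Standard downside deviation: only below-target returns contribute.
    downside = returns.copy()
    downside[returns >= target_returns] = 0
    downside_std = np.sqrt(np.mean(downside ** 2))
    sortino = (np.mean(returns) - risk_free_rate + 1e-9) / (downside_std + 1e-9)

    # Sortino ignores upside volatility, so it differs from Sharpe whenever
    # the series mixes gains and losses.
    print(f"sharpe={sharpe:.4f}, sortino={sortino:.4f}")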


class PBR(TensorTradeRewardScheme):
    r"""A reward scheme for position-based returns.

    * Let :math:`p_t` denote the price at time :math:`t`.
    * Let :math:`x_t` denote the position at time :math:`t`.
    * Let :math:`R_t` denote the reward at time :math:`t`.

    Then the reward is defined as
    :math:`R_t = (p_t - p_{t-1}) \cdot x_t`.

    Parameters
    ----------
    price : `Stream`
        The price stream to use for computing rewards.
    """

    registered_name = "pbr"

    def __init__(self, price: 'Stream') -> None:
        super().__init__()
        self.position = -1

        # The per-step price change, multiplied by the current position.
        r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward = (position * r).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int) -> None:
        self.position = -1 if action == 0 else 1

    def get_reward(self, portfolio: 'Portfolio') -> float:
        return self.feed.next()["reward"]

    def reset(self) -> None:
        """Resets the `position` and `feed` of the reward scheme."""
        self.position = -1
        self.feed.reset()
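

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the library source: the position-based
# return R_t = (p_t - p_{t-1}) * x_t computed by hand on made-up prices,
# mirroring what the `DataFeed` in `PBR` produces one step at a time.
# ---------------------------------------------------------------------------
def _demo_pbr() -> None:
    prices = [100.0, 101.0, 100.5, 102.0]  # hypothetical price series
    positions = [-1, 1, 1, -1]             # x_t: -1 for action 0, 1 otherwise
    rewards = [0.0]                        # the first diff is NaN -> fillna(0)
    for t in range(1, len(prices)):
        rewards.append((prices[t] - prices[t - 1]) * positions[t])
    assert rewards == [0.0, 1.0, -0.5, -1.5]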


_registry = {
    'simple': SimpleProfit,
    'risk-adjusted': RiskAdjustedReturns,
    'pbr': PBR,
}


def get(identifier: str) -> 'TensorTradeRewardScheme':
    """Gets the `RewardScheme` that matches the `identifier`.

    Parameters
    ----------
    identifier : str
        The identifier for the `RewardScheme`.

    Returns
    -------
    `TensorTradeRewardScheme`
        The reward scheme associated with the `identifier`.

    Raises
    ------
    KeyError:
        Raised if the `identifier` is not associated with any `RewardScheme`.
    """
    if identifier not in _registry:
        msg = f"Identifier {identifier} is not associated with any `RewardScheme`."
        raise KeyError(msg)
    return _registry[identifier]()
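

# ---------------------------------------------------------------------------
# Usage sketch, not part of the library source: resolving a reward scheme by
# its registry identifier. Unknown identifiers raise a `KeyError`.
# ---------------------------------------------------------------------------
def _demo_get() -> None:
    scheme = get('simple')
    assert isinstance(scheme, SimpleProfit)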