Source code for tensortrade.agents.parallel.parallel_dqn_trainer

# Copyright 2019 The TensorTrade Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from deprecated import deprecated
import numpy as np

from typing import Callable
from multiprocessing import Process, Queue


@deprecated(version='1.0.4', reason="Builtin agents are being deprecated in favor of external implementations (ie: Ray)")
class ParallelDQNTrainer(Process):

    def __init__(self,
                 agent: 'ParallelDQNAgent',
                 create_env: Callable[[], 'TrainingEnvironment'],
                 memory_queue: Queue,
                 model_update_queue: Queue,
                 done_queue: Queue,
                 n_steps: int,
                 n_episodes: int,
                 eps_end: float = 0.05,
                 eps_start: float = 0.99,
                 eps_decay_steps: int = 2000,
                 update_target_every: int = 2):
        super().__init__()

        self.agent = agent
        self.env = create_env()
        self.memory_queue = memory_queue
        self.model_update_queue = model_update_queue
        self.done_queue = done_queue
        self.n_steps = n_steps or np.iinfo(np.int32).max
        self.n_episodes = n_episodes or np.iinfo(np.int32).max
        self.eps_start = eps_start
        self.eps_end = eps_end
        self.eps_decay_steps = eps_decay_steps
        self.update_target_every = update_target_every

        self.env.agent_id = self.agent.id
    def run(self):
        episode = 0
        steps_done = 0
        total_reward = 0
        stop_training = False

        while episode < self.n_episodes and not stop_training:
            if self.model_update_queue.qsize() > 0:
                while self.model_update_queue.qsize() > 0:
                    model = self.model_update_queue.get()

                self.agent.model.update_networks(model)

            state = self.env.reset()
            done = False

            print('==== EPISODE ID ({}/{}): {} ===='.format(episode + 1,
                                                            self.n_episodes,
                                                            self.env.episode_id))

            while not done:
                threshold = self.eps_end + (self.eps_start - self.eps_end) * \
                    np.exp(-steps_done / self.eps_decay_steps)
                action = self.agent.get_action(state, threshold=threshold)
                next_state, reward, done, _ = self.env.step(action)

                self.memory_queue.put((state, action, reward, next_state, done))

                state = next_state
                total_reward += reward
                steps_done += 1

                if self.n_steps and steps_done >= self.n_steps:
                    stop_training = True
                    done = True

                if steps_done % self.update_target_every == 0:
                    self.agent.update_target_network()

            episode += 1

        mean_reward = total_reward / steps_done

        self.done_queue.put(mean_reward)
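

# NOTE: Illustrative sketch only, not part of the original source.  `run()`
# anneals the exploration threshold with the exponential schedule
#     threshold = eps_end + (eps_start - eps_end) * exp(-steps_done / eps_decay_steps),
# so the chance of taking a random action decays from roughly `eps_start`
# toward `eps_end` as steps accumulate.  The helper below reproduces that
# schedule so it can be inspected outside a running trainer process.
def _example_epsilon_threshold(steps_done: int,
                               eps_start: float = 0.99,
                               eps_end: float = 0.05,
                               eps_decay_steps: int = 2000) -> float:
    """Illustrative only: the exploration threshold used inside `run()`."""
    return eps_end + (eps_start - eps_end) * np.exp(-steps_done / eps_decay_steps)

# With the default values above the threshold is ~0.99 at step 0, ~0.62 around
# step 1000, and approaches 0.05 once many steps have been taken.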
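

# NOTE: The helper below is an illustrative sketch only; it is not part of the
# original TensorTrade source.  It shows one plausible way a coordinator could
# wire several ParallelDQNTrainer workers around the three queues the
# constructor expects.  The `agent` and `create_env` arguments are assumed to
# be supplied by the caller (a ParallelDQNAgent and an environment factory);
# in the full library a separate optimizer process would typically consume
# `memory_queue` and publish refreshed weights to `model_update_queue`.
def _example_spawn_trainers(agent: 'ParallelDQNAgent',
                            create_env: Callable[[], 'TrainingEnvironment'],
                            n_workers: int = 4,
                            n_steps: int = 1000,
                            n_episodes: int = 10):
    """Illustrative only: start `n_workers` trainer processes and return them
    together with the queue on which each worker reports its mean reward."""
    memory_queue = Queue()        # workers push (state, action, reward, next_state, done)
    model_update_queue = Queue()  # coordinator pushes refreshed network weights
    done_queue = Queue()          # each worker reports its mean reward when finished

    trainers = [
        ParallelDQNTrainer(agent=agent,
                           create_env=create_env,
                           memory_queue=memory_queue,
                           model_update_queue=model_update_queue,
                           done_queue=done_queue,
                           n_steps=n_steps,
                           n_episodes=n_episodes)
        for _ in range(n_workers)
    ]

    for trainer in trainers:
        trainer.start()

    return trainers, done_queue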