---
tags:
- Taxi-v3
- q-learning
- reinforcement-learning
- custom-implementation
model-index:
- name: q-Taxi-v1-5x5
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: Taxi-v3
      type: Taxi-v3
    metrics:
    - type: mean_reward
      value: 7.36 +/- 2.47
      name: mean_reward
      verified: false
---

# **Q-Learning** Agent playing **Taxi-v3**

This is a trained model of a **Q-Learning** agent playing **Taxi-v3**.

## Usage

```python
import pickle

import gymnasium as gym
import numpy as np
from huggingface_sb3 import load_from_hub
from tqdm import tqdm


def greedy_policy(Qtable, state):
    """
    Pick the action with the highest Q-value for ``state``.

    :param Qtable: The Q-table, indexed as ``Qtable[state, action]``
    :param state: Index of the current state (row of the Q-table)
    :return: Index of the best action; ties resolve to the lowest index
    """
    return np.argmax(Qtable[state, :])


def evaluate_agent(env: gym.Env, max_steps: int, n_eval_episodes: int, Q: np.ndarray, seed: list[int]):
    """
    Evaluate the agent for ``n_eval_episodes`` episodes and return the mean
    and standard deviation of the per-episode total reward.

    :param env: The evaluation environment
    :param max_steps: Maximum number of steps per episode
    :param n_eval_episodes: Number of episodes to evaluate the agent
    :param Q: The Q-table
    :param seed: The evaluation seed array (for Taxi-v3). It is indexed by
        episode number, so it needs at least ``n_eval_episodes`` entries.
        Pass an empty list (or ``None``) to reset the environment unseeded.
    :return: ``(mean_reward, std_reward)`` as plain Python floats
    """
    episode_rewards = []
    for episode in tqdm(range(n_eval_episodes)):
        # One seed per episode keeps the evaluation reproducible.
        if seed:
            state, _ = env.reset(seed=seed[episode])
        else:
            state, _ = env.reset()

        total_rewards_ep = 0
        for _ in range(max_steps):
            action = greedy_policy(Q, state)
            state, reward, terminated, truncated, _ = env.step(action)
            total_rewards_ep += reward
            if terminated or truncated:
                break
        episode_rewards.append(total_rewards_ep)

    return float(np.mean(episode_rewards)), float(np.std(episode_rewards))


if __name__ == "__main__":
    file_path = load_from_hub(repo_id="BobChuang/q-Taxi-v1-5x5", filename="q-learning.pkl")

    # SECURITY: unpickling can execute arbitrary code. Only load this file
    # if you trust the repository it was downloaded from.
    with open(file_path, "rb") as f:
        model = pickle.load(f)

    env = gym.make(model["env_id"], render_mode="rgb_array")
    try:
        mean_reward, std_reward = evaluate_agent(
            env,
            model["max_steps"],
            model["n_eval_episodes"],
            model["qtable"],
            model["eval_seed"],
        )
    finally:
        # Release the environment's resources even if evaluation fails.
        env.close()

    print(f"\n{ mean_reward = }, { std_reward = }")
```