|
---
tags:
- Taxi-v3
- q-learning
- reinforcement-learning
- custom-implementation
model-index:
- name: q-Taxi-v1-5x5
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: Taxi-v3
      type: Taxi-v3
    metrics:
    - type: mean_reward
      value: 7.36 +/- 2.47
      name: mean_reward
      verified: false
---
|
|
|
# **Q-Learning** Agent playing **Taxi-v3**
|
This is a trained model of a **Q-Learning** agent playing **Taxi-v3**.
|
|
|
## Usage
|
|
|
```python
|
from huggingface_sb3 import load_from_hub |
|
import gymnasium as gym |
|
from tqdm import tqdm |
|
import numpy as np |
|
import pickle |
|
|
|
def greedy_policy(Qtable, state):
    """
    Pick the action with the highest Q-value for ``state``.

    :param Qtable: The Q-table, indexed as ``Qtable[state, action]``
    :param state: Index of the current state (row of the Q-table)
    :return: Index of the best action in that row (``np.argmax`` returns the first one on ties)
    """
    action = np.argmax(Qtable[state, :])
    return action
|
|
|
def evaluate_agent(env: gym.Env, max_steps: int, n_eval_episodes: int, Q: np.ndarray, seed: list[int]):
    """
    Evaluate the agent for ``n_eval_episodes`` episodes and return the mean and std of the episode rewards.

    :param env: The evaluation environment
    :param max_steps: Maximum number of steps per episode
    :param n_eval_episodes: Number of episodes to evaluate the agent
    :param Q: The Q-table
    :param seed: The evaluation seed array (for taxi-v3). When non-empty it is indexed by
        episode number, so it needs at least ``n_eval_episodes`` entries; when empty or
        ``None`` the environment is reset without a seed.
    :return: ``(mean_reward, std_reward)`` as Python floats
    """
    episode_rewards = []
    for episode in tqdm(range(n_eval_episodes)):
        # Seed each episode individually so every evaluation run sees the same start states.
        if seed:
            state, info = env.reset(seed=seed[episode])
        else:
            state, info = env.reset()

        truncated = False
        terminated = False
        total_rewards_ep = 0

        for step in range(max_steps):
            # Act greedily: evaluation never explores.
            action = greedy_policy(Q, state)
            new_state, reward, terminated, truncated, info = env.step(action)
            total_rewards_ep += reward

            # Stop on either a terminal state or an environment-imposed cut-off.
            if terminated or truncated:
                break
            state = new_state

        episode_rewards.append(total_rewards_ep)

    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)

    return float(mean_reward), float(std_reward)
|
|
|
if __name__ == "__main__":
    # Download the pickled model dictionary from the Hub; load_from_hub returns a local path.
    file_path = load_from_hub(repo_id="BobChuang/q-Taxi-v1-5x5", filename="q-learning.pkl")

    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only load checkpoints from repositories you trust.
    with open(file_path, "rb") as f:
        model = pickle.load(f)

    # The pickled dictionary carries both the Q-table and the evaluation settings.
    env = gym.make(model["env_id"], render_mode="rgb_array")
    max_steps = model["max_steps"]
    n_eval_episodes = model["n_eval_episodes"]
    qtable = model["qtable"]
    eval_seed = model["eval_seed"]

    try:
        mean_reward, std_reward = evaluate_agent(env, max_steps, n_eval_episodes, qtable, eval_seed)
    finally:
        # Release the environment's resources even if evaluation fails.
        env.close()

    print(f"\n{ mean_reward = }, { std_reward = }")
|
```
|
|