File size: 1,211 Bytes
01c9c37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gym
import pickle
import time
import numpy as np

# Restore the saved model from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open("q_learning_model.pkl", "rb") as model_file:
    model_data = pickle.load(model_file)

# Pull out the two entries this script reads from the saved mapping.
Q_table, state_bins = model_data["Q_table"], model_data["state_bins"]

def discretize_state(state):
    """Map a continuous observation to a tuple of per-dimension bin indices.

    For each dimension, ``state_bins[dim]`` evenly spaced edges are laid out
    between the environment's observation-space low and high bounds, and the
    value is located among them with ``np.digitize``. One is subtracted so
    that a value sitting on the lowest edge maps to index 0.

    Args:
        state: Sequence of observation values, one per dimension.

    Returns:
        A tuple with one bin index per entry of ``state``.
    """
    lows = env.observation_space.low
    highs = env.observation_space.high

    indices = []
    for dim, value in enumerate(state):
        edges = np.linspace(lows[dim], highs[dim], state_bins[dim])
        indices.append(np.digitize(value, edges) - 1)
    return tuple(indices)

env = gym.make("MountainCar-v0", render_mode="human")

test_episodes = 10

# Play the greedy policy for a fixed number of episodes. The try/finally
# makes sure the environment (and its render window) is closed even if an
# episode raises or the user interrupts playback.
try:
    for episode in range(test_episodes):
        state, _ = env.reset()
        state = discretize_state(state)
        done = False
        total_reward = 0

        print(f"Testing Episode {episode + 1}")

        while not done:
            # Greedy action: pick the highest-valued entry for this state.
            action = int(np.argmax(Q_table[state]))
            next_state, reward, terminated, truncated, _ = env.step(action)
            state = discretize_state(next_state)
            total_reward += reward

            # step() reports two separate end-of-episode flags. Checking only
            # the first one would loop forever whenever the episode is cut
            # off by truncation instead of reaching a terminal state.
            done = terminated or truncated

            # No explicit env.render() call: with render_mode="human" the
            # environment draws each frame as part of step().
            time.sleep(0.03)

        print(f"Total reward for Episode {episode + 1}: {total_reward}")
finally:
    env.close()