|
import gym
|
|
import pickle
|
|
import time
|
|
import numpy as np
|
|
|
|
|
|
# Load the trained model artifacts saved by the training run.
# NOTE(security): pickle.load executes arbitrary code embedded in the file;
# only load "q_learning_model.pkl" if it comes from a trusted source.
with open("q_learning_model.pkl", "rb") as f:
    model_data = pickle.load(f)

# The pickle is expected to hold a mapping with these two entries:
#   "Q_table"    - action values, indexed by a discretized state tuple
#   "state_bins" - number of bin edges per observation dimension
Q_table = model_data["Q_table"]
state_bins = model_data["state_bins"]
|
|
|
|
def discretize_state(state):
    """Map a continuous observation to a tuple of per-dimension bin indices.

    For each dimension ``i`` the range between the environment's
    ``observation_space.low[i]`` and ``observation_space.high[i]`` is split
    by ``state_bins[i]`` evenly spaced edges, and the observation value is
    assigned the index of the edge interval it falls in.

    Relies on the module-level ``env`` and ``state_bins`` being defined
    before the first call.

    Args:
        state: Sequence of observation values, one per dimension.

    Returns:
        Tuple of integer indices, one per dimension of ``state``, each in
        ``[0, state_bins[i] - 1]``.
    """
    state_low = env.observation_space.low
    state_high = env.observation_space.high

    state_indices = []
    for i, value in enumerate(state):
        edges = np.linspace(state_low[i], state_high[i], state_bins[i])
        index = int(np.digitize(value, edges)) - 1
        # A value below the lower bound would give -1, which silently wraps
        # around to the last bin when used as an index; clamp it instead.
        state_indices.append(min(max(index, 0), state_bins[i] - 1))

    return tuple(state_indices)
|
|
|
|
# Create the environment with an on-screen window so each step is visible.
env = gym.make("MountainCar-v0", render_mode="human")

# Number of greedy evaluation episodes to play.
test_episodes = 10

try:
    for episode in range(test_episodes):
        state, _ = env.reset()
        state = discretize_state(state)
        done = False
        total_reward = 0

        print(f"Testing Episode {episode + 1}")

        while not done:
            # Pure exploitation: always take the highest-valued action.
            action = np.argmax(Q_table[state])
            next_state, reward, terminated, truncated, _ = env.step(action)
            # An episode ends either when the goal is reached (terminated)
            # or when the time limit cuts it off (truncated). Ignoring the
            # latter would loop forever whenever the agent fails to finish.
            done = terminated or truncated
            state = discretize_state(next_state)
            total_reward += reward

            # With render_mode="human" the window is redrawn on every step,
            # so no explicit env.render() call is needed; just slow playback.
            time.sleep(0.03)

        print(f"Total reward for Episode {episode + 1}: {total_reward}")
finally:
    # Release the render window even if an episode raises.
    env.close()
|
|
|