# Uploaded by MohamedMaged262 -- commit 01c9c37 ("Upload 3 files"), verified.
import gym
import pickle
import time
import numpy as np
# Restore the saved model: the pickle holds a mapping with the Q-table and
# the per-dimension bin counts used to discretize observations.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open("q_learning_model.pkl", "rb") as model_file:
    model_data = pickle.load(model_file)

Q_table, state_bins = model_data["Q_table"], model_data["state_bins"]
def discretize_state(state):
    """Map a continuous observation to a tuple of Q-table bin indices.

    For each dimension, ``state_bins[i]`` evenly spaced edges are laid out
    between the observation space's low and high bounds. The index for a
    value is the number of edges less than or equal to it, minus one.

    Args:
        state: Sequence of continuous observation values, one per dimension.

    Returns:
        Tuple with one integer index per dimension of ``state``.

    Note:
        Reads the module-level ``env`` and ``state_bins``. A value below the
        lower bound of its dimension yields index -1.
    """
    lower = env.observation_space.low
    upper = env.observation_space.high

    indices = []
    for dim, value in enumerate(state):
        edges = np.linspace(lower[dim], upper[dim], state_bins[dim])
        indices.append(np.digitize(value, edges) - 1)
    return tuple(indices)
# Evaluate the greedy policy from the loaded Q-table in a rendered window.
env = gym.make("MountainCar-v0", render_mode="human")
test_episodes = 10

# try/finally guarantees the render window is released even if an episode
# raises or the user interrupts the run.
try:
    for episode in range(test_episodes):
        state, _ = env.reset()
        state = discretize_state(state)
        done = False
        total_reward = 0
        print(f"Testing Episode {episode + 1}")

        while not done:
            # Pure exploitation: take the highest-valued action for this state.
            action = np.argmax(Q_table[state])
            next_state, reward, terminated, truncated, _ = env.step(action)
            # An episode is over when it terminates OR is truncated (e.g. by
            # a step limit). Ignoring truncation would spin forever whenever
            # the policy never reaches a terminal state.
            done = terminated or truncated
            state = discretize_state(next_state)
            total_reward += reward
            env.render()
            # Slow playback down so the episode is watchable.
            time.sleep(0.03)

        print(f"Total reward for Episode {episode + 1}: {total_reward}")
finally:
    env.close()