"""Replay a saved tabular Q-learning policy on MountainCar-v0 with rendering.

The script loads a pickled dictionary holding the learned ``Q_table`` and the
per-dimension ``state_bins`` counts, then runs a fixed number of greedy
(argmax) evaluation episodes and prints the total reward of each one.
"""

import pickle
import time

import gym
import numpy as np

# Load the saved model.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load a model file that you produced yourself or otherwise trust.
with open("q_learning_model.pkl", "rb") as f:
    model_data = pickle.load(f)

Q_table = model_data["Q_table"]
state_bins = model_data["state_bins"]

env = gym.make("MountainCar-v0", render_mode="human")

# The bin edges depend only on the observation bounds and the bin counts, so
# they are built once here instead of on every environment step.
_bin_edges = [
    np.linspace(low, high, count)
    for low, high, count in zip(
        env.observation_space.low, env.observation_space.high, state_bins
    )
]


def discretize_state(state):
    """Map a continuous observation to a tuple of per-dimension bin indices.

    Each component of ``state`` is located among that dimension's edges with
    ``np.digitize``; the result is shifted down by one so that a value at the
    lower bound maps to index 0.

    Args:
        state: Observation returned by ``env.reset()`` or ``env.step()``.

    Returns:
        A tuple with one integer index per observation dimension, used to
        index ``Q_table``.
    """
    return tuple(
        np.digitize(value, edges) - 1
        for value, edges in zip(state, _bin_edges)
    )


test_episodes = 10

try:
    for episode in range(test_episodes):
        state, _ = env.reset()
        state = discretize_state(state)
        done = False
        total_reward = 0

        print(f"Testing Episode {episode + 1}")

        while not done:
            # Greedy policy: always take the highest-valued action.
            action = int(np.argmax(Q_table[state]))
            next_state, reward, terminated, truncated, _ = env.step(action)

            # An episode ends either when the goal is reached (terminated) or
            # when it is cut off (truncated). Checking only the former would
            # keep stepping a finished environment whenever the policy fails
            # to reach the goal.
            done = terminated or truncated

            state = discretize_state(next_state)
            total_reward += reward

            env.render()
            time.sleep(0.03)  # slow playback down so it can be watched

        print(f"Total reward for Episode {episode + 1}: {total_reward}")
finally:
    # Release the render window even if the run is interrupted or fails.
    env.close()