|
|
|
import gymnasium as gym |
|
import numpy |
|
import numpy as np |
|
from Qlearning_pole import Qlearning |
|
import os |
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Driver script: train a tabular Q-learning agent on CartPole, persist the
# learned Q matrix, then compare the learned (greedy) strategy against a
# 50-episode random baseline, saving a convergence plot and a histogram.
# ---------------------------------------------------------------------------
import matplotlib.pyplot as plt  # plotting backend for the two figures below

# Train the agent; this populates Q1.Q (the Q matrix) and
# Q1.sumRewardsEpisode (per-episode training reward history).
Q1 = Qlearning()
Q1.train()

# Persist the learned Q matrix for later reuse (np alias for consistency
# with the rest of the script).
np.save("Qmatrix_new.npy", Q1.Q)

# Run one episode with the learned greedy strategy, then release the
# environment (render window / resources).
(obtainedRewardsOptimal, env1) = Q1.simulateLearnedStrategy()
env1.close()
# Report the episode return instead of silently discarding it.
print("Total reward (learned strategy):", np.sum(obtainedRewardsOptimal))

# Figure 1: convergence of the per-episode training reward.
# Log scale because early episodes earn orders of magnitude less reward.
plt.figure(figsize=(12, 5))
plt.plot(Q1.sumRewardsEpisode, color='blue', linewidth=1)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.yscale('log')
# Title must be set BEFORE savefig, otherwise it is missing from the file.
plt.title("Convergence of rewards")
plt.savefig('convergence.png')
plt.show()

# Baseline: 50 episodes with a purely random strategy.
obtainedRewardsRandom = []
for _ in range(50):
    (rewardsRandom, env2) = Q1.simulateRandomStrategy()
    obtainedRewardsRandom.append(rewardsRandom)
env2.close()  # release the last random-strategy environment

# Figure 2: distribution of random-strategy rewards.
plt.figure()
plt.title("Rewards with random strategy")
plt.hist(obtainedRewardsRandom)
plt.xlabel('Sum of rewards')
plt.ylabel('Percentage')
plt.savefig('histogram.png')
plt.show()