Commit b89a51c by VarunKumarGupta2003
1 parent: 0f9b2af

Upload 7 files
Files changed:
- .gitattributes +1 -0
- README.md +7 -11
- SI_Final_Project.pdf +3 -0
- car_dqn.py +58 -0
- dqn.py +268 -0
- exp_replay.py +77 -0
- main.py +156 -0
- processimage.py +25 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+SI_Final_Project.pdf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,8 @@
-
-
-emoji: 🔥
-colorFrom: blue
-colorTo: gray
-sdk: gradio
-sdk_version: 4.31.5
-app_file: app.py
-pinned: false
----
+# AI-self-driving-race-car-Deep-Reinforcement-Learning
+Solving OpenAI's reinforcement learning CarRacing environment
 
-
+In this project, a Python-based car racing environment is trained with a deep reinforcement learning algorithm to drive efficiently around a racing track. A deep Q-learning algorithm is developed and then used to train an autonomous driver agent. Different configurations of the deep Q-learning parameters and of the neural network architecture are tested and compared in order to obtain the best average score over 100 races. According to OpenAI Gym, this environment is considered solved when the agent reaches an average score of 900 over the last 100 runs.
+
+A video with the final car's performance can be seen here: https://www.youtube.com/watch?v=jbdjhoDT41M
+
+A video of the car training can be seen here: https://youtu.be/C9CZpbuOz04
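For reference, the "solved" criterion described in the README reduces to a rolling mean over the last 100 episode scores. A minimal sketch, assuming `episode_scores` is a plain Python list of per-episode totals (illustrative only, not part of the uploaded code):

import numpy as np

def is_solved(episode_scores, window=100, threshold=900.0):
    # The criterion only applies once at least `window` episodes have finished.
    if len(episode_scores) < window:
        return False
    return float(np.mean(episode_scores[-window:])) >= threshold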
SI_Final_Project.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4bee8f9f9195010f83899d24f845e9c36c81e4431a6dbdb4570331e64ddfb06
+size 4964594
car_dqn.py ADDED
@@ -0,0 +1,58 @@
from dqn import DQN
import numpy as np
from skimage import color
import itertools as it


class CarRacingDQN(DQN):
    # CarRacing-specific part of the DQN agent

    # ** is used for unpacking the model configuration
    def __init__(self, max_negative_rewards=100, **model_config):

        # All 12 possible actions:
        # all_actions = np.array([k for k in it.product([-1, 0, 1], [1, 0], [0.5, 0])])

        # Selected 5 actions:
        all_actions = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 0.5], [0, 0, 0], [1, 0, 0]])

        # Set self parameters
        super().__init__(
            action_map=all_actions,
            pic_size=(96, 96),
            **model_config
        )

        self.gas_actions = np.array([a[1] == 1 and a[2] == 0 for a in all_actions])
        self.break_actions = np.array([a[2] > 0 for a in all_actions])
        self.n_gas_actions = self.gas_actions.sum()
        self.neg_reward_counter = 0
        self.max_neg_rewards = max_negative_rewards

    def get_random_action(self):
        # give priority to acceleration actions
        action_weights = 14.0 * self.gas_actions + 1.0
        action_weights /= np.sum(action_weights)

        return np.random.choice(self.dim_actions, p=action_weights)

    def check_early_stop(self, reward, totalreward, fie):
        if reward < 0 and fie > 10:
            self.neg_reward_counter += 1
            done = (self.neg_reward_counter > self.max_neg_rewards)

            if done and totalreward <= 500:
                punishment = -20.0
            else:
                punishment = 0.0
            if done:
                self.neg_reward_counter = 0

            return done, punishment
        else:
            self.neg_reward_counter = 0
            return False, 0.0
dqn.py ADDED
@@ -0,0 +1,268 @@
from __future__ import generator_stop
from exp_replay import ExperienceReplay
import numpy as np
import tensorflow.contrib.slim as slim
import tensorflow as tf
import re
from processimage import processimage


class DQN:

    def __init__(self,
                 env,
                 batchsize=64,
                 pic_size=(96, 96),
                 num_frame_stack=3,
                 gamma=0.95,
                 frame_skip=3,
                 train_freq=3,
                 initial_epsilon=1,
                 min_epsilon=0.05,
                 render=False,
                 epsilon_decay_steps=int(100000),
                 min_experience_size=int(1000),
                 experience_capacity=int(100000),
                 target_network_update_freq=1000,
                 regularization=1e-6,
                 optimizer_params=None,
                 action_map=None
                 ):
        self.exp_history = ExperienceReplay(
            num_frame_stack,
            capacity=experience_capacity,
            pic_size=pic_size
        )

        # in playing mode we don't store the experience to agent history
        # but this cache is still needed to get the current frame stack
        self.playing_cache = ExperienceReplay(
            num_frame_stack,
            capacity=num_frame_stack * 5 + 10,
            pic_size=pic_size
        )

        if action_map is not None:
            self.dim_actions = len(action_map)
        else:
            self.dim_actions = env.action_space.n

        self.target_network_update_freq = target_network_update_freq
        self.action_map = action_map
        self.env = env
        self.batchsize = batchsize
        self.num_frame_stack = num_frame_stack
        self.gamma = gamma
        self.frame_skip = frame_skip
        self.train_freq = train_freq
        self.initial_epsilon = initial_epsilon
        self.min_epsilon = min_epsilon
        self.epsilon_decay_steps = epsilon_decay_steps
        self.render = render
        self.min_experience_size = min_experience_size
        self.pic_size = pic_size
        self.regularization = regularization
        # These default values work well with Adam
        self.global_step = tf.Variable(0, trainable=False)
        self.increment_global_step_op = tf.assign(self.global_step, self.global_step + 1)
        self.decayed_lr = tf.train.exponential_decay(0.001, self.global_step, 200000, 0.7, staircase=False)
        lr = self.decayed_lr
        # lr = 0.001
        self.optimizer_params = optimizer_params or dict(learning_rate=lr, epsilon=1e-7)

        self.do_training = True
        self.playing_epsilon = 0.0
        self.session = None

        self.state_size = (self.num_frame_stack,) + self.pic_size
        self.global_counter = 0
        self.episode_counter = 0

    def build_graph(self):
        # (None, 4, 96, 96) changed to (None, 96, 96, 4)
        input_dim_general = (None, self.pic_size[0], self.pic_size[1], self.num_frame_stack)
        # (64, 4, 96, 96) changed to (64, 96, 96, 4)
        input_dim_with_batch = (self.batchsize, self.pic_size[0], self.pic_size[1], self.num_frame_stack)

        self.input_prev_state = tf.compat.v1.placeholder(tf.float32, input_dim_general, "prev_state")
        self.input_next_state = tf.compat.v1.placeholder(tf.float32, input_dim_with_batch, "next_state")
        self.input_reward = tf.compat.v1.placeholder(tf.float32, self.batchsize, "reward")
        self.input_actions = tf.compat.v1.placeholder(tf.int32, self.batchsize, "actions")
        self.input_done_mask = tf.compat.v1.placeholder(tf.int32, self.batchsize, "done_mask")

        # The target Q-values come from the fixed network
        with tf.compat.v1.variable_scope("fixed"):  # (64, 96, 96, 3)
            # Target network: kept fixed and updated every C steps
            qsa_targets = self.create_network(self.input_next_state, trainable=False)

        with tf.compat.v1.variable_scope("train"):  # (?, 96, 96, 3)
            # Prediction/estimate network: trained every `train_freq` frames
            qsa_estimates = self.create_network(self.input_prev_state, trainable=True)

        self.best_action = tf.argmax(qsa_estimates, axis=1)

        not_done = tf.cast(tf.logical_not(tf.cast(self.input_done_mask, "bool")), "float32")
        # select the chosen action from each row
        # in numpy this is qsa_estimates[range(batchsize), self.input_actions]
        action_slice = tf.stack([tf.range(0, self.batchsize), self.input_actions], axis=1)
        q_estimates_for_input_action = tf.gather_nd(qsa_estimates, action_slice)

        # From the DQN paper: loss = (r + gamma * max_a' Q_target(s', a') - Q_estimate(s, a))^2
        q_target = tf.reduce_max(qsa_targets, -1) * self.gamma * not_done + self.input_reward
        training_loss = tf.nn.l2_loss(q_target - q_estimates_for_input_action) / self.batchsize

        # reg_loss = tf.add_n(tf.losses.get_regularization_losses())
        reg_loss = [0]

        # Adam optimizer
        optimizer = tf.train.AdamOptimizer(**(self.optimizer_params))
        # RMSProp optimizer:
        # optimizer = tf.train.RMSPropOptimizer(**(self.optimizer_params))

        self.train_op = optimizer.minimize(reg_loss + training_loss)

        train_params = self.get_variables("train")
        fixed_params = self.get_variables("fixed")

        assert (len(train_params) == len(fixed_params))
        self.copy_network_ops = [tf.assign(fixed_v, train_v) for train_v, fixed_v in zip(train_params, fixed_params)]

    def get_variables(self, scope):
        vars = [t for t in tf.compat.v1.global_variables()
                if "%s/" % scope in t.name and "Adam" not in t.name]
        return sorted(vars, key=lambda v: v.name)

    def create_network(self, input, trainable):
        if trainable:
            # wr = None
            wr = tf.compat.v1.keras.regularizers.l2(l=self.regularization)
        else:
            wr = None

        net = tf.layers.conv2d(inputs=input, filters=8, kernel_size=(7, 7), strides=4, name='conv1', kernel_regularizer=wr)
        net = tf.nn.relu(net)
        net = tf.nn.max_pool2d(net, ksize=2, strides=2, padding='SAME')
        net = tf.layers.conv2d(inputs=net, filters=16, kernel_size=(3, 3), strides=1, name='conv2',
                               kernel_regularizer=wr)
        net = tf.nn.relu(net)
        net = tf.nn.max_pool2d(net, ksize=2, strides=2, padding='SAME')
        net = tf.layers.flatten(net)
        net = tf.layers.dense(net, 400, activation=tf.nn.relu, kernel_regularizer=wr)
        # net = tf.layers.dropout(net, 0.5)
        q_state_action_values = tf.layers.dense(net, self.dim_actions, activation=None, kernel_regularizer=wr)

        return q_state_action_values

    # def check_early_stop(self, reward, totalreward):
    #     return False, 0.0

    def get_random_action(self):
        return np.random.choice(self.dim_actions)

    def get_epsilon(self):
        if not self.do_training:
            return self.playing_epsilon
        elif self.global_counter >= self.epsilon_decay_steps:
            return self.min_epsilon
        else:
            # linear decay
            r = 1.0 - self.global_counter / float(self.epsilon_decay_steps)
            return self.min_epsilon + (self.initial_epsilon - self.min_epsilon) * r

    def train(self):
        batch = self.exp_history.sample_mini_batch(self.batchsize)
        # Feed dict: map each placeholder to the matching key of the sampled batch
        fd = {
            self.input_reward: "reward",
            self.input_prev_state: "prev_state",
            self.input_next_state: "next_state",
            self.input_actions: "actions",
            self.input_done_mask: "done_mask"
        }
        fd1 = {ph: batch[k] for ph, k in fd.items()}
        self.session.run([self.train_op], fd1)

    def play_episode(self, render, load_checkpoint):
        eh = (
            self.exp_history if self.do_training
            else self.playing_cache
        )
        total_reward = 0
        total_score = 0
        frames_in_episode = 0

        first_frame = self.env.reset()
        first_frame_pp = processimage.process_image(first_frame)

        eh.start_new_episode(first_frame_pp)

        epsilon = self.get_epsilon()
        while True:
            if np.random.rand() > epsilon and not load_checkpoint:
                action_idx = self.session.run(
                    self.best_action,
                    {self.input_prev_state: eh.current_state()[np.newaxis, ...]}
                )[0]
            elif not load_checkpoint:
                action_idx = self.get_random_action()
            elif load_checkpoint:
                action_idx = self.session.run(
                    self.best_action,
                    {self.input_prev_state: eh.current_state()[np.newaxis, ...]}
                )[0]

            if self.action_map is not None:
                action = self.action_map[action_idx]
            else:
                action = action_idx

            reward = 0
            score = 0
            for _ in range(self.frame_skip):
                observation, r, done, info = self.env.step(action)
                if render:
                    self.env.render()

                score += r
                # Increase rewards on the later frames if the reward is positive
                if r > 0:
                    r = r + frames_in_episode * 0.2  # ~230 frames into an episode this adds roughly +50 reward per tile
                reward += r

                if done:
                    break

            early_done, punishment = self.check_early_stop(reward, total_reward, frames_in_episode)
            if early_done:
                reward += punishment

            done = done or early_done

            total_reward += reward
            total_score += score
            frames_in_episode += 1
            observation = processimage.process_image(observation)
            eh.add_experience(observation, action_idx, done, reward)

            if self.do_training:
                self.global_counter += 1
                step = self.session.run(self.increment_global_step_op)
                if self.global_counter % self.target_network_update_freq == 0:
                    self.update_target_network()
                train_cond = (
                    self.exp_history.counter >= self.min_experience_size and
                    self.global_counter % self.train_freq == 0
                )
                if train_cond:
                    self.train()

            if done:
                if self.do_training:
                    self.episode_counter += 1

                return total_score, total_reward, frames_in_episode, epsilon

    def update_target_network(self):
        self.session.run(self.copy_network_ops)
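The q_target line in build_graph is the usual DQN bootstrap target, with the discount term masked out on terminal transitions. A NumPy sketch of the same arithmetic on a toy batch of three transitions (made-up numbers, for illustration only):

import numpy as np

gamma = 0.95
rewards = np.array([1.0, -0.1, 2.0], dtype=np.float32)
done_mask = np.array([0, 0, 1], dtype=np.int32)       # 1 = episode ended on this transition
qsa_targets = np.array([[0.5, 1.2],                   # Q-values from the fixed/target network
                        [0.3, 0.7],
                        [2.0, 0.1]], dtype=np.float32)

not_done = 1.0 - done_mask.astype(np.float32)
# r + gamma * max_a' Q_target(s', a'), with the bootstrap term zeroed on terminal steps
q_target = qsa_targets.max(axis=-1) * gamma * not_done + rewards
print(q_target)  # approx [2.14, 0.565, 2.0]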
exp_replay.py ADDED
@@ -0,0 +1,77 @@
import numpy as np

class ExperienceReplay:

    def __init__(self,
                 num_frame_stack=4,
                 capacity=int(1e5),
                 pic_size=(96, 96)
                 ):
        self.num_frame_stack = num_frame_stack
        self.capacity = capacity
        self.pic_size = pic_size
        self.counter = 0
        self.frame_window = None
        self.init_caches()
        self.expecting_new_episode = True

    def add_experience(self, frame, action, done, reward):
        assert self.frame_window is not None, "start episode first"
        self.counter += 1
        frame_idx = self.counter % self.max_frame_cache
        exp_idx = (self.counter - 1) % self.capacity

        self.prev_states[exp_idx] = self.frame_window
        self.frame_window = np.append(self.frame_window[1:], frame_idx)
        self.next_states[exp_idx] = self.frame_window
        self.actions[exp_idx] = action
        self.is_done[exp_idx] = done
        self.frames[frame_idx] = frame
        self.rewards[exp_idx] = reward
        if done:
            self.expecting_new_episode = True

    def start_new_episode(self, frame):
        # it should be okay not to increment counter here
        # because episode ending frames are not used
        assert self.expecting_new_episode, "previous episode didn't end yet"
        frame_idx = self.counter % self.max_frame_cache
        self.frame_window = np.repeat(frame_idx, self.num_frame_stack)
        self.frames[frame_idx] = frame
        self.expecting_new_episode = False

    def sample_mini_batch(self, n):
        count = min(self.capacity, self.counter)
        batchidx = np.random.randint(count, size=n)

        prev_frames = self.frames[self.prev_states[batchidx]]
        next_frames = self.frames[self.next_states[batchidx]]
        prev_frames = np.moveaxis(prev_frames, 1, -1)
        next_frames = np.moveaxis(next_frames, 1, -1)
        return {
            "reward": self.rewards[batchidx],
            "prev_state": prev_frames,
            "next_state": next_frames,
            "actions": self.actions[batchidx],
            "done_mask": self.is_done[batchidx]
        }

    def current_state(self):
        # assert not self.expecting_new_episode, "start new episode first"
        assert self.frame_window is not None, "do something first"

        sf = self.frames[self.frame_window]
        sf = np.moveaxis(sf, 0, -1)
        return sf

    def init_caches(self):
        self.rewards = np.zeros(self.capacity, dtype="float32")
        self.prev_states = -np.ones((self.capacity, self.num_frame_stack),
                                    dtype="int32")
        self.next_states = -np.ones((self.capacity, self.num_frame_stack),
                                    dtype="int32")
        self.is_done = -np.ones(self.capacity, "int32")
        self.actions = -np.ones(self.capacity, dtype="int32")

        self.max_frame_cache = self.capacity + 2 * self.num_frame_stack + 1
        self.frames = -np.ones((self.max_frame_cache,) + self.pic_size, dtype="float32")
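A minimal usage sketch for the replay buffer above, assuming 96x96 single-channel frames like the ones processimage.py produces (illustrative, not part of the commit):

import numpy as np
from exp_replay import ExperienceReplay

er = ExperienceReplay(num_frame_stack=3, capacity=1000, pic_size=(96, 96))

frame = np.zeros((96, 96), dtype="float32")
er.start_new_episode(frame)
for t in range(10):
    er.add_experience(frame, action=0, done=(t == 9), reward=1.0)

print(er.current_state().shape)     # (96, 96, 3) -- the frame stack is moved to the last axis
batch = er.sample_mini_batch(4)
print(batch["prev_state"].shape)    # (4, 96, 96, 3)
print(sorted(batch.keys()))         # ['actions', 'done_mask', 'next_state', 'prev_state', 'reward']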
main.py ADDED
@@ -0,0 +1,156 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from car_dqn import CarRacingDQN
import os
import tensorflow as tf
import gym
import _thread
import re
import sys
import numpy as np

# Ensure it is running on GPU
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

load_checkpoint = True
checkpoint_path = "data/checkpoints/train24"
train_episodes = 15000
save_freq_episodes = train_episodes / 100
finished = False
opendir = checkpoint_path + '.txt'
text_results = open(opendir, "w")
render = False

frame_skip = 3  # frame-skip n: the model acts and is trained only every n frames
model_config = dict(
    min_epsilon=0.05,
    max_negative_rewards=8,
    min_experience_size=int(100),
    experience_capacity=int(150000),
    num_frame_stack=frame_skip,
    frame_skip=frame_skip,
    train_freq=frame_skip,
    batchsize=64,
    epsilon_decay_steps=int(100000),
    target_network_update_freq=int(1000),  # copy the prediction network into the target network every 1000 global steps
    gamma=0.95,
    render=False,
)

dqn_scores = []
eps_history = []
avg_score_all = [0]

env = gym.make('CarRacing-v0', verbose=False)

tf.compat.v1.reset_default_graph()

dqn_agent = CarRacingDQN(env=env, **model_config)
dqn_agent.build_graph()
sess = tf.InteractiveSession()
dqn_agent.session = sess

# Initialize the checkpoint saver
saver = tf.train.Saver(max_to_keep=1000)  # keep at most 1000 checkpoints
# Choose whether to load a checkpoint
if load_checkpoint:
    train_episodes = 150
    save_freq_episodes = 0
    print("loading the latest checkpoint from %s" % checkpoint_path)
    ckpt = tf.train.get_checkpoint_state(checkpoint_path)
    assert ckpt, "checkpoint path %s not found" % checkpoint_path
    global_counter = int(re.findall(r"-(\d+)$", ckpt.model_checkpoint_path)[0])
    saver.restore(sess, ckpt.model_checkpoint_path)
    dqn_agent.global_counter = global_counter
    render = True
else:
    if checkpoint_path is not None:
        assert not os.path.exists(checkpoint_path), \
            "checkpoint path already exists but load_checkpoint is false"

    tf.global_variables_initializer().run()


def save_checkpoint():
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    p = os.path.join(checkpoint_path, "m.ckpt")
    saver.save(sess, p, dqn_agent.global_counter)
    print("saved to %s - %d" % (p, dqn_agent.global_counter))


def one_episode(eps_history, dqn_scores, avg_score_all, render, load_checkpoint):
    score, reward, frames, epsilon = dqn_agent.play_episode(render, load_checkpoint)

    eps_history.append(epsilon)

    dqn_scores.append(score)
    i = dqn_agent.episode_counter
    avg_score = np.mean(dqn_scores[max(0, i - 100):(i + 1)])
    avg_score_all.append(avg_score)
    max_avg_score = max(avg_score_all)
    if avg_score >= max_avg_score:
        new_max = ' => New HighScore! <= '
        highscore = True
    else:
        new_max = ''
        highscore = False

    strm = ("#> episode: %i | score: %.2f | total steps: %i | epsilon: %.5f | average 100 score: %.2f" %
            (i, score, dqn_agent.global_counter, epsilon, avg_score))

    print(strm + new_max)

    text_results = open(opendir, "a")
    text_results.write(strm + new_max + '\n')
    text_results.close()

    if not load_checkpoint:
        save_cond = (
            dqn_agent.episode_counter % save_freq_episodes == 0
            and checkpoint_path is not None
            and dqn_agent.do_training
        )
        if save_cond or (highscore and dqn_agent.episode_counter > 100):
            save_checkpoint()

    return eps_history, dqn_scores, avg_score_all


def input_thread(list):
    input("...enter to stop after current episode\n")
    list.append("OK")


def main_loop(eps_history, dqn_scores, avg_score_all, render, load_checkpoint):
    # training/playing loop
    list = []
    _thread.start_new_thread(input_thread, (list,))
    while True:
        if list:
            break
        if dqn_agent.do_training and dqn_agent.episode_counter >= train_episodes:
            break
        eps_history, dqn_scores, avg_score_all = one_episode(eps_history, dqn_scores, avg_score_all, render, load_checkpoint)

    print("done")
    text_results.close()
    exit()
    return eps_history, dqn_scores, avg_score_all


if train_episodes > 0 and dqn_agent.episode_counter < train_episodes and not load_checkpoint:
    print("now training... you can early stop with enter...")
    print("##########")
    sys.stdout.flush()
    main_loop(eps_history, dqn_scores, avg_score_all, render, load_checkpoint)
    save_checkpoint()
    print("ok training done")

else:
    print("now just playing...")
    sys.stdout.flush()
    main_loop(eps_history, dqn_scores, avg_score_all, render, load_checkpoint)
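The checkpoint-restore branch above recovers the global step by parsing the trailing number in the checkpoint filename. A small sketch of that parsing, using a hypothetical path of the form saver.save() produces (real paths come from tf.train.get_checkpoint_state()):

import re

# Hypothetical checkpoint path; the trailing number is the global step appended by saver.save()
model_checkpoint_path = "data/checkpoints/train24/m.ckpt-123456"

global_counter = int(re.findall(r"-(\d+)$", model_checkpoint_path)[0])
print(global_counter)  # 123456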
processimage.py ADDED
@@ -0,0 +1,25 @@
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import numpy as np
from skimage import color, transform, io


class processimage:
    def process_image(obs):
        # uncomment to see the original image
        # plt.imshow(obs)
        # plt.show()

        obs1 = obs.astype(np.uint8)
        obs_gray = color.rgb2gray(obs1)
        # obs_gray[abs(obs_gray - 0.60116) < 0.1] = 1
        obs_gray[84:95, 0:12] = 0  # black out the indicator region in the bottom-left corner
        obs_gray[abs(obs_gray - 0.68616) < 0.0001] = 1  # snap two specific gray levels to white
        obs_gray[abs(obs_gray - 0.75630) < 0.0001] = 1
        # uncomment to see the pre-processed image
        # plt.imshow(obs_gray, cmap='gray')
        # plt.show()

        # Scale values to [-1, 1] for input normalization
        return 2 * obs_gray - 1
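Finally, a minimal usage sketch of the preprocessing above, with a random RGB array standing in for a real CarRacing observation (illustrative only):

import numpy as np
from processimage import processimage

# Stand-in for a CarRacing frame: 96x96 RGB, uint8 values in [0, 255]
fake_obs = np.random.randint(0, 256, size=(96, 96, 3), dtype=np.uint8)

processed = processimage.process_image(fake_obs)
print(processed.shape)                                   # (96, 96) -- single grayscale channel
print(processed.min() >= -1.0, processed.max() <= 1.0)   # values normalized to [-1, 1]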