import os

# Headless-friendly environment settings: XDG_RUNTIME_DIR keeps SDL happy in
# containers; the dummy video driver (an assumed deployment detail, not part of
# the original) lets Pygame render to off-screen surfaces without a display.
os.environ["XDG_RUNTIME_DIR"] = "/tmp"
os.environ.setdefault("SDL_VIDEODRIVER", "dummy")

import random

import cv2
import gradio as gr
import gymnasium as gym
import numpy as np
import pygame
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Constants
SCREEN_WIDTH = 640
SCREEN_HEIGHT = 480
PADDLE_WIDTH = 100
PADDLE_HEIGHT = 10
BALL_RADIUS = 10
BRICK_WIDTH = 60
BRICK_HEIGHT = 20
BRICK_ROWS = 5
BRICK_COLS = 10
FPS = 40

# Colors
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)

# Initialize Pygame
pygame.init()


# Game classes
class Paddle:
    def __init__(self):
        self.rect = pygame.Rect(SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2,
                                SCREEN_HEIGHT - PADDLE_HEIGHT - 10,
                                PADDLE_WIDTH, PADDLE_HEIGHT)

    def move(self, direction):
        # direction: -1 = left, 1 = right, anything else = stay
        if direction == -1:
            self.rect.x -= 10
        elif direction == 1:
            self.rect.x += 10
        self.rect.clamp_ip(pygame.Rect(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT))


class Ball:
    def __init__(self):
        self.rect = pygame.Rect(SCREEN_WIDTH // 2 - BALL_RADIUS,
                                SCREEN_HEIGHT // 2 - BALL_RADIUS,
                                BALL_RADIUS * 2, BALL_RADIUS * 2)
        self.velocity = [random.choice([-5, 5]), -5]

    def move(self):
        self.rect.x += self.velocity[0]
        self.rect.y += self.velocity[1]
        # Bounce off the side walls and the ceiling
        if self.rect.left <= 0 or self.rect.right >= SCREEN_WIDTH:
            self.velocity[0] = -self.velocity[0]
        if self.rect.top <= 0:
            self.velocity[1] = -self.velocity[1]

    def reset(self):
        self.rect = pygame.Rect(SCREEN_WIDTH // 2 - BALL_RADIUS,
                                SCREEN_HEIGHT // 2 - BALL_RADIUS,
                                BALL_RADIUS * 2, BALL_RADIUS * 2)
        self.velocity = [random.choice([-5, 5]), -5]


class Brick:
    def __init__(self, x, y):
        # Shrink each brick by 5 px so neighbouring bricks have a visible gap
        self.rect = pygame.Rect(x, y, BRICK_WIDTH - 5, BRICK_HEIGHT - 5)


class ArkanoidEnv(gym.Env):
    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5):
        super().__init__()
        self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
        # Velocity components can be negative, so the box must extend below zero
        self.observation_space = gym.spaces.Box(
            low=-SCREEN_WIDTH, high=SCREEN_WIDTH,
            shape=(5 + BRICK_ROWS * BRICK_COLS * 2,), dtype=np.float32)
        self.reward_size = reward_size
        self.penalty_size = penalty_size
        self.platform_reward = platform_reward
        self.reset()

    def reset(self, seed=None, options=None):
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        self.paddle = Paddle()
        self.ball = Ball()
        self.bricks = [Brick(x, y)
                       for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
                       for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
        self.done = False
        self.score = 0
        return self._get_state(), {}

    def step(self, action):
        if action == 0:
            self.paddle.move(0)
        elif action == 1:
            self.paddle.move(-1)
        elif action == 2:
            self.paddle.move(1)

        self.ball.move()
        reward = 0

        if self.ball.rect.colliderect(self.paddle.rect):
            self.ball.velocity[1] = -self.ball.velocity[1]
            self.score += self.platform_reward

        for brick in self.bricks[:]:
            if self.ball.rect.colliderect(brick.rect):
                self.bricks.remove(brick)
                self.ball.velocity[1] = -self.ball.velocity[1]
                self.score += 1
                reward = self.reward_size
                if not self.bricks:
                    reward += self.reward_size * 10  # Bonus reward for breaking all bricks
                    self.done = True
                break  # Flip the velocity once even if the ball overlaps two bricks

        # Losing the ball ends the episode and overrides any brick reward
        if self.ball.rect.bottom >= SCREEN_HEIGHT:
            self.done = True
            reward = self.penalty_size

        return self._get_state(), reward, self.done, False, {}

    def _get_state(self):
        state = [
            self.paddle.rect.x,
            self.ball.rect.x,
            self.ball.rect.y,
            self.ball.velocity[0],
            self.ball.velocity[1]
        ]
        for brick in self.bricks:
            state.extend([brick.rect.x, brick.rect.y])
        # Pad with zeros so the observation keeps a fixed length as bricks disappear
        state.extend([0, 0] * (BRICK_ROWS * BRICK_COLS - len(self.bricks)))
        return np.array(state, dtype=np.float32)

    def render(self, mode='rgb_array'):
        surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
        surface.fill(BLACK)
        pygame.draw.rect(surface, WHITE, self.paddle.rect)
        pygame.draw.ellipse(surface, WHITE, self.ball.rect)
        for brick in self.bricks:
            pygame.draw.rect(surface, RED, brick.rect)
        if mode == 'rgb_array':
            # surfarray.array3d returns (width, height, 3); transpose to the
            # conventional (height, width, 3) layout so frames come out upright
            return np.transpose(pygame.surfarray.array3d(surface), (1, 0, 2))
        elif mode == 'human':
            pygame.display.get_surface().blit(surface, (0, 0))
            pygame.display.flip()

    def close(self):
        pygame.quit()
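
# evaluate_policy is imported above but never used; below is a minimal,
# illustrative sketch of how it could sanity-check a trained model. The helper
# name and the episode count are assumptions, not part of the original flow.
def evaluate_trained_model(model, env, n_eval_episodes=5):
    """Hypothetical helper: print and return mean/std episode reward."""
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)
    print(f"Mean reward over {n_eval_episodes} episodes: "
          f"{mean_reward:.2f} +/- {std_reward:.2f}")
    return mean_reward, std_reward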

# Training and playing with custom parameters
def train_and_play(reward_size, penalty_size, platform_reward, iterations):
    iterations = int(iterations)  # Gradio sliders deliver floats
    env = ArkanoidEnv(reward_size=reward_size, penalty_size=penalty_size,
                      platform_reward=platform_reward)
    model = DQN('MlpPolicy', env, verbose=1)

    timesteps_per_update = min(1000, iterations)
    video_frames = []
    completed_iterations = 0
    while completed_iterations < iterations:
        # Train in chunks, then record one greedy episode per chunk so the
        # resulting video shows the policy improving over training
        steps = min(timesteps_per_update, iterations - completed_iterations)
        model.learn(total_timesteps=steps, reset_num_timesteps=False)
        completed_iterations += steps

        obs, _ = env.reset()
        done = False
        while not done:
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, truncated, _ = env.step(int(action))
            frame = env.render(mode='rgb_array')
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # OpenCV expects BGR
            video_frames.append(frame)

    video_path = "arkanoid_training.mp4"
    video_writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                   FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
    for frame in video_frames:
        video_writer.write(frame)
    video_writer.release()
    env.close()
    return video_path


# Main function with Gradio interface
def main():
    iface = gr.Interface(
        fn=train_and_play,
        inputs=[
            gr.Number(label="Reward Size", value=1),
            gr.Number(label="Penalty Size", value=-1),
            gr.Number(label="Platform Reward", value=5),
            gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
        ],
        outputs="video",
        live=False  # Disable auto-generation on slider changes
    )
    iface.launch()


if __name__ == "__main__":
    main()
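
# Usage sketch (assuming default Gradio settings): running the script starts
# the web UI and prints a local URL, typically http://127.0.0.1:7860.
#
#     python arkanoid_dqn.py   # hypothetical filename
#
# Submitting the form trains a DQN with the chosen reward shaping and returns
# the recorded gameplay video (arkanoid_training.mp4).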