# game / app.py
# Ivan000's picture
# Update app.py
# 40ec26a verified
import os
os.environ["XDG_RUNTIME_DIR"] = "/tmp"
import numpy as np
import pygame
import random
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
import gradio as gr
import cv2
# Playfield and sprite geometry, in pixels.
SCREEN_WIDTH, SCREEN_HEIGHT = 640, 480
PADDLE_WIDTH, PADDLE_HEIGHT = 100, 10
BALL_RADIUS = 10

# Brick grid: size of one grid cell and the nominal number of rows/columns.
BRICK_WIDTH, BRICK_HEIGHT = 60, 20
BRICK_ROWS, BRICK_COLS = 5, 10

# Frame rate of the recorded video.
FPS = 40

# RGB colour triples used when drawing.
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
# Initialize pygame once, at import time, before any game object is created.
pygame.init()
# Game classes
class Paddle:
    """Player paddle: a PADDLE_WIDTH x PADDLE_HEIGHT rectangle near the bottom of the screen."""

    def __init__(self):
        # Start horizontally centred, 10 px above the bottom edge.
        left = SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2
        top = SCREEN_HEIGHT - PADDLE_HEIGHT - 10
        self.rect = pygame.Rect(left, top, PADDLE_WIDTH, PADDLE_HEIGHT)

    def move(self, direction):
        """Shift the paddle 10 px left (``-1``) or right (``1``).

        Any other ``direction`` leaves the position untouched. In every case
        the paddle is clamped back inside the screen rectangle afterwards.
        """
        if direction == 1:
            self.rect.x += 10
        elif direction == -1:
            self.rect.x -= 10
        playfield = pygame.Rect(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT)
        self.rect.clamp_ip(playfield)
class Ball:
    """The ball: a square bounding rect of side ``2 * BALL_RADIUS`` plus a velocity.

    Attributes:
        rect: ``pygame.Rect`` bounding box of the ball.
        velocity: ``[vx, vy]`` in pixels per step.
    """

    SPEED = 5  # pixels travelled per step along each axis

    def __init__(self):
        # Construction and reset share one code path so they cannot drift apart.
        self.reset()

    def move(self):
        """Advance one step and bounce off the left, right and top walls.

        The bottom edge is deliberately not handled here; the caller decides
        what happens when the ball falls out of the playfield.
        """
        self.rect.x += self.velocity[0]
        self.rect.y += self.velocity[1]
        # Force the velocity to point away from the wall instead of merely
        # toggling its sign: a toggle would flip back and forth on every step
        # if the ball ever ended up beyond a wall, trapping it there.
        if self.rect.left <= 0:
            self.velocity[0] = abs(self.velocity[0])
        elif self.rect.right >= SCREEN_WIDTH:
            self.velocity[0] = -abs(self.velocity[0])
        if self.rect.top <= 0:
            self.velocity[1] = abs(self.velocity[1])

    def reset(self):
        """Re-centre the ball and launch it upwards with a random horizontal direction."""
        side = BALL_RADIUS * 2
        self.rect = pygame.Rect(
            SCREEN_WIDTH // 2 - BALL_RADIUS,
            SCREEN_HEIGHT // 2 - BALL_RADIUS,
            side,
            side,
        )
        self.velocity = [random.choice([-self.SPEED, self.SPEED]), -self.SPEED]
class Brick:
    """A single brick, stored as a ``pygame.Rect``."""

    def __init__(self, x, y):
        # The rect is 5 px smaller than BRICK_WIDTH x BRICK_HEIGHT on each axis.
        shrink = 5
        width = BRICK_WIDTH - shrink
        height = BRICK_HEIGHT - shrink
        self.rect = pygame.Rect(x, y, width, height)
class ArkanoidEnv(gym.Env):
    """Gymnasium environment for a minimal Arkanoid clone.

    Actions (``Discrete(3)``): 0 = stay, 1 = move left, 2 = move right.

    Observation: ``float32`` vector of length ``5 + 2 * BRICK_ROWS * BRICK_COLS``
    laid out as paddle x, ball x, ball y, ball vx, ball vy, followed by the
    ``(x, y)`` of every remaining brick and zero padding up to the fixed length.

    Args:
        reward_size: Reward for each destroyed brick. Destroying the last
            brick adds a bonus of ten times this value.
        penalty_size: Reward returned when the ball reaches the bottom edge.
        platform_reward: Reward for bouncing the ball off the paddle.
    """

    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5):
        super().__init__()
        self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
        obs_len = 5 + BRICK_ROWS * BRICK_COLS * 2
        # The ball velocity components can be negative, so the lower bound has
        # to be below zero for observations to lie inside the declared space.
        self.observation_space = gym.spaces.Box(
            low=-float(SCREEN_WIDTH),
            high=float(SCREEN_WIDTH),
            shape=(obs_len,),
            dtype=np.float32,
        )
        self.reward_size = reward_size
        self.penalty_size = penalty_size
        self.platform_reward = platform_reward
        self.reset()

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        When ``seed`` is given, the environment's own RNG as well as the global
        ``random`` and ``numpy.random`` generators are seeded with it.
        """
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        self.paddle = Paddle()
        self.ball = Ball()
        # NOTE(review): with the current constants this grid has 9 columns
        # (x = 60 .. 540), one fewer than BRICK_COLS. The observation is
        # zero-padded to the nominal size, so shapes still match.
        self.bricks = [
            Brick(x, y)
            for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
            for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)
        ]
        self.done = False
        self.score = 0
        return self._get_state(), {}

    def step(self, action):
        """Advance the game by one frame.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. The episode
            terminates when the last brick is destroyed or the ball reaches the
            bottom edge; ``truncated`` is always ``False``.
        """
        if action == 0:
            self.paddle.move(0)
        elif action == 1:
            self.paddle.move(-1)
        elif action == 2:
            self.paddle.move(1)

        self.ball.move()
        reward = 0

        # Bounce only while the ball is travelling downwards. Without this
        # check a ball overlapping the paddle for several frames would have its
        # vertical velocity toggled on every frame, jittering in place and
        # collecting the paddle bonus each time.
        if self.ball.velocity[1] > 0 and self.ball.rect.colliderect(self.paddle.rect):
            self.ball.velocity[1] = -self.ball.velocity[1]
            self.score += self.platform_reward
            # The paddle bonus must be part of the returned reward, otherwise
            # the platform_reward setting has no influence on learning.
            reward += self.platform_reward

        # At most one brick is destroyed per step: the first one, in list
        # order, that overlaps the ball.
        hit = next(
            (brick for brick in self.bricks if self.ball.rect.colliderect(brick.rect)),
            None,
        )
        if hit is not None:
            self.bricks.remove(hit)
            self.ball.velocity[1] = -self.ball.velocity[1]
            self.score += 1
            reward += self.reward_size
            if not self.bricks:
                reward += self.reward_size * 10  # Bonus reward for breaking all bricks
                self.done = True
            return self._get_state(), reward, self.done, False, {}

        if self.ball.rect.bottom >= SCREEN_HEIGHT:
            # Ball lost: the penalty replaces anything earned this step.
            self.done = True
            reward = self.penalty_size
        return self._get_state(), reward, self.done, False, {}

    def _get_state(self):
        """Build the fixed-length ``float32`` observation vector."""
        state = [
            self.paddle.rect.x,
            self.ball.rect.x,
            self.ball.rect.y,
            self.ball.velocity[0],
            self.ball.velocity[1],
        ]
        for brick in self.bricks:
            state.extend((brick.rect.x, brick.rect.y))
        # Zero padding keeps the vector length constant as bricks disappear.
        missing = BRICK_ROWS * BRICK_COLS - len(self.bricks)
        state.extend([0, 0] * missing)
        return np.array(state, dtype=np.float32)

    def render(self, mode='rgb_array'):
        """Draw the current frame.

        Args:
            mode: ``'rgb_array'`` returns the frame as produced by
                ``pygame.surfarray.array3d`` (indexed ``[x][y][channel]``, i.e.
                width first). ``'human'`` blits the frame to the pygame
                display window and returns ``None``.
        """
        surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
        surface.fill(BLACK)
        pygame.draw.rect(surface, WHITE, self.paddle.rect)
        pygame.draw.ellipse(surface, WHITE, self.ball.rect)
        for brick in self.bricks:
            pygame.draw.rect(surface, RED, brick.rect)

        if mode == 'rgb_array':
            return pygame.surfarray.array3d(surface)
        if mode == 'human':
            # Create the window lazily: get_surface() yields None until a
            # display mode has been set.
            screen = pygame.display.get_surface()
            if screen is None:
                screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
            screen.blit(surface, (0, 0))
            pygame.display.flip()
        return None

    def close(self):
        """Shut pygame down."""
        pygame.quit()
# Training and playing with custom parameters
def train_and_play(reward_size, penalty_size, platform_reward, iterations):
    """Train a DQN agent in chunks and record one greedy episode after each chunk.

    Args:
        reward_size: Forwarded to ``ArkanoidEnv`` as ``reward_size``.
        penalty_size: Forwarded to ``ArkanoidEnv`` as ``penalty_size``.
        platform_reward: Forwarded to ``ArkanoidEnv`` as ``platform_reward``.
        iterations: Total number of training timesteps, split into chunks of
            at most 1000.

    Returns:
        Path of the MP4 file holding all recorded episodes back to back.
    """
    # UI widgets may hand the value over as a float; the chunk arithmetic
    # below needs a whole number of timesteps.
    iterations = int(iterations)
    # Upper bound on the length of one recorded episode, so a policy that
    # never loses the ball and never clears the board cannot loop forever.
    max_eval_steps = 10000

    env = ArkanoidEnv(reward_size=reward_size, penalty_size=penalty_size, platform_reward=platform_reward)
    model = DQN('MlpPolicy', env, verbose=1)
    timesteps_per_update = min(1000, iterations)

    video_path = "arkanoid_training.mp4"
    video_writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
    try:
        completed_iterations = 0
        while completed_iterations < iterations:
            steps = min(timesteps_per_update, iterations - completed_iterations)
            model.learn(total_timesteps=steps)
            completed_iterations += steps

            obs, _ = env.reset()
            for _ in range(max_eval_steps):
                action, _states = model.predict(obs, deterministic=True)
                obs, reward, done, truncated, _ = env.step(action)
                frame = env.render(mode='rgb_array')
                # The rendered array is indexed [x][y]; video frames are
                # [row][column]. A plain axis swap keeps the picture upright,
                # whereas a 90-degree rotation would mirror it vertically.
                frame = np.ascontiguousarray(np.transpose(frame, (1, 0, 2)))
                # Write each frame immediately rather than buffering the whole
                # run in memory (about 0.9 MB per frame).
                video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                if done or truncated:
                    break
    finally:
        # Finalise the file and release pygame even if training fails.
        video_writer.release()
        env.close()
    return video_path
# Main function with Gradio interface
def main():
    """Build the Gradio interface around ``train_and_play`` and launch it."""
    controls = [
        gr.Number(label="Reward Size", value=1),
        gr.Number(label="Penalty Size", value=-1),
        gr.Number(label="Platform Reward", value=5),
        gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000),
    ]
    demo = gr.Interface(
        fn=train_and_play,
        inputs=controls,
        outputs="video",
        live=False,  # Disable auto-generation on slider changes
    )
    demo.launch()


# Start the UI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()