Spaces:

Ivan000
/

game

Sleeping

App Files Files Community

game / app.py

Ivan000

Update app.py

40ec26a verified 7 months ago

raw

history blame contribute delete

6.9 kB

	import os
	os.environ["XDG_RUNTIME_DIR"] = "/tmp"
	import numpy as np
	import pygame
	import random
	import gymnasium as gym
	from stable_baselines3 import DQN
	from stable_baselines3.common.evaluation import evaluate_policy
	import gradio as gr
	import cv2

	# Constants
	# Playfield and sprite geometry (pixel units, as passed to pygame.Rect)
	SCREEN_WIDTH, SCREEN_HEIGHT = 640, 480
	PADDLE_WIDTH, PADDLE_HEIGHT = 100, 10
	BALL_RADIUS = 10
	BRICK_WIDTH, BRICK_HEIGHT = 60, 20
	# Nominal brick grid; used to size the observation vector
	BRICK_ROWS, BRICK_COLS = 5, 10
	# Frame rate used when encoding the gameplay video
	FPS = 40

	# Colors (RGB triples)
	WHITE = (255,) * 3
	BLACK = (0,) * 3
	RED = (255, 0, 0)

	# Initialize Pygame
	# Executed at import time, before any of the game classes below are instantiated.
	pygame.init()

	# Game classes
	class Paddle:
	    """Player-controlled paddle, stored as a pygame.Rect near the bottom of the screen."""

	    def __init__(self):
	        """Place the paddle horizontally centred, 10 px above the bottom edge."""
	        left = SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2
	        top = SCREEN_HEIGHT - PADDLE_HEIGHT - 10
	        self.rect = pygame.Rect(left, top, PADDLE_WIDTH, PADDLE_HEIGHT)

	    def move(self, direction):
	        """Shift the paddle 10 px left (-1) or right (1); any other value leaves it in place.

	        The paddle is always clamped to the screen rectangle afterwards.
	        """
	        offset = -10 if direction == -1 else (10 if direction == 1 else 0)
	        self.rect.x += offset
	        screen_bounds = pygame.Rect(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT)
	        self.rect.clamp_ip(screen_bounds)

	class Ball:
	    """Ball with an axis-aligned bounding rect and a mutable [vx, vy] velocity list."""

	    def __init__(self):
	        """Create the ball in its starting state (see reset)."""
	        # Single source of truth for the initial state: reuse reset() instead of
	        # duplicating the rect/velocity construction.
	        self.reset()

	    def move(self):
	        """Advance one step by the current velocity and bounce off the side and top walls.

	        The bottom edge is deliberately not handled here.
	        """
	        self.rect.x += self.velocity[0]
	        self.rect.y += self.velocity[1]

	        # Force the velocity to point away from the wall rather than blindly negating
	        # it, so a ball already touching a wall can never flip back into it.
	        if self.rect.left <= 0:
	            self.velocity[0] = abs(self.velocity[0])
	        elif self.rect.right >= SCREEN_WIDTH:
	            self.velocity[0] = -abs(self.velocity[0])
	        if self.rect.top <= 0:
	            self.velocity[1] = abs(self.velocity[1])

	    def reset(self):
	        """Re-centre the ball and launch it upward with a random horizontal direction."""
	        self.rect = pygame.Rect(
	            SCREEN_WIDTH // 2 - BALL_RADIUS,
	            SCREEN_HEIGHT // 2 - BALL_RADIUS,
	            BALL_RADIUS * 2,
	            BALL_RADIUS * 2,
	        )
	        self.velocity = [random.choice([-5, 5]), -5]

	class Brick:
	    """A single destructible brick, represented only by its pygame.Rect."""

	    def __init__(self, x, y):
	        """Create a brick whose top-left corner is (x, y).

	        The rect is 5 px smaller than the BRICK_WIDTH x BRICK_HEIGHT cell in each dimension.
	        """
	        gap = 5
	        size = (BRICK_WIDTH - gap, BRICK_HEIGHT - gap)
	        self.rect = pygame.Rect((x, y), size)

	class ArkanoidEnv(gym.Env):
	    """Gymnasium environment for a single-ball Arkanoid game.

	    Actions: 0 = stay, 1 = move paddle left, 2 = move paddle right.

	    Observation: float32 vector ``[paddle_x, ball_x, ball_y, ball_vx, ball_vy]``
	    followed by ``(x, y)`` for every remaining brick, zero-padded so the length is
	    always ``5 + BRICK_ROWS * BRICK_COLS * 2``.
	    """

	    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5):
	        """Configure the reward scheme and start the first episode.

	        Args:
	            reward_size: reward for breaking one brick (clearing the board adds 10x more).
	            penalty_size: reward returned when the ball reaches the bottom edge.
	            platform_reward: reward for bouncing the ball off the paddle.
	        """
	        super().__init__()
	        self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
	        # Ball velocity components are negative half of the time, so the lower bound
	        # must extend below zero for observations to lie inside the declared space.
	        self.observation_space = gym.spaces.Box(
	            low=-SCREEN_WIDTH,
	            high=SCREEN_WIDTH,
	            shape=(5 + BRICK_ROWS * BRICK_COLS * 2,),
	            dtype=np.float32,
	        )
	        self.reward_size = reward_size
	        self.penalty_size = penalty_size
	        self.platform_reward = platform_reward
	        self.reset()

	    def reset(self, seed=None, options=None):
	        """Start a new episode and return ``(observation, info)``.

	        When ``seed`` is given, the ``random`` and ``numpy`` global generators are
	        seeded as well, because Ball picks its launch direction with ``random``.
	        """
	        # Gymnasium convention: let the base class seed its own RNG first.
	        super().reset(seed=seed)
	        if seed is not None:
	            random.seed(seed)
	            np.random.seed(seed)
	        self.paddle = Paddle()
	        self.ball = Ball()
	        # NOTE: with the current constants this x-range yields 9 columns, one fewer
	        # than BRICK_COLS; _get_state zero-pads the observation to compensate.
	        self.bricks = [
	            Brick(x, y)
	            for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
	            for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)
	        ]
	        self.done = False
	        self.score = 0
	        return self._get_state(), {}

	    def step(self, action):
	        """Apply ``action``, advance the ball one tick, and resolve collisions.

	        Returns:
	            ``(observation, reward, terminated, truncated, info)``; ``truncated`` is
	            always False and ``info`` is always empty.
	        """
	        if action == 1:
	            self.paddle.move(-1)
	        elif action == 2:
	            self.paddle.move(1)
	        else:
	            self.paddle.move(0)

	        self.ball.move()
	        reward = 0

	        # Bounce only while the ball is travelling downward; otherwise an overlap that
	        # lasts several frames would flip the velocity back and forth (ball "stuck" in
	        # the paddle) and award the paddle bonus repeatedly.
	        if self.ball.velocity[1] > 0 and self.ball.rect.colliderect(self.paddle.rect):
	            self.ball.velocity[1] = -self.ball.velocity[1]
	            self.score += self.platform_reward
	            # Feed the paddle bonus into the RL reward; previously it only changed
	            # self.score, so the agent never saw it.
	            reward += self.platform_reward

	        # At most one brick is destroyed per tick: the first one the ball overlaps.
	        hit = next((b for b in self.bricks if self.ball.rect.colliderect(b.rect)), None)
	        if hit is not None:
	            self.bricks.remove(hit)
	            self.ball.velocity[1] = -self.ball.velocity[1]
	            self.score += 1
	            reward += self.reward_size
	            if not self.bricks:
	                reward += self.reward_size * 10  # Bonus reward for breaking all bricks
	                self.done = True
	        elif self.ball.rect.bottom >= SCREEN_HEIGHT:
	            # Ball lost: the episode ends with the penalty as its reward.
	            self.done = True
	            reward = self.penalty_size

	        return self._get_state(), reward, self.done, False, {}

	    def _get_state(self):
	        """Build the fixed-length float32 observation vector for the current state."""
	        state = [
	            self.paddle.rect.x,
	            self.ball.rect.x,
	            self.ball.rect.y,
	            self.ball.velocity[0],
	            self.ball.velocity[1],
	        ]
	        for brick in self.bricks:
	            state.extend((brick.rect.x, brick.rect.y))
	        # Padding for missing bricks keeps the vector length constant.
	        missing = BRICK_ROWS * BRICK_COLS - len(self.bricks)
	        state.extend([0, 0] * missing)
	        return np.array(state, dtype=np.float32)

	    def render(self, mode='rgb_array'):
	        """Draw the current frame.

	        Args:
	            mode: ``'rgb_array'`` returns the frame as produced by
	                ``pygame.surfarray.array3d`` (first axis is x, second is y);
	                ``'human'`` blits the frame to the pygame window and returns None.
	        """
	        surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
	        surface.fill(BLACK)
	        pygame.draw.rect(surface, WHITE, self.paddle.rect)
	        pygame.draw.ellipse(surface, WHITE, self.ball.rect)
	        for brick in self.bricks:
	            pygame.draw.rect(surface, RED, brick.rect)

	        if mode == 'rgb_array':
	            return pygame.surfarray.array3d(surface)
	        if mode == 'human':
	            # No window is created anywhere else in this class, so open one lazily
	            # instead of failing on a None display surface.
	            if not pygame.display.get_init():
	                pygame.display.init()
	            window = pygame.display.get_surface()
	            if window is None:
	                window = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
	            window.blit(surface, (0, 0))
	            pygame.display.flip()
	        return None

	    def close(self):
	        """Shut down pygame."""
	        pygame.quit()

	# Training and playing with custom parameters
	def train_and_play(reward_size, penalty_size, platform_reward, iterations):
	    """Train a DQN agent on Arkanoid and record one demo episode after each training chunk.

	    Args:
	        reward_size: per-brick reward passed to ArkanoidEnv.
	        penalty_size: reward for losing the ball, passed to ArkanoidEnv.
	        platform_reward: paddle-hit bonus passed to ArkanoidEnv.
	        iterations: total number of training timesteps; trained in chunks of up to 1000.

	    Returns:
	        Path of the MP4 file containing all recorded demo episodes back to back.
	    """
	    # UI widgets may deliver floats; timestep counts must be whole numbers.
	    iterations = int(iterations)
	    timesteps_per_update = min(1000, iterations)
	    # Safety cap so a policy that keeps the ball alive forever cannot hang the request.
	    max_demo_steps = 10_000

	    env_kwargs = dict(reward_size=reward_size, penalty_size=penalty_size, platform_reward=platform_reward)
	    train_env = ArkanoidEnv(**env_kwargs)
	    # Demo episodes run on their own environment so that recording never disturbs
	    # the state of the environment the model keeps training on.
	    demo_env = ArkanoidEnv(**env_kwargs)
	    model = DQN('MlpPolicy', train_env, verbose=1)

	    video_path = "arkanoid_training.mp4"
	    video_writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
	    try:
	        completed_iterations = 0
	        while completed_iterations < iterations:
	            steps = min(timesteps_per_update, iterations - completed_iterations)
	            # Continue the same training run across chunks: resetting the timestep
	            # counter on every call would restart the exploration schedule and the
	            # learning_starts warm-up each time.
	            model.learn(total_timesteps=steps, reset_num_timesteps=(completed_iterations == 0))
	            completed_iterations += steps

	            obs, _ = demo_env.reset()
	            for _step in range(max_demo_steps):
	                action, _states = model.predict(obs, deterministic=True)
	                obs, _reward, done, truncated, _info = demo_env.step(action)

	                frame = demo_env.render(mode='rgb_array')
	                # surfarray is indexed (x, y); swap to (row, col) = (y, x) for OpenCV.
	                # A plain transpose keeps the image upright, whereas rot90 would
	                # leave it flipped vertically.
	                frame = np.ascontiguousarray(np.transpose(frame, (1, 0, 2)))
	                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
	                # Stream straight to disk instead of holding every frame in memory.
	                video_writer.write(frame)

	                if done or truncated:
	                    break
	    finally:
	        # Always finalise the file and shut pygame down, even if training fails.
	        video_writer.release()
	        train_env.close()
	        demo_env.close()

	    return video_path

	# Main function with Gradio interface
	def main():
	    """Build the Gradio form for train_and_play and start serving it."""
	    controls = [
	        gr.Number(label="Reward Size", value=1),
	        gr.Number(label="Penalty Size", value=-1),
	        gr.Number(label="Platform Reward", value=5),
	        gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000),
	    ]
	    # live=False: run only on explicit submit, not on every slider change
	    app = gr.Interface(fn=train_and_play, inputs=controls, outputs="video", live=False)
	    app.launch()

	# Start the UI only when this file is executed as a script.
	if __name__ == "__main__":
	    main()