""" FileName: app.py Author: Benhao Huang Create Date: 2023/11/19 Description: this file is used to display our project and add visualization elements to the game, using Streamlit """ import time import pandas as pd from copy import deepcopy import torch # import torch import numpy as np import streamlit as st from scipy.signal import convolve # this is used to check if any player wins from streamlit import session_state from streamlit.delta_generator import DeltaGenerator from streamlit_server_state import server_state, server_state_lock from Gomoku_MCTS import MCTSpure, alphazero, Board, PolicyValueNet_old, PolicyValueNet_new, duel_PolicyValueNet, \ Gumbel_MCTSPlayer from Gomoku_Bot import Gomoku_bot from Gomoku_Bot import Board as Gomoku_bot_board import matplotlib.pyplot as plt from const import ( _BLACK, # 1, for human _WHITE, # 2 , for AI _BLANK, _PLAYER_COLOR, _PLAYER_SYMBOL, _ROOM_COLOR, _VERTICAL, _NEW, _HORIZONTAL, _DIAGONAL_UP_LEFT, _DIAGONAL_UP_RIGHT, _BOARD_SIZE, _BOARD_SIZE_1D, _AI_AID_INFO, _MODEL_PATH ) ''' from ai import ( BOS_TOKEN_ID, generate_gpt2, load_model, ) gpt2 = load_model() ''' # Utils class Room: def __init__(self, room_id) -> None: self.ROOM_ID = room_id # self.BOARD = np.zeros(shape=(_BOARD_SIZE, _BOARD_SIZE), dtype=int) self.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=[_BLACK, _WHITE]) self.PLAYER = _BLACK self.TURN = self.PLAYER self.HISTORY = (0, 0) self.WINNER = _BLANK self.TIME = time.time() self.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1) self.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000), 'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["AlphaZero"]).policy_value_fn, c_puct=5, n_playout=100), 'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["duel"]).policy_value_fn, c_puct=5, n_playout=100), 'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["Gumbel AlphaZero"]).policy_value_fn, c_puct=5, n_playout=100, m_action=8), 'Gomoku Bot': Gomoku_bot(self.gomoku_bot_board, -1)} self.MCTS = self.MCTS_dict['AlphaZero'] self.last_mcts = self.MCTS self.AID_MCTS = self.MCTS_dict['AlphaZero'] self.COORDINATE_1D = [BOS_TOKEN_ID] self.current_move = -1 self.ai_simula_time_list = [] self.human_simula_time_list = [] def change_turn(cur): return cur % 2 + 1 # Initialize the game if "ROOM" not in session_state: session_state.ROOM = Room("local") if "OWNER" not in session_state: session_state.OWNER = False if "USE_AIAID" not in session_state: session_state.USE_AIAID = False # Check server health if "ROOMS" not in server_state: with server_state_lock["ROOMS"]: server_state.ROOMS = {} def handle_oppo_model_selection(): if st.session_state['selected_oppo_model'] == 'Gomoku Bot': session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict['Gomoku Bot'] return else: TreeNode = session_state.ROOM.last_mcts.mcts._root new_mct = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']] new_mct.mcts._root = deepcopy(TreeNode) session_state.ROOM.MCTS = new_mct session_state.ROOM.last_mcts = new_mct return def handle_aid_model_selection(): if st.session_state['selected_aid_model'] == 'None': session_state.USE_AIAID = False return session_state.USE_AIAID = True TreeNode = session_state.ROOM.MCTS.mcts._root # use the same tree node new_mct = session_state.ROOM.MCTS_dict[st.session_state['selected_aid_model']] new_mct.mcts._root = deepcopy(TreeNode) session_state.ROOM.AID_MCTS = new_mct return if 'selected_oppo_model' not in st.session_state: st.session_state['selected_oppo_model'] = 'AlphaZero' # 默认值 if 'selected_aid_model' not in st.session_state: st.session_state['selected_aid_model'] = 'AlphaZero' # 默认值 # Layout TITLE = st.empty() Model_Switch = st.empty() TITLE.header("🤖 AI 3603 Gomoku") selected_oppo_option = Model_Switch.selectbox('Select Opponent Model', ['Pure MCTS', 'AlphaZero', 'Gomoku Bot', 'duel', 'Gumbel AlphaZero'], index=1, key='oppo_model') if st.session_state['selected_oppo_model'] != selected_oppo_option: st.session_state['selected_oppo_model'] = selected_oppo_option handle_oppo_model_selection() ROUND_INFO = st.empty() st.markdown("
", unsafe_allow_html=True) BOARD_PLATE = [ [cell.empty() for cell in st.columns([1 for _ in range(_BOARD_SIZE)])] for _ in range(_BOARD_SIZE) ] LOG = st.empty() # Sidebar SCORE_TAG = st.sidebar.empty() SCORE_PLATE = st.sidebar.columns(2) # History scores SCORE_TAG.subheader("Scores") PLAY_MODE_INFO = st.sidebar.container() MULTIPLAYER_TAG = st.sidebar.empty() with st.sidebar.container(): ANOTHER_ROUND = st.empty() RESTART = st.empty() GIVEIN = st.empty() AIAID = st.empty() EXIT = st.empty() selected_aid_option = AIAID.selectbox('Select Assistant Model', ['None', 'Pure MCTS', 'AlphaZero'], index=0, key='aid_model') if st.session_state['selected_aid_model'] != selected_aid_option: st.session_state['selected_aid_model'] = selected_aid_option handle_aid_model_selection() GAME_INFO = st.sidebar.container() message = st.empty() PLAY_MODE_INFO.write("---\n\n**You are Black, AI agent is White.**") GAME_INFO.markdown( """ --- # Freestyle Gomoku game. 🎲 - no restrictions 🚫 - no regrets 😎 - no regrets 😎 - swap players after one round is over 🔁 Powered by an AlphaZero approach with our own improvements! 🚀 For the specific details, please check out our report. ##### Adapted and improved by us! 🌟 Our Github repo """, unsafe_allow_html=True, ) def restart() -> None: """ Restart the game. """ session_state.ROOM = Room(session_state.ROOM.ROOM_ID) st.session_state['selected_oppo_model'] = 'AlphaZero' def givein() -> None: """ Give in to AI. """ session_state.ROOM = deepcopy(session_state.ROOM) session_state.ROOM.WINNER = _WHITE # add 1 score to AI session_state.ROOM.HISTORY = ( session_state.ROOM.HISTORY[0] + int(session_state.ROOM.WINNER == _WHITE), session_state.ROOM.HISTORY[1] + int(session_state.ROOM.WINNER == _BLACK), ) session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5) session_state.ROOM.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1) session_state.ROOM.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000), 'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["AlphaZero"]).policy_value_fn, c_puct=5, n_playout=100), 'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["duel"]).policy_value_fn, c_puct=5, n_playout=100), 'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH[ "Gumbel AlphaZero"]).policy_value_fn, c_puct=5, n_playout=100, m_action=8), 'Gomoku Bot': Gomoku_bot(session_state.ROOM.gomoku_bot_board, -1)} session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']] session_state.ROOM.last_mcts = session_state.ROOM.MCTS session_state.ROOM.PLAYER = session_state.ROOM.PLAYER session_state.ROOM.TURN = session_state.ROOM.PLAYER session_state.ROOM.WINNER = _BLANK # 0 session_state.ROOM.ai_simula_time_list = [] session_state.ROOM.human_simula_time_list = [] session_state.ROOM.COORDINATE_1D = [BOS_TOKEN_ID] RESTART.button( "Reset", on_click=restart, help="Clear the board as well as the scores", ) GIVEIN.button( "Give in", on_click = givein, help="Give in to AI", ) # Draw the board def gomoku(): """ Draw the board. Handle the main logic. """ # Restart the game # Continue new round def another_round() -> None: """ Continue new round. """ session_state.ROOM = deepcopy(session_state.ROOM) session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5) session_state.ROOM.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1) session_state.ROOM.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000), 'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["AlphaZero"]).policy_value_fn, c_puct=5, n_playout=100), 'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["duel"]).policy_value_fn, c_puct=5, n_playout=100), 'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE, _MODEL_PATH["Gumbel AlphaZero"]).policy_value_fn, c_puct=5, n_playout=100, m_action=8), 'Gomoku Bot': Gomoku_bot(session_state.ROOM.gomoku_bot_board, -1)} session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']] session_state.ROOM.last_mcts = session_state.ROOM.MCTS session_state.ROOM.PLAYER = session_state.ROOM.PLAYER session_state.ROOM.TURN = session_state.ROOM.PLAYER session_state.ROOM.WINNER = _BLANK # 0 session_state.ROOM.ai_simula_time_list = [] session_state.ROOM.human_simula_time_list = [] session_state.ROOM.COORDINATE_1D = [BOS_TOKEN_ID] # Room status sync def sync_room() -> bool: room_id = session_state.ROOM.ROOM_ID if room_id not in server_state.ROOMS.keys(): session_state.ROOM = Room("local") return False elif server_state.ROOMS[room_id].TIME == session_state.ROOM.TIME: return False elif server_state.ROOMS[room_id].TIME < session_state.ROOM.TIME: # Only acquire the lock when writing to the server state with server_state_lock["ROOMS"]: server_rooms = server_state.ROOMS server_rooms[room_id] = session_state.ROOM server_state.ROOMS = server_rooms return True else: session_state.ROOM = server_state.ROOMS[room_id] return True # Check if winner emerge from move def check_win() -> int: """ Use convolution to check if any player wins. """ vertical = convolve( session_state.ROOM.BOARD.board_map, _VERTICAL, mode="same", ) horizontal = convolve( session_state.ROOM.BOARD.board_map, _HORIZONTAL, mode="same", ) diagonal_up_left = convolve( session_state.ROOM.BOARD.board_map, _DIAGONAL_UP_LEFT, mode="same", ) diagonal_up_right = convolve( session_state.ROOM.BOARD.board_map, _DIAGONAL_UP_RIGHT, mode="same", ) if ( np.max( [ np.max(vertical), np.max(horizontal), np.max(diagonal_up_left), np.max(diagonal_up_right), ] ) == 5 * _BLACK ): winner = _BLACK elif ( np.min( [ np.min(vertical), np.min(horizontal), np.min(diagonal_up_left), np.min(diagonal_up_right), ] ) == 5 * _WHITE ): winner = _WHITE else: winner = _BLANK return winner # Triggers the board response on click def handle_click(x, y): """ Controls whether to pass on / continue current board / may start new round """ if session_state.ROOM.BOARD.board_map[x][y] != _BLANK: pass elif ( session_state.ROOM.ROOM_ID in server_state.ROOMS.keys() and _ROOM_COLOR[session_state.OWNER] != server_state.ROOMS[session_state.ROOM.ROOM_ID].TURN ): sync_room() # normal play situation elif session_state.ROOM.WINNER == _BLANK: # session_state.ROOM = deepcopy(session_state.ROOM) # print("View of human player: ", session_state.ROOM.BOARD.board_map) move = session_state.ROOM.BOARD.location_to_move((x, y)) session_state.ROOM.current_move = move session_state.ROOM.BOARD.do_move(move) # Gomoku Bot BOARD session_state.ROOM.MCTS_dict["Gomoku Bot"].board.put(_BOARD_SIZE - move // _BOARD_SIZE - 1, move % _BOARD_SIZE) # # this move starts from left up corner (0,0), however, the move in the game starts from left bottom corner (0,0) session_state.ROOM.BOARD.board_map[x][y] = session_state.ROOM.TURN session_state.ROOM.COORDINATE_1D.append(x * _BOARD_SIZE + y) session_state.ROOM.TURN = change_turn(session_state.ROOM.TURN) win, winner = session_state.ROOM.BOARD.game_end() if win: session_state.ROOM.WINNER = winner session_state.ROOM.HISTORY = ( session_state.ROOM.HISTORY[0] + int(session_state.ROOM.WINNER == _WHITE), session_state.ROOM.HISTORY[1] + int(session_state.ROOM.WINNER == _BLACK), ) session_state.ROOM.TIME = time.time() def forbid_click(x, y): # st.warning('This posistion has been occupied!!!!', icon="⚠️") st.error("({}, {}) has been occupied!!)".format(x, y), icon="🚨") # Draw board def draw_board(response: bool): """construct each buttons for all cells of the board""" if session_state.USE_AIAID and session_state.ROOM.WINNER == _BLANK and session_state.ROOM.TURN == _BLACK: if session_state.USE_AIAID: copy_mcts = deepcopy(session_state.ROOM.AID_MCTS.mcts) _, acts_aid, probs_aid, simul_mean_time_aid = copy_mcts.get_move_probs(session_state.ROOM.BOARD) sorted_acts_probs = sorted(zip(acts_aid, probs_aid), key=lambda x: x[1], reverse=True) top_five_acts = [act for act, prob in sorted_acts_probs[:5]] top_five_probs = [prob for act, prob in sorted_acts_probs[:5]] if response and session_state.ROOM.TURN == _BLACK: # human turn print("Your turn") # construction of clickable buttons cur_move = (session_state.ROOM.current_move // _BOARD_SIZE, session_state.ROOM.current_move % _BOARD_SIZE) for i, row in enumerate(session_state.ROOM.BOARD.board_map): # print("row:", row) for j, cell in enumerate(row): if ( i * _BOARD_SIZE + j in (session_state.ROOM.COORDINATE_1D) ): if i == cur_move[0] and j == cur_move[1]: BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[_NEW], key=f"{i}:{j}", args=(i, j), on_click=forbid_click, ) else: # disable click for GPT choices BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[cell], key=f"{i}:{j}", args=(i, j), on_click=forbid_click ) else: if session_state.USE_AIAID and i * _BOARD_SIZE + j in top_five_acts: # enable click for other cells available for human choices prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)] BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[cell] + f"({round(prob, 2)})", key=f"{i}:{j}", on_click=handle_click, args=(i, j), ) else: # enable click for other cells available for human choices BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[cell], key=f"{i}:{j}", on_click=handle_click, args=(i, j), ) elif response and session_state.ROOM.TURN == _WHITE: # AI turn message.empty() with st.spinner('🔮✨ Waiting for AI response... ⏳🚀'): time.sleep(0.1) print("AI's turn") print("Below are current board under AI's view") # print(session_state.ROOM.BOARD.board_map) # move = _BOARD_SIZE * _BOARD_SIZE # forbid = [] # step = 0.1 # tmp = 0.7 # while move >= _BOARD_SIZE * _BOARD_SIZE or move in session_state.ROOM.COORDINATE_1D: # # gpt_predictions = generate_gpt2( # gpt2, # torch.tensor(session_state.ROOM.COORDINATE_1D).unsqueeze(0), # tmp # ) # print(gpt_predictions) # move = gpt_predictions[len(session_state.ROOM.COORDINATE_1D)] # print(move) # tmp += step # # if move >= _BOARD_SIZE * _BOARD_SIZE: # # forbid.append(move) # # else: # # break # # # gpt_response = move # gpt_i, gpt_j = gpt_response // _BOARD_SIZE, gpt_response % _BOARD_SIZE # print(gpt_i, gpt_j) # # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN # # simul_time = 0 if st.session_state['selected_oppo_model'] != 'Gomoku Bot': move, simul_time = session_state.ROOM.MCTS.get_action(session_state.ROOM.BOARD, return_time=True) else: move, simul_time = session_state.ROOM.MCTS.get_action(return_time=True) session_state.ROOM.ai_simula_time_list.append(simul_time) print("AI takes move: ", move) session_state.ROOM.current_move = move gpt_response = move gpt_i, gpt_j = gpt_response // _BOARD_SIZE, gpt_response % _BOARD_SIZE print("AI's move is located at ({}, {}) :".format(gpt_i, gpt_j)) move = session_state.ROOM.BOARD.location_to_move((gpt_i, gpt_j)) print("Location to move: ", move) # print("Location to move: ", move) # MCTS BOARD session_state.ROOM.BOARD.do_move(move) # Gomoku Bot BOARD session_state.ROOM.MCTS_dict["Gomoku Bot"].board.put(_BOARD_SIZE - 1 - move // _BOARD_SIZE, move % _BOARD_SIZE) # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j) if not session_state.ROOM.BOARD.game_end()[0]: if session_state.USE_AIAID: copy_mcts = deepcopy(session_state.ROOM.AID_MCTS.mcts) _, acts_aid, probs_aid, simul_mean_time_aid = copy_mcts.get_move_probs(session_state.ROOM.BOARD) sorted_acts_probs = sorted(zip(acts_aid, probs_aid), key=lambda x: x[1], reverse=True) top_five_acts = [act for act, prob in sorted_acts_probs[:5]] top_five_probs = [prob for act, prob in sorted_acts_probs[:5]] else: top_five_acts = [] top_five_probs = [] # construction of clickable buttons for i, row in enumerate(session_state.ROOM.BOARD.board_map): # print("row:", row) for j, cell in enumerate(row): if ( i * _BOARD_SIZE + j in (session_state.ROOM.COORDINATE_1D) ): if i == gpt_i and j == gpt_j: BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[_NEW], key=f"{i}:{j}", args=(i, j), on_click=handle_click, ) else: # disable click for GPT choices BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[cell], key=f"{i}:{j}", args=(i, j), on_click=forbid_click ) else: if session_state.USE_AIAID and i * _BOARD_SIZE + j in top_five_acts and not \ session_state.ROOM.BOARD.game_end()[0]: # enable click for other cells available for human choices prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)] BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[cell] + f"({round(prob, 2)})", key=f"{i}:{j}", on_click=handle_click, args=(i, j), ) else: # enable click for other cells available for human choices BOARD_PLATE[i][j].button( _PLAYER_SYMBOL[cell], key=f"{i}:{j}", on_click=handle_click, args=(i, j), ) message.markdown( 'AI agent has calculated its strategy, which takes {:.3e}s per simulation.'.format( simul_time), unsafe_allow_html=True ) LOG.subheader("Logs") # change turn session_state.ROOM.TURN = change_turn(session_state.ROOM.TURN) # session_state.ROOM.WINNER = check_win() win, winner = session_state.ROOM.BOARD.game_end() if win: session_state.ROOM.WINNER = winner session_state.ROOM.HISTORY = ( session_state.ROOM.HISTORY[0] + int(session_state.ROOM.WINNER == _WHITE), session_state.ROOM.HISTORY[1] + int(session_state.ROOM.WINNER == _BLACK), ) session_state.ROOM.TIME = time.time() if not response or session_state.ROOM.WINNER != _BLANK: if session_state.ROOM.WINNER != _BLANK: print("Game over") for i, row in enumerate(session_state.ROOM.BOARD.board_map): for j, cell in enumerate(row): BOARD_PLATE[i][j].write( _PLAYER_SYMBOL[cell], # key=f"{i}:{j}", ) # Game process control def game_control(): if session_state.ROOM.WINNER != _BLANK: draw_board(False) else: draw_board(True) if session_state.ROOM.WINNER != _BLANK or 0 not in session_state.ROOM.BOARD.board_map: GIVEIN.empty() ANOTHER_ROUND.button( "Play Next round!", on_click=another_round, help="Clear board and swap first player", ) # Infos def update_info() -> None: # Additional information SCORE_PLATE[0].metric("Gomoku-Agent", session_state.ROOM.HISTORY[0]) SCORE_PLATE[1].metric("Black", session_state.ROOM.HISTORY[1]) if session_state.ROOM.WINNER != _BLANK: st.balloons() ROUND_INFO.write( f"#### **{_PLAYER_COLOR[session_state.ROOM.WINNER]} WIN!**\n**Click buttons on the left for more plays.**" ) # elif 0 not in session_state.ROOM.BOARD.board_map: # ROUND_INFO.write("#### **Tie**") # else: # ROUND_INFO.write( # f"#### **{_PLAYER_SYMBOL[session_state.ROOM.TURN]} {_PLAYER_COLOR[session_state.ROOM.TURN]}'s turn...**" # ) # draw the plot for simulation time # 创建一个 DataFrame # print(session_state.ROOM.ai_simula_time_list) st.markdown("
", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) chart_data = pd.DataFrame(session_state.ROOM.ai_simula_time_list, columns=["Simulation Time"]) st.line_chart(chart_data) game_control() update_info() if __name__ == "__main__": gomoku()