sjz committed
Commit 2f21cdd
1 Parent(s): 70d6bef

fix AI Aid error when taking turn

.vscode/settings.json ADDED
@@ -0,0 +1,4 @@
+{
+    "editor.suggest.snippetsPreventQuickSuggestions": false,
+    "aiXcoder.showTrayIcon": true
+}
Gomoku_MCTS/__init__.py CHANGED
@@ -1,6 +1,7 @@
 from .mcts_pure import MCTSPlayer as MCTSpure
 from .mcts_alphaZero import MCTSPlayer as alphazero
 from .dueling_net import PolicyValueNet
+# from .policy_value_net_pytorch import PolicyValueNet
 import numpy as np
 
 
Gomoku_MCTS/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (5.51 kB)

Gomoku_MCTS/__pycache__/dueling_net.cpython-38.pyc ADDED
Binary file (4.72 kB)

Gomoku_MCTS/__pycache__/mcts_alphaZero.cpython-38.pyc ADDED
Binary file (8.09 kB)

Gomoku_MCTS/__pycache__/mcts_pure.cpython-38.pyc ADDED
Binary file (8.74 kB)

Gomoku_MCTS/__pycache__/policy_value_net_pytorch.cpython-38.pyc ADDED
Binary file (4.12 kB)

Gomoku_MCTS/dueling_net.py CHANGED
@@ -52,7 +52,7 @@ class DuelingDQNNet(nn.Module):
         return F.log_softmax(q_values, dim=1), val
 
 class PolicyValueNet():
-    """policy-value network """
+    """dueling policy-value network """
     def __init__(self, board_width, board_height,
                  model_file=None, use_gpu=False):
         self.use_gpu = use_gpu
@@ -70,6 +70,7 @@ class PolicyValueNet():
         if model_file:
            net_params = torch.load(model_file)
            self.policy_value_net.load_state_dict(net_params, strict=False)
+            print('loaded dueling model file')
 
     def policy_value(self, state_batch):
         """
Gomoku_MCTS/policy_value_net_pytorch.py CHANGED
@@ -55,7 +55,7 @@ class Net(nn.Module):
 
 
 class PolicyValueNet():
-    """policy-value network """
+    """alphazero policy-value network """
     def __init__(self, board_width, board_height,
                  model_file=None, use_gpu=False):
         self.use_gpu = use_gpu
@@ -71,6 +71,7 @@ class PolicyValueNet():
         if model_file:
            net_params = torch.load(model_file)
            self.policy_value_net.load_state_dict(net_params)
+            print('loaded model file')
 
     def policy_value(self, state_batch):
         """
__pycache__/const.cpython-38.pyc ADDED
Binary file (959 Bytes)
 
pages/Player_VS_AI.py CHANGED
@@ -46,7 +46,7 @@ class Room:
         self.WINNER = _BLANK
         self.TIME = time.time()
         self.MCTS = MCTSpure(c_puct=5, n_playout=10)
-        self.MCTS = alphazero(PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE).policy_value_fn, c_puct=5, n_playout=10)
+        self.MCTS = alphazero(PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, 'Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth').policy_value_fn, c_puct=5, n_playout=100)
         self.COORDINATE_1D = [_BOARD_SIZE_1D + 1]
         self.current_move = -1
         self.simula_time_list = []
@@ -242,7 +242,7 @@ def gomoku():
         # normal play situation
         elif session_state.ROOM.WINNER == _BLANK:
             # session_state.ROOM = deepcopy(session_state.ROOM)
-            print("View of human player: ", session_state.ROOM.BOARD.board_map)
+            # print("View of human player: ", session_state.ROOM.BOARD.board_map)
             move = session_state.ROOM.BOARD.location_to_move((x, y))
             session_state.ROOM.current_move = move
             session_state.ROOM.BOARD.do_move(move)
@@ -269,7 +269,7 @@ def gomoku():
     # Draw board
     def draw_board(response: bool):
         """construct each buttons for all cells of the board"""
-        if session_state.USE_AIAID and session_state.ROOM.WINNER == _BLANK:
+        if session_state.USE_AIAID and session_state.ROOM.WINNER == _BLANK and session_state.ROOM.TURN == _BLACK:
             copy_mcts = deepcopy(session_state.ROOM.MCTS.mcts)
             _, acts, probs, simul_mean_time = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
             sorted_acts_probs = sorted(zip(acts, probs), key=lambda x: x[1], reverse=True)
@@ -318,7 +318,7 @@ def gomoku():
             time.sleep(0.1)
             print("AI's turn")
             print("Below are current board under AI's view")
-            print(session_state.ROOM.BOARD.board_map)
+            # print(session_state.ROOM.BOARD.board_map)
             move, simul_time = session_state.ROOM.MCTS.get_action(session_state.ROOM.BOARD, return_time=True)
             session_state.ROOM.simula_time_list.append(simul_time)
             print("AI takes move: ", move)
@@ -332,6 +332,12 @@ def gomoku():
             # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN
             session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
 
+            copy_mcts = deepcopy(session_state.ROOM.MCTS.mcts)
+            _, acts, probs, simul_mean_time = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
+            sorted_acts_probs = sorted(zip(acts, probs), key=lambda x: x[1], reverse=True)
+            top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
+            top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
+
         # construction of clickable buttons
         for i, row in enumerate(session_state.ROOM.BOARD.board_map):
             # print("row:", row)
@@ -390,7 +396,8 @@ def gomoku():
         session_state.ROOM.TIME = time.time()
 
         if not response or session_state.ROOM.WINNER != _BLANK:
-            print("Game over")
+            if session_state.ROOM.WINNER != _BLANK:
+                print("Game over")
             for i, row in enumerate(session_state.ROOM.BOARD.board_map):
                 for j, cell in enumerate(row):
                     BOARD_PLATE[i][j].write(
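
The core of the "AI Aid" fix above is that the move-suggestion search now runs only on the human (black) turn and is recomputed after the AI has moved, always against a deep copy of the room's MCTS so the live search tree is left untouched. Below is a hedged sketch of that suggestion step, lifted from the changed lines; `room` is a hypothetical stand-in for `session_state.ROOM` and is assumed to expose the same `MCTS` and `BOARD` attributes.

# Hedged sketch of the AI-Aid suggestion step shown in the diff above;
# `room` is a hypothetical stand-in for session_state.ROOM.
from copy import deepcopy

def top_five_suggestions(room):
    # Search on a copy so the suggestion playouts do not mutate the tree
    # that the real AI player will reuse on its next turn.
    copy_mcts = deepcopy(room.MCTS.mcts)
    _, acts, probs, simul_mean_time = copy_mcts.get_move_probs(room.BOARD)
    sorted_acts_probs = sorted(zip(acts, probs), key=lambda x: x[1], reverse=True)
    top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
    top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
    return top_five_acts, top_five_probs, simul_mean_time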