Spaces:

Gomoku-Zero
/

Demo

Sleeping

File size: 7,768 Bytes

7d23b62

from .zobrist import ZobristCache as Zobrist
from .cache import Cache
from .eval import Evaluate, FIVE
from scipy import signal
import pickle
import os
# Path of the pickled training data, resolved relative to the directory that
# contains this module.
save_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'train_data/data', 'train_data.pkl')

# Bind numpy to the name `np` unless a global called 'numpy' already exists.
# NOTE(review): none of the imports above create a global named 'numpy', so
# this import appears to run unconditionally - confirm the guard is needed.
if 'numpy' not in globals():
    import numpy as np


class Board:
    def __init__(self, size=15, firstRole=1):
        """Create an empty ``size`` x ``size`` board.

        Args:
            size: Side length of the square board.
            firstRole: Role that moves first; 1 for black, -1 for white.
        """
        self.size = size
        side = range(self.size)
        # 0 marks an empty cell; every row is its own list object.
        self.board = [[0 for _ in side] for _ in side]
        self.firstRole = firstRole
        # Role whose turn it is; starts out as the first role.
        self.role = firstRole
        # Chronological list of {"i", "j", "role"} move records.
        self.history = []
        self.zobrist = Zobrist(self.size)

        # Per-position caches, all keyed by the value returned from self.hash().
        self.winnerCache = Cache()
        self.gameoverCache = Cache()
        self.evaluateCache = Cache()
        self.valuableMovesCache = Cache()

        self.evaluateTime = 0
        self.evaluator = Evaluate(self.size)
        # Every (row, col) cell that is still empty, in row-major order.
        self.available = [(row, col) for row in side for col in side]
        # Length-5 line masks: 1x5 row, 5x1 column, main diagonal and
        # anti-diagonal.
        # NOTE(review): presumably meant for convolution-based line detection;
        # nothing in this class reads them - confirm against callers.
        self.patterns = [
            np.ones((1, 5)),
            np.ones((5, 1)),
            np.eye(5),
            np.fliplr(np.eye(5)),
        ]

        # Training samples collected per role; replaced by the pickled data
        # when a previous dump exists at save_path.
        # NOTE: pickle.load executes arbitrary code from the file - only load
        # dumps produced by this project.
        self.train_data = {1: [], -1: []}
        if os.path.exists(save_path):
            with open(save_path, 'rb') as dump:
                self.train_data = pickle.load(dump)

    def isGameOver(self):
        # Checked
        hash = self.hash()
        if self.gameoverCache.get(hash):
            return self.gameoverCache.get(hash)
        if self.getWinner() != 0:
            self.gameoverCache.put(hash, True)
            # save train data
            # with open(save_path, 'wb') as f:
            #     pickle.dump(self.train_data, f)
            return True  # Someone has won
        # Game is over when there is no empty space on the board or someone has won
        if len(self.history) == self.size ** 2:
            self.gameoverCache.put(hash, True)
            return True
        else:
            self.gameoverCache.put(hash, False)
            return False

    def getWinner(self):
        # Checked
        hash = self.hash()
        flag = True
        if self.winnerCache.get(hash):
            return self.winnerCache.get(hash)
        directions = [[1, 0], [0, 1], [1, 1], [1, -1]]  # Horizontal, Vertical, Diagonal
        for i in range(self.size):
            for j in range(self.size):
                if self.board[i][j] == 0:
                    flag = False
                    continue
                for direction in directions:
                    count = 0
                    while (
                            0 <= i + direction[0] * count < self.size and
                            0 <= j + direction[1] * count < self.size and
                            self.board[i + direction[0] * count][j + direction[1] * count] == self.board[i][j]
                    ):
                        count += 1
                    if count >= 5:
                        self.winnerCache.put(hash, self.board[i][j])
                        return self.board[i][j]
        if flag:
            print("tie!!!")
            return 0
        self.winnerCache.put(hash, 0)
        return 0

    def getValidMoves(self):
        return self.available

    def put(self, i, j, role=None):
        # Checked
        if role is None:
            role = self.role
        if not isinstance(i, int) or not isinstance(j, int):
            print("Invalid move: Not Number!", i, j)
            return False
        if self.board[i][j] != 0:
            print("Invalid move!", i, j)
            return False
        self.board[i][j] = role
        self.available.remove((i, j))
        self.history.append({"i": i, "j": j, "role": role})
        self.zobrist.togglePiece(i, j, role)
        self.evaluator.move(i, j, role)
        self.role *= -1  # Switch role
        return True

    def undo(self):
        # Checked
        if len(self.history) == 0:
            print("No moves to undo!")
            return False

        lastMove = self.history.pop()
        self.board[lastMove['i']][lastMove['j']] = 0  # Remove the piece from the board
        self.role = lastMove['role']  # Switch back to the previous player
        self.zobrist.togglePiece(lastMove['i'], lastMove['j'], lastMove['role'])
        self.evaluator.undo(lastMove['i'], lastMove['j'])
        self.available.append((lastMove['i'], lastMove['j']))
        return True

    def position2coordinate(self, position):
        # checked
        row = position // self.size
        col = position % self.size
        return [row, col]

    def coordinate2position(self, coordinate):
        # Checked
        return coordinate[0] * self.size + coordinate[1]

    def getValuableMoves(self, role, depth=0, onlyThree=False, onlyFour=False):
        # Checked
        hash = self.hash()
        prev = self.valuableMovesCache.get(hash)
        if prev:
            if (prev["role"] == role and
                    prev["depth"] == depth and
                    prev["onlyThree"] == onlyThree
                    and prev["onlyFour"] == onlyFour):
                return prev["moves"]

        moves, train_data = self.evaluator.getMoves(role, depth, onlyThree, onlyFour)
        self.train_data[self.role].append(train_data)
        # Handle a special case, if the center point is not occupied, add it by default

        # 开局的时候随机走一步，增加开局的多样性
        if not onlyThree and not onlyFour:
            center = self.size // 2
            if self.board[center][center] == 0:
                moves.append((center, center))

            # x_step = np.random.randint(-self.size // 2, self.size // 2)
            # y_step = np.random.randint(-self.size // 2, self.size // 2)
            # x = center + x_step
            # y = center + y_step
            # if 0 <= x < self.size and 0 <= y < self.size and self.board[x][y] == 0:
            #     moves.append((x, y))

        self.valuableMovesCache.put(hash, {
            "role": role,
            "moves": moves,
            "depth": depth,
            "onlyThree": onlyThree,
            "onlyFour": onlyFour
        })
        return moves

    def display(self, extraPoints=[]):
        # Checked
        extraPosition = [self.coordinate2position(point) for point in extraPoints]
        result = ""
        for i in range(self.size):
            for j in range(self.size):
                position = self.coordinate2position([i, j])
                if position in extraPosition:
                    result += "? "
                    continue
                value = self.board[i][j]
                if value == 1:
                    result += "B " # Black
                elif value == -1:
                    result += "W " # White
                else:
                    result += "- "
            result += "\n"
        return result

    def hash(self):
        # Checked
        return self.zobrist.getHash()  # Return the hash value of the current board, used for caching

    def evaluate(self, role):
        # Checked
        hash_key = self.hash()
        prev = self.evaluateCache.get(hash_key)
        if prev:
            if prev["role"] == role:
                return prev["score"]

        winner = self.getWinner()
        score = 0
        if winner != 0:
            score = FIVE * winner * role
        else:
            score = self.evaluator.evaluate(role)

        self.evaluateCache.put(hash_key, {"role": role, "score": score})
        return score

    def reverse(self):
        """Build a colour-swapped copy of this board.

        A fresh ``Board`` of the same size is created with the opposite first
        role, and every move in the history is replayed on it in order with
        the role negated.

        Returns:
            The new ``Board``; this board is not modified.
        """
        mirrored = Board(self.size, -self.firstRole)
        for record in self.history:
            mirrored.put(record['i'], record['j'], -record['role'])
        return mirrored

    def toString(self):
        # Checked
        return ''.join([''.join(map(str, row)) for row in self.board])