Reputation: 57421
I've written a program (listed below) which plays Tic Tac Toe with a Tkinter GUI. If I invoke it like this:
root = tk.Tk()
root.title("Tic Tac Toe")
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game = Game(root, player1=human_player, player2=player2)
game.play()
root.mainloop()
it works as expected and the HumanPlayer can play against player2, which is a computer player (specifically, a QPlayer). The figure below shows how the HumanPlayer (with mark "X") easily wins.
In order to improve the performance of the QPlayer, I'd like to 'train' it by letting it play against an instance of itself before it plays against the human player. I've tried modifying the above code as follows:
root = tk.Tk()
root.title("Tic Tac Toe")
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
for _ in range(1): # Play a couple of training games
training_game = Game(root, player1, player2)
training_game.play()
training_game.reset()
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game = Game(root, player1=human_player, player2=player2)
game.play()
root.mainloop()
What I then find, however, is that the Tkinter window contains two Tic Tac Toe boards (depicted below), and the buttons of the second board are unresponsive.
In the above code, the reset() method is the same one used in the callback of the "Reset" button, which normally makes the board blank again so play can start over. Why am I seeing two boards (one of which is unresponsive) instead of a single, responsive board?
For reference, the full code of the Tic Tac Toe program is listed below (with the 'offensive' lines of code commented out):
import numpy as np
import Tkinter as tk
import copy
class Game:
def __init__(self, master, player1, player2, Q_learn=None, Q={}, alpha=0.3, gamma=0.9):
frame = tk.Frame()
frame.grid()
self.master = master
self.player1 = player1
self.player2 = player2
self.current_player = player1
self.other_player = player2
self.empty_text = ""
self.board = Board()
self.buttons = [[None for _ in range(3)] for _ in range(3)]
for i in range(3):
for j in range(3):
self.buttons[i][j] = tk.Button(frame, height=3, width=3, text=self.empty_text, command=lambda i=i, j=j: self.callback(self.buttons[i][j]))
self.buttons[i][j].grid(row=i, column=j)
self.reset_button = tk.Button(text="Reset", command=self.reset)
self.reset_button.grid(row=3)
self.Q_learn = Q_learn
self.Q_learn_or_not()
if self.Q_learn:
self.Q = Q
self.alpha = alpha # Learning rate
self.gamma = gamma # Discount rate
self.share_Q_with_players()
def Q_learn_or_not(self): # If either player is a QPlayer, turn on Q-learning
if self.Q_learn is None:
if isinstance(self.player1, QPlayer) or isinstance(self.player2, QPlayer):
self.Q_learn = True
def share_Q_with_players(self): # The action value table Q is shared with the QPlayers to help them make their move decisions
if isinstance(self.player1, QPlayer):
self.player1.Q = self.Q
if isinstance(self.player2, QPlayer):
self.player2.Q = self.Q
def callback(self, button):
if self.board.over():
pass # Do nothing if the game is already over
else:
if isinstance(self.current_player, HumanPlayer) and isinstance(self.other_player, HumanPlayer):
if self.empty(button):
move = self.get_move(button)
self.handle_move(move)
elif isinstance(self.current_player, HumanPlayer) and isinstance(self.other_player, ComputerPlayer):
computer_player = self.other_player
if self.empty(button):
human_move = self.get_move(button)
self.handle_move(human_move)
if not self.board.over(): # Trigger the computer's next move
computer_move = computer_player.get_move(self.board)
self.handle_move(computer_move)
def empty(self, button):
return button["text"] == self.empty_text
def get_move(self, button):
info = button.grid_info()
move = (info["row"], info["column"]) # Get move coordinates from the button's metadata
return move
def handle_move(self, move):
try:
if self.Q_learn:
self.learn_Q(move)
i, j = move # Get row and column number of the corresponding button
self.buttons[i][j].configure(text=self.current_player.mark) # Change the label on the button to the current player's mark
self.board.place_mark(move, self.current_player.mark) # Update the board
if self.board.over():
self.declare_outcome()
else:
self.switch_players()
except:
print "There was an error handling the move."
pass # This might occur if no moves are available and the game is already over
def declare_outcome(self):
if self.board.winner() is None:
print "Cat's game."
else:
print "The game is over. The player with mark %s won!" % self.current_player.mark
def reset(self):
print "Resetting..."
for i in range(3):
for j in range(3):
self.buttons[i][j].configure(text=self.empty_text)
self.board = Board(grid=np.ones((3,3))*np.nan)
self.current_player = self.player1
self.other_player = self.player2
# np.random.seed(seed=0) # Set the random seed to zero to see the Q-learning 'in action' or for debugging purposes
self.play()
def switch_players(self):
if self.current_player == self.player1:
self.current_player = self.player2
self.other_player = self.player1
else:
self.current_player = self.player1
self.other_player = self.player2
def play(self):
if isinstance(self.player1, HumanPlayer) and isinstance(self.player2, HumanPlayer):
pass # For human vs. human, play relies on the callback from button presses
elif isinstance(self.player1, HumanPlayer) and isinstance(self.player2, ComputerPlayer):
pass
elif isinstance(self.player1, ComputerPlayer) and isinstance(self.player2, HumanPlayer):
first_computer_move = self.player1.get_move(self.board) # If player 1 is a computer, it needs to be triggered to make the first move.
self.handle_move(first_computer_move)
elif isinstance(self.player1, ComputerPlayer) and isinstance(self.player2, ComputerPlayer):
while not self.board.over(): # Make the two computer players play against each other without button presses
move = self.current_player.get_move(self.board)
self.handle_move(move)
def learn_Q(self, move): # If Q-learning is toggled on, "learn_Q" should be called after receiving a move from an instance of Player and before implementing the move (using Board's "place_mark" method)
state_key = QPlayer.make_and_maybe_add_key(self.board, self.current_player.mark, self.Q)
next_board = self.board.get_next_board(move, self.current_player.mark)
reward = next_board.give_reward()
next_state_key = QPlayer.make_and_maybe_add_key(next_board, self.other_player.mark, self.Q)
if next_board.over():
expected = reward
else:
next_Qs = self.Q[next_state_key] # The Q values represent the expected future reward for player X for each available move in the next state (after the move has been made)
if self.current_player.mark == "X":
expected = reward + (self.gamma * min(next_Qs.values())) # If the current player is X, the next player is O, and the move with the minimum Q value should be chosen according to our "sign convention"
elif self.current_player.mark == "O":
expected = reward + (self.gamma * max(next_Qs.values())) # If the current player is O, the next player is X, and the move with the maximum Q vlue should be chosen
change = self.alpha * (expected - self.Q[state_key][move])
self.Q[state_key][move] += change
class Board:
def __init__(self, grid=np.ones((3,3))*np.nan):
self.grid = grid
def winner(self):
rows = [self.grid[i,:] for i in range(3)]
cols = [self.grid[:,j] for j in range(3)]
diag = [np.array([self.grid[i,i] for i in range(3)])]
cross_diag = [np.array([self.grid[2-i,i] for i in range(3)])]
lanes = np.concatenate((rows, cols, diag, cross_diag)) # A "lane" is defined as a row, column, diagonal, or cross-diagonal
any_lane = lambda x: any([np.array_equal(lane, x) for lane in lanes]) # Returns true if any lane is equal to the input argument "x"
if any_lane(np.ones(3)):
return "X"
elif any_lane(np.zeros(3)):
return "O"
def over(self): # The game is over if there is a winner or if no squares remain empty (cat's game)
return (not np.any(np.isnan(self.grid))) or (self.winner() is not None)
def place_mark(self, move, mark): # Place a mark on the board
num = Board.mark2num(mark)
self.grid[tuple(move)] = num
@staticmethod
def mark2num(mark): # Converts a player's mark to a number to be inserted in the Numpy array representing the board. The mark must be either "X" or "O".
d = {"X": 1, "O": 0}
return d[mark]
def available_moves(self):
return [(i,j) for i in range(3) for j in range(3) if np.isnan(self.grid[i][j])]
def get_next_board(self, move, mark):
next_board = copy.deepcopy(self)
next_board.place_mark(move, mark)
return next_board
def make_key(self, mark): # For Q-learning, returns a 10-character string representing the state of the board and the player whose turn it is
fill_value = 9
filled_grid = copy.deepcopy(self.grid)
np.place(filled_grid, np.isnan(filled_grid), fill_value)
return "".join(map(str, (map(int, filled_grid.flatten())))) + mark
def give_reward(self): # Assign a reward for the player with mark X in the current board position.
if self.over():
if self.winner() is not None:
if self.winner() == "X":
return 1.0 # Player X won -> positive reward
elif self.winner() == "O":
return -1.0 # Player O won -> negative reward
else:
return 0.5 # A smaller positive reward for cat's game
else:
return 0.0 # No reward if the game is not yet finished
class Player(object):
def __init__(self, mark):
self.mark = mark
self.get_opponent_mark()
def get_opponent_mark(self):
if self.mark == 'X':
self.opponent_mark = 'O'
elif self.mark == 'O':
self.opponent_mark = 'X'
else:
print "The player's mark must be either 'X' or 'O'."
class HumanPlayer(Player):
def __init__(self, mark):
super(HumanPlayer, self).__init__(mark=mark)
class ComputerPlayer(Player):
def __init__(self, mark):
super(ComputerPlayer, self).__init__(mark=mark)
class RandomPlayer(ComputerPlayer):
def __init__(self, mark):
super(RandomPlayer, self).__init__(mark=mark)
@staticmethod
def get_move(board):
moves = board.available_moves()
if moves: # If "moves" is not an empty list (as it would be if cat's game were reached)
return moves[np.random.choice(len(moves))] # Apply random selection to the index, as otherwise it will be seen as a 2D array
class THandPlayer(ComputerPlayer):
def __init__(self, mark):
super(THandPlayer, self).__init__(mark=mark)
def get_move(self, board):
moves = board.available_moves()
if moves:
for move in moves:
if THandPlayer.next_move_winner(board, move, self.mark):
return move
elif THandPlayer.next_move_winner(board, move, self.opponent_mark):
return move
else:
return RandomPlayer.get_move(board)
@staticmethod
def next_move_winner(board, move, mark):
return board.get_next_board(move, mark).winner() == mark
class QPlayer(ComputerPlayer):
def __init__(self, mark, Q={}, epsilon=0.2):
super(QPlayer, self).__init__(mark=mark)
self.Q = Q
self.epsilon = epsilon
def get_move(self, board):
if np.random.uniform() < self.epsilon: # With probability epsilon, choose a move at random ("epsilon-greedy" exploration)
return RandomPlayer.get_move(board)
else:
state_key = QPlayer.make_and_maybe_add_key(board, self.mark, self.Q)
Qs = self.Q[state_key]
if self.mark == "X":
return QPlayer.stochastic_argminmax(Qs, max)
elif self.mark == "O":
return QPlayer.stochastic_argminmax(Qs, min)
@staticmethod
def make_and_maybe_add_key(board, mark, Q): # Make a dictionary key for the current state (board + player turn) and if Q does not yet have it, add it to Q
state_key = board.make_key(mark)
if Q.get(state_key) is None:
moves = board.available_moves()
Q[state_key] = {move: 0.0 for move in moves} # The available moves in each state are initially given a default value of zero
return state_key
@staticmethod
def stochastic_argminmax(Qs, min_or_max): # Determines either the argmin or argmax of the array Qs such that if there are 'ties', one is chosen at random
min_or_maxQ = min_or_max(Qs.values())
if Qs.values().count(min_or_maxQ) > 1: # If there is more than one move corresponding to the maximum Q-value, choose one at random
best_options = [move for move in Qs.keys() if Qs[move] == min_or_maxQ]
move = best_options[np.random.choice(len(best_options))]
else:
move = min_or_max(Qs, key=Qs.get)
return move
root = tk.Tk()
root.title("Tic Tac Toe")
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
# for _ in range(1): # Play a couple of training games
# training_game = Game(root, player1, player2)
# training_game.play()
# training_game.reset()
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game = Game(root, player1=human_player, player2=player2)
game.play()
root.mainloop()
Upvotes: 0
Views: 2127
Reputation: 54
I've noticed that if you were to run this code with Python 3.2.3 or a similar version, all of the print statements would need to be enclosed in parentheses, and the Tkinter import would need to use the lowercase tkinter module.
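For example, a minimal sketch of just those Python 3 adjustments (only the changed lines are shown; the rest of the program is assumed unchanged):
import tkinter as tk  # Python 3: the module name is lowercase "tkinter"

print("Resetting...")  # Python 3: print is a function, so the argument needs parentheses
print("Cat's game.")
print("There was an error handling the move.")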
Upvotes: 0
Reputation: 33193
It looks like you only need to create the board once, since the reset method clears it for the new players. Each time you create a Game instance, you create a new Tk frame, so you either need to destroy the old one or reuse the existing window by not creating a new Game instance each time.
A minor change to the main code at the bottom of the file seems to fix this:
player1 = QPlayer(mark="X")
player2 = QPlayer(mark="O")
game = Game(root, player1, player2)
for _ in range(1): # Play a couple of training games
game.play()
game.reset()
human_player = HumanPlayer(mark="X")
player2.epsilon = 0 # For playing the actual match, disable exploratory moves
game.player1 = human_player
game.player2 = player2
game.play()
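Alternatively, if you do want a fresh Game for the real match, the training game's widgets have to be destroyed first. A rough sketch, assuming Game is modified to keep a reference to its frame (e.g. self.frame = tk.Frame(master) followed by self.frame.grid() in __init__), might look like this:
training_game = Game(root, player1, player2)
training_game.play()
training_game.frame.destroy()         # hypothetical attribute; requires storing the frame in __init__ as noted above
training_game.reset_button.destroy()  # the Reset button is created outside the frame, so remove it separately
game = Game(root, player1=human_player, player2=player2)
game.play()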
Upvotes: 1