Giuseppe Scoppetta

Reputation: 11

Problem training AI in own Python Game (NEAT-Python)

I can't manage to train an AI with NEAT-Python in my own game environment :/

I downloaded a Flappy Bird example with a NEAT-Python implementation from the internet, and there it works.

I created a Git repo with a file that starts training the AI in my game:

https://bitbucket.org/salvaracer/testpy/src/master/

*The Problem*

The AI doesn't get better fitness scores. I do get different fitness scores, better and worse, but after many generations it has not learned that it should move to the targets, so the fitness/AI is not really evolving.

I tried a lot of things:

A population of at least 50 and at least 200 generations, up to 2000 generations; I never got good results. Sometimes a player randomly performs well, but the fitness does not increase over time. I also tried different fitness calculations. The AI never evolves :/
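
One of the fitness calculations I tried combines collected targets, distance to the nearest target, and survival time; it is the same formula that appears in play_game further down. In isolation it looks roughly like this (the screen width value is just an example):

# Sketch of one fitness variant I tried (same formula as in play_game below).
def compute_fitness(score, nearest_target_distance, tick_index, screen_width=800):
    # reward collected targets, closeness to the nearest target, and survival time
    return score * 100 + (screen_width - nearest_target_distance) / 10 + tick_index / 100.0

print(compute_fitness(score=2, nearest_target_distance=120, tick_index=350))  # 271.5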


The code entry file is:

import random
from Square import Square
import neat
import os
from game_singleplayer import Game
import global_vars

GENERATION = 0
MAX_FITNESS = 0
BEST_GENOME = 0

def eval_genomes(genomes, config):
    global GENERATION, MAX_FITNESS, BEST_GENOME

    GENERATION += 1
    i = 0
    for genome_id, genome in genomes:
        i+=1
        game = Game()
        GENOMES_FITNESSES = game.train_ai([genome], config, GENERATION, len(genomes), MAX_FITNESS, genome_id)
        if genome.fitness is None:
            genome.fitness = float('-inf') #fixes errors on early termination

        for genfit in GENOMES_FITNESSES:
            if genfit[0] == genome_id:
                genome.fitness = genfit[1]

                # print("Gen : {} Genome # : {}  Fitness : {} Max Fitness : {}".format(GENERATION,i,genome.fitness, MAX_FITNESS))
                if (genome.fitness):
                    if genome.fitness >= MAX_FITNESS:
                        MAX_FITNESS = genome.fitness
                        BEST_GENOME = genome

def eval_genomes_all(genomes, config):
    print(len(genomes))
    global GENERATION, MAX_FITNESS, BEST_GENOME

    GENERATION += 1

    game = Game()
    GENOMES_FITNESSES = game.train_ai_all(genomes, config, GENERATION, len(genomes), MAX_FITNESS)
    for genfit in GENOMES_FITNESSES:
        for genome_id, genome in genomes:
            if genfit[0] == genome_id:
                genome.fitness = genfit[1]

                # print("Gen : {} Genome # : {}  Fitness : {} Max Fitness : {}".format(GENERATION,i,genome.fitness, MAX_FITNESS))
                if (genome.fitness):
                    if genome.fitness >= MAX_FITNESS:
                        MAX_FITNESS = genome.fitness
                        BEST_GENOME = genome
    

def run_neat(config):
    p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-2718')
    # p = neat.Population(config)

    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    p.add_reporter(neat.Checkpointer(100))

    # winner = p.run(eval_genomes, 100) # Generations
    winner = p.run(eval_genomes_all, 2000) # Generations


if __name__ == '__main__':
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config.txt')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
    run_neat(config)

    # game = Game()
    # game.test_ai(config)
    # game.play()
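
For comparison, the standard NEAT-Python pattern (as in the XOR example from the docs) assigns genome.fitness directly inside eval_genomes instead of collecting fitnesses in a separate list. A minimal sketch (evaluate_one_run is a hypothetical helper that would play one game with the given network and return its score):

def eval_genomes_reference(genomes, config):
    # documented NEAT-Python pattern: build each genome's network, evaluate it, assign fitness directly
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        genome.fitness = evaluate_one_run(net)  # hypothetical helper: plays one game, returns a score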

In the Game class I have (np below is numpy and preprocessing is sklearn.preprocessing):

def train_ai(self, genomes, config, gen, populationInt, maxfit, genome_id):
        self.squares, self.targets = [], []

        self.squares.append(Square(agent = 2, playerIndex = genome_id, targetsNum = 1))
        # self.targets.append(Target())
        
        GENOMES_FITNESSES = self.play_game([genomes[0]], config, 0, gen, populationInt, maxfit)
        return GENOMES_FITNESSES[0]


def train_ai_all(self, genomes, config, gen, populationInt, maxfit):
        self.squares, self.targets = [], []
        
        for genome_id, genome in genomes:
            self.squares.append(Square(agent = 2, playerIndex = genome_id, targetsNum = 3))

        GENOMES_FITNESSES = self.play_game(genomes, config, 0, gen, populationInt, maxfit)
        return GENOMES_FITNESSES

def play_game(self, genomes = False, config = False, FPS = 30, GENERATION = 0, populationInt = 0, MAX_FITNESS = 0, GENOME_INDEX = 0, genome_id = 0):
        GENOMES_FITNESSES = []
        pygame.init()
        screen = pygame.display.set_mode((global_vars.SCREEN_WIDTH, global_vars.SCREEN_HEIGHT))
        
        if FPS > 0:
            clock = pygame.time.Clock()
        # Hauptspielschleife
        running = True
        self.tickIndex = 0
        while running:
            self.tickIndex += 1
            
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False

            # init
            screen.fill((0, 0, 0))
            all_sprites = pygame.sprite.Group()

            overIn = global_vars.PLAY_MAX_TICKS - self.tickIndex

            for target in self.targets:
                target.drawObject(screen, all_sprites)

            for square in self.squares:
                square.tickIndex += 1
                
                if overIn > global_vars.PLAYER_MAX_TICKS - self.tickIndex:
                    overIn = global_vars.PLAYER_MAX_TICKS - self.tickIndex
                
                for genome_id, genome in genomes:
                    if genome_id == square.playerIndex:
                        if square.agent == 2:
                            net = neat.nn.FeedForwardNetwork.create(genome, config)
                            # output = net.activate((square.rect.x, square.rect.y, square.Cx, square.Cy, square.nearestChildDistance/10, square.direction))

                            x_array = np.array([square.distTop, square.distLeft, square.distBottom, square.distRight, square.nearestChildDistance, square.direction])
                            normalized_arr = preprocessing.normalize([x_array])
                            # print(normalized_arr)
                            output = net.activate(normalized_arr[0])
                            # probabilities = global_vars.softmax(output)
                            # decision = np.argmax(probabilities)
                            action1 = 1 if output[0] >= 0.5 else 0
                            action2 = 1 if output[1] >= 0.5 else 0
                            actions = [action1, action2]
                        else:
                            # manual player and computer no need actions here
                            actions = []
                    
                        # move and draw
                        square.move(self, actions)
                        square.handle_collision(self.targets)

                        fitness = square.score*100 + (global_vars.SCREEN_WIDTH - square.nearestChildDistance)/10 + (self.tickIndex/100.0)
                        lastFitness = [genome_id, fitness]

                        square.drawObject(screen, all_sprites, fitness)
                        all_sprites.draw(screen)
                        
                        sx = square.rect.x + square.size/2
                        sy = square.rect.y + square.size/2
                        sP = (sx, sy)

                        s0 = (sx, 0)
                        s1 = (0, sy)
                        s2 = (sx, global_vars.SCREEN_HEIGHT)
                        s3 = (global_vars.SCREEN_WIDTH, sy)

                        for targetC in square.targets:
                            pygame.draw.line(screen, (180, 180, 180), sP, (square.Cx + targetC.size/2, square.Cy + targetC.size/2))

                        # remove the square and record its last fitness once it leaves the screen
                        if square.rect.y <= 0:
                            self.squares.remove(square)
                            GENOMES_FITNESSES.append(lastFitness)
                            continue
                        if square.rect.y >= global_vars.SCREEN_HEIGHT - square.rect.height:
                            self.squares.remove(square)
                            GENOMES_FITNESSES.append(lastFitness)
                            continue
                        if square.rect.x <= 0:
                            self.squares.remove(square)
                            GENOMES_FITNESSES.append(lastFitness)
                            continue
                        if square.rect.x >= global_vars.SCREEN_WIDTH - square.rect.width:
                            self.squares.remove(square)
                            GENOMES_FITNESSES.append(lastFitness)
                            continue


                if square.tickIndex >= global_vars.PLAYER_MAX_TICKS:
                    try:
                        self.squares.remove(square)
                    except ValueError:
                        pass 
                    GENOMES_FITNESSES.append(lastFitness)
                    continue

            # Additional Text
            font = pygame.font.Font(None, 24)
            infotxt = font.render("Game left: {}".format(int(global_vars.PLAY_MAX_TICKS - self.tickIndex)/10), True, (255, 255, 255))
            if FPS == 0:
                infotxt = font.render("GEN: {} P: {}/{} Max Fit: {} Over in: {}".format(GENERATION, len(self.squares), populationInt, int(MAX_FITNESS), int(overIn)), True, (255, 255, 255))

            text_rect = infotxt.get_rect()
            text_rect.topleft = (10, 10)
            screen.blit(infotxt, text_rect)

            pygame.display.flip()

            if self.tickIndex > global_vars.PLAY_MAX_TICKS or len(self.squares) == 0:
                GENOMES_FITNESSES.append(lastFitness)
                return GENOMES_FITNESSES

            if FPS > 0:
                clock.tick(FPS)

        # end of play_game()
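
A note on the input scaling in play_game: preprocessing.normalize from sklearn with its default norm='l2' rescales the whole 6-value input row to unit length, so each input the network sees depends on the magnitudes of all the others in that frame. A small standalone example of what that produces:

import numpy as np
from sklearn import preprocessing

# example values for (distTop, distLeft, distBottom, distRight, nearestChildDistance, direction)
x_array = np.array([50.0, 300.0, 550.0, 500.0, 120.0, 3.0])

normalized = preprocessing.normalize([x_array])  # default norm='l2': the row is divided by its L2 norm
print(normalized[0])                  # each component divided by the vector's length
print(np.linalg.norm(normalized[0]))  # 1.0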

and a mostly default config:

[NEAT]
fitness_criterion     = max
fitness_threshold     = 2000
pop_size              = 25
reset_on_extinction   = False

[DefaultGenome]
# node activation options
# activation_default      = random
activation_default      = sigmoid
activation_mutate_rate  = 0.0
activation_options      = abs clamped cube exp gauss hat identity inv log relu sigmoid sin softplus square tanh

# node aggregation options
aggregation_default     = sum
aggregation_mutate_rate = 0.1
aggregation_options     = sum

# node bias options
bias_init_mean          = 0.0
bias_init_stdev         = 1.0
bias_max_value          = 30.0
bias_min_value          = -30.0
bias_mutate_power       = 0.5
bias_mutate_rate        = 0.7
bias_replace_rate       = 0.1
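
(The paste above cuts off before the network I/O settings. For the 6 inputs and 2 outputs used in play_game, that part of [DefaultGenome] has to look roughly like this; a sketch, the exact values in my file may differ.)

# network parameters (sketch matching the 6 inputs / 2 outputs used in play_game)
feed_forward            = True
num_hidden              = 0
num_inputs              = 6
num_outputs             = 2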

The point is, none of my attempts and configurations worked well; I want to know what I'm doing wrong :)

Upvotes: 1

Views: 260

Answers (0)
