Reputation: 121
I'm trying to train an AI to play snake with a genetic algorithm. I'm using the Python library NEAT for the training. The problem is that the training doesn't converge and the AI doesn't learn. Here's the training code:
class SnakeEnv():
    def __init__(self, screen):
        self.action_space = np.array([0, 1, 2, 3])
        self.state = None
        pygame.init()
        self.screen = screen
        self.snakes = []
        self.total_reward = 0

    def reset(self):
        self.__init__()

    def get_state(self):
        return np.reshape(self.snake.board, (400, 1)).T / 5

    def render(self, snake):
        self.screen.fill((0, 0, 0))
        snake.food.render()
        snake.render()
        pygame.display.flip()

    def step(self, snake, action):
        snake.move(action)
        self.render(snake)

    def close(self):
        pygame.quit()

    def eval_genomes(self, genomes, config):
        global nets_g
        nets_g = []
        nets = []
        snakes = []
        global ge_g
        ge_g = []
        ge = []
        for genome_id, genome in genomes:
            genome.fitness = 0
            net = neat.nn.FeedForwardNetwork.create(genome, config)
            nets.append(net)
            snakes.append(Snake(self.screen))
            ge.append(genome)
        ge_g = ge.copy()
        nets_g = nets.copy()
        run = True
        # Main loop
        while run:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    run = False
                    pygame.quit()
                    quit()
                    break
            for x, snake in enumerate(snakes):
                if snake.done:
                    continue
                ge[x].fitness += 0.1
                """
                Inputs to the neural net:
                Vertical distance from food to head
                Horizontal distance from food to head
                Vertical distance to nearest wall from head
                Horizontal distance to nearest wall from head
                Distance from head to body segment (default -1)
                """
                snake_x = snake.head.x
                snake_y = snake.head.y
                food_x = snake.food.x
                food_y = snake.food.y
                food_vert = snake_y - food_y
                food_horz = snake_x - food_x
                wall_vert = min(snake_y, 600 - snake_y)
                wall_horz = min(snake_x, 600 - snake_x)
                body_front = snake.body_front()
                output = np.argmax(nets[snakes.index(snake)].activate((food_vert, food_horz, wall_vert, wall_horz, body_front)))
                state = snake.move(output)
                if state["Food"] == True:
                    ge[snakes.index(snake)].fitness += 1
                if state["Died"] == True:
                    ge[snakes.index(snake)].fitness -= 1
                    #nets.pop(snakes.index(snake))
                    #ge.pop(snakes.index(snake))
                    #snakes.pop(snakes.index(snake))
            all_done = [snake.done for snake in snakes]
            if False not in all_done:
                run = False

    def run(self, config_file):
        config = neat.config.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet, neat.DefaultStagnation, config_file)
        population = neat.Population(config)
        population.add_reporter(neat.StdOutReporter(True))
        stats = neat.StatisticsReporter()
        population.add_reporter(stats)
        best = population.run(self.eval_genomes, 200)
        print('\nBest genome:\n{!s}'.format(best))
        best_net = nets_g[ge_g.index(best)]
        pickle.dump(best_net, open('best.pkl', 'wb'))
Here's the conf.txt file:
[NEAT]
fitness_criterion = max
fitness_threshold = 20
pop_size = 50
reset_on_extinction = False
[DefaultGenome]
# node activation options
activation_default = relu
activation_mutate_rate = 0.0
activation_options = relu
# node aggregation options
aggregation_default = sum
aggregation_mutate_rate = 0.0
aggregation_options = sum
# node bias options
bias_init_mean = 0.0
bias_init_stdev = 1.0
bias_max_value = 10.0
bias_min_value = -10.0
bias_mutate_power = 0.5
bias_mutate_rate = 0.9
bias_replace_rate = 0.1
# genome compatibility options
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient = 0.5
# connection add/remove rates
conn_add_prob = 0.7
conn_delete_prob = 0.7
# connection enable options
enabled_default = True
enabled_mutate_rate = 0.01
feed_forward = True
initial_connection = full
# node add/remove rates
node_add_prob = 0.7
node_delete_prob = 0.7
# network parameters
num_hidden = 0
num_inputs = 5
num_outputs = 4
# node response options
response_init_mean = 1.0
response_init_stdev = 0.0
response_max_value = 30.0
response_min_value = -30.0
response_mutate_power = 0.0
response_mutate_rate = 0.0
response_replace_rate = 0.0
# connection weight options
weight_init_mean = 0.0
weight_init_stdev = 1.0
weight_max_value = 30
weight_min_value = -30
weight_mutate_power = 0.5
weight_mutate_rate = 0.8
weight_replace_rate = 0.1
[DefaultSpeciesSet]
compatibility_threshold = 3.0
[DefaultStagnation]
species_fitness_func = max
max_stagnation = 20
species_elitism = 2
[DefaultReproduction]
elitism = 2
survival_threshold = 0.2
As you can see I train for 200 generations. The results are pretty odd. The snake consistently gets a single piece of food but then immediately runs into a wall. It's sort of learning but not fully. I've tried to let it train for more generations, but there's no difference. I think the problem may be with my inputs to the neural nets, but I'm not sure.
EDIT: I changed the network architecture so that it now has 4 output nodes with a relu activation. The problem is now that the code freezes on the line where the output is computed (output = np.argmax(nets[snakes.index(snake)].activate((food_vert, food_horz, wall_vert, wall_horz, body_front)))).
Upvotes: 0
Views: 640
Reputation: 5467
From glancing over your code, you seem to have some bug(s):
for x, snake in enumerate(snakes):
    ge[x].fitness += 0.1
Within the for loop you are pop()-ing elements from both the snakes and the ge lists. In Python you should never modify a list while iterating over it. Later in the loop you use snakes.index(snake) instead of x to index the same list. Because of this, the reward for staying alive will probably go to the wrong snakes.
You could copy the list before iterating, but repeating snakes.index(snake) everywhere is also an anti-pattern. You need a different solution; for example, you could use a snake.dead flag.
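A minimal sketch of that pattern, using a hypothetical stand-in Snake class (not your actual one): keep all parallel lists at a fixed length for the whole generation, index them with the enumerate index x, and skip dead snakes instead of removing them.

```python
# Sketch: never pop() mid-iteration; mark dead snakes and skip them,
# so the snakes/fitness indices stay aligned all generation long.
class Snake:                      # hypothetical stand-in class
    def __init__(self):
        self.dead = False

snakes = [Snake() for _ in range(3)]
fitness = [0.0] * len(snakes)
snakes[1].dead = True             # pretend this one already crashed

for x, snake in enumerate(snakes):
    if snake.dead:
        continue                  # skip, but never remove
    fitness[x] += 0.1             # reward reaches the right snake via x
```

Because nothing is ever removed, x stays valid as an index into nets, ge, and snakes simultaneously.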
You seem to be scaling the output of a single neuron into an integer range. This makes the task a bit difficult (but not impossible) for the NN to solve, because close-together numbers don't actually map to similar actions.
The more common approach would be to use a separate neuron for each output, and select the action with highest activation. (Or to use softmax to select an action with random probabilities. This adds noise but makes the fitness landscape much smoother, because then even small changes to the weights will have some effect on fitness.)
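Both selection schemes can be sketched in a few lines. This is a generic illustration (the function name and signature are my own, not part of NEAT); it assumes the net returns a list of 4 raw activations:

```python
import numpy as np

def select_action(activations, greedy=True, rng=np.random.default_rng(0)):
    """Pick one of the actions from the net's raw output activations."""
    a = np.asarray(activations, dtype=float)
    if greedy:
        return int(np.argmax(a))        # deterministic: highest activation wins
    p = np.exp(a - a.max())             # softmax, shifted for numerical stability
    p /= p.sum()
    return int(rng.choice(len(a), p=p)) # stochastic: smoother fitness landscape

select_action([0.1, 2.0, -1.0, 0.5])    # greedy -> action 1
```

With greedy=False, even a small weight change shifts the action probabilities and therefore the expected fitness, which is what smooths the fitness landscape.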
You cannot expect to write bug-free code, and when your code is part of an optimization loop, debugging is extra tricky, because the optimization changes the effect of bugs.
Run your code in a simpler setting first. For example, you could ignore the output of the neural net and always do the same action (or random actions) instead. Think about what is supposed to happen. Maybe track some snakes and their reward manually step-by-step, e.g. with print statements.
The point is: reduce the number of things you are debugging at the same time.
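As an illustration of that kind of sanity check, here is a sketch of a random-action rollout. It assumes a hypothetical snake object with the same move()/state-dict interface as in the question; the function and its reward bookkeeping are mine, not your code:

```python
import random

def debug_rollout(snake, steps=100):
    """Drive one snake with random actions (no neural net) and log what happens."""
    fitness = 0.0
    for t in range(steps):
        action = random.randrange(4)   # ignore the net entirely
        state = snake.move(action)
        fitness += 0.1                 # survival reward, as in eval_genomes
        if state["Food"]:
            fitness += 1
            print(f"step {t}: ate food, fitness={fitness:.1f}")
        if state["Died"]:
            fitness -= 1
            print(f"step {t}: died, fitness={fitness:.1f}")
            break
    return fitness
```

If the rewards printed here don't match what you expect by hand, the bug is in the environment, not in the neural nets.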
Upvotes: 2