Mateusz Bulewicz
Mateusz Bulewicz

Reputation: 1

Understanding shapes in enviroment

import random
import numpy as np

class AbilityStone:
    def __init__(self):
        self.positive1 = []
        self.positive2 = []
        self.negative = []
        self.actions_left = 30
        self.success_chance = 0.75
    
    def step(self, action, value):
        if value not in [0, 1]:
            print("Invalid value. Value must be either 0 or 1.")
        
        reward = 0
        if value == 1 and self.success_chance > 0.26:
            self.success_chance -= 0.1
        elif value == 0 and self.success_chance < 0.75:
            self.success_chance += 0.1
        
        if action == 1:
            if len(self.positive1) < 10:
                self.positive1.append(value)
                self.actions_left -= 1
            else:
                reward -= 100
        elif action == 2:
            if len(self.positive2) < 10:
                self.positive2.append(value)
                self.actions_left -= 1
            else:
                reward -= 100
        elif action == 3:
            if len(self.negative) < 10:
                self.negative.append(value)
                self.actions_left -= 1
            else:
                reward -= 100
        else:
            print("Invalid variable name")

        return self.get_state(), reward
    
    def check_possible_moves(self):
        possible_moves = []
        
        if len(self.positive1) < 10:
            possible_moves.append(1)
        
        if len(self.positive2) < 10:
            possible_moves.append(2)
        
        if len(self.negative) < 10:
            possible_moves.append(3)
        
        return possible_moves
    
    def check_stone(self):
        if self.positive1.count(1) >= 9 and self.positive2.count(1) >= 7 and self.negative.count(1) < 5:
            print("9/7 Created !!!")
            return True
        else:
            print("Stone Failed")
            return False
        
    def get_state(self):
        state = (
            self.positive1,
            self.positive2,
            self.negative,
            self.actions_left,
            self.success_chance
        )
        return state    
        
    def get_start_state(self):
        state = (
            10, # positive1
            10, # positive2
            10, # negative
            1, # actions_left
            1 # success_chance
        )
        return state

    def reset(self):
        self.positive1 = []
        self.positive2 = []
        self.negative = []
        self.actions_left = 30
        self.success_chance = 0.75

        return self.get_state()
    

def get_random_value(chance):
    if random.random() < chance:
        return 1
    else:
        return 0
   
def q_learning(env, episodes, learning_rate, gamma, epsilon):
    q_table = np.zeros((env.get_start_state()))

    for eposide in (range(episodes)):
        state = env.reset()
        reward = 0

        while env.actions_left > 0:
            # Choose action
            if random.uniform(0, 1) < epsilon:
                action = random.choice(env.check_possible_moves())
            else:
                action = np.argmax(q_table[state])

            # Get new state and reward from environment
            new_state, reward = env.step(action, get_random_value(env.success_chance))

            if env.actions_left == 1:
                if env.check_stone():
                    reward += 100
            # Update q-values
            q_table[state, action] = q_table[state, action] + learning_rate * (reward + gamma * np.max(q_table[new_state, :]) - q_table[state, action])

            state = new_state
        

if __name__ == '__main__':
    # Q-learning parameters
    episodes = 1000
    learning_rate = 0.1  # Learning rate
    gamma = 0.9  # Discount factor
    epsilon = 0.1  # Exploration rate

  
    ability_stone = AbilityStone()
    q_learning(ability_stone, episodes, learning_rate, gamma, epsilon)

I got into learning ML and so far its not going so bad. I just cant find good resource to nicely explain how to correctly make a state from enviroment. When trying to run what i have created im getting this error:

Traceback (most recent call last):
  File "c:\Users\bulaw\Desktop\ML Project\gradient_descent.py", line 139, in <module>
    q_learning(ability_stone, episodes, learning_rate, gamma, epsilon)
  File "c:\Users\bulaw\Desktop\ML Project\gradient_descent.py", line 116, in q_learning
    action = np.argmax(q_table[state])
                       ~~~~~~~^^^^^^^
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

I assume its cause variable from ability stone success_change is a float but i just cant find it how to apply it to the state correctly could i get some help ?

I was trying to find how to correctly create states from enviroment but were unsuccessful.

Upvotes: 0

Views: 43

Answers (0)

Related Questions