Reputation: 1
import random
import numpy as np
class AbilityStone:
def __init__(self):
self.positive1 = []
self.positive2 = []
self.negative = []
self.actions_left = 30
self.success_chance = 0.75
def step(self, action, value):
if value not in [0, 1]:
print("Invalid value. Value must be either 0 or 1.")
reward = 0
if value == 1 and self.success_chance > 0.26:
self.success_chance -= 0.1
elif value == 0 and self.success_chance < 0.75:
self.success_chance += 0.1
if action == 1:
if len(self.positive1) < 10:
self.positive1.append(value)
self.actions_left -= 1
else:
reward -= 100
elif action == 2:
if len(self.positive2) < 10:
self.positive2.append(value)
self.actions_left -= 1
else:
reward -= 100
elif action == 3:
if len(self.negative) < 10:
self.negative.append(value)
self.actions_left -= 1
else:
reward -= 100
else:
print("Invalid variable name")
return self.get_state(), reward
def check_possible_moves(self):
possible_moves = []
if len(self.positive1) < 10:
possible_moves.append(1)
if len(self.positive2) < 10:
possible_moves.append(2)
if len(self.negative) < 10:
possible_moves.append(3)
return possible_moves
def check_stone(self):
if self.positive1.count(1) >= 9 and self.positive2.count(1) >= 7 and self.negative.count(1) < 5:
print("9/7 Created !!!")
return True
else:
print("Stone Failed")
return False
def get_state(self):
state = (
self.positive1,
self.positive2,
self.negative,
self.actions_left,
self.success_chance
)
return state
def get_start_state(self):
state = (
10, # positive1
10, # positive2
10, # negative
1, # actions_left
1 # success_chance
)
return state
def reset(self):
self.positive1 = []
self.positive2 = []
self.negative = []
self.actions_left = 30
self.success_chance = 0.75
return self.get_state()
def get_random_value(chance):
if random.random() < chance:
return 1
else:
return 0
def q_learning(env, episodes, learning_rate, gamma, epsilon):
q_table = np.zeros((env.get_start_state()))
for eposide in (range(episodes)):
state = env.reset()
reward = 0
while env.actions_left > 0:
# Choose action
if random.uniform(0, 1) < epsilon:
action = random.choice(env.check_possible_moves())
else:
action = np.argmax(q_table[state])
# Get new state and reward from environment
new_state, reward = env.step(action, get_random_value(env.success_chance))
if env.actions_left == 1:
if env.check_stone():
reward += 100
# Update q-values
q_table[state, action] = q_table[state, action] + learning_rate * (reward + gamma * np.max(q_table[new_state, :]) - q_table[state, action])
state = new_state
if __name__ == '__main__':
# Q-learning parameters
episodes = 1000
learning_rate = 0.1 # Learning rate
gamma = 0.9 # Discount factor
epsilon = 0.1 # Exploration rate
ability_stone = AbilityStone()
q_learning(ability_stone, episodes, learning_rate, gamma, epsilon)
I got into learning ML and so far its not going so bad. I just cant find good resource to nicely explain how to correctly make a state from enviroment. When trying to run what i have created im getting this error:
Traceback (most recent call last):
File "c:\Users\bulaw\Desktop\ML Project\gradient_descent.py", line 139, in <module>
q_learning(ability_stone, episodes, learning_rate, gamma, epsilon)
File "c:\Users\bulaw\Desktop\ML Project\gradient_descent.py", line 116, in q_learning
action = np.argmax(q_table[state])
~~~~~~~^^^^^^^
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
I assume its cause variable from ability stone success_change is a float but i just cant find it how to apply it to the state correctly could i get some help ?
I was trying to find how to correctly create states from enviroment but were unsuccessful.
Upvotes: 0
Views: 43