Reputation: 11
I'm trying to have an AI play a game in real time, which is why I'm not using Gym to create an environment.
I want to take a screenshot, preprocess it, and pass it through the model, then update the model based on whether or not the agent died in-game.
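Roughly, the loop I'm aiming for looks like this (just a sketch; grab_screen, preprocess_frame, choose_action, send_action and update_model are placeholders for the real pieces below):

while not dead:
    frame = grab_screen()            # screenshot of the game window
    state = preprocess_frame(frame)  # resize/normalize for the network
    action = choose_action(state)    # forward pass through the model
    send_action(action)              # e.g. with keyboard/pyautogui
update_model(episode_outcome)        # learn from whether the agent died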
EDIT: I've made some progress, so here is the updated code:
from PIL import ImageGrab, Image
import numpy as np
import torch
from torch import nn
from torchvision import transforms
import torch.optim as optim
from torch.distributions import Categorical  # needed for sampling actions below
# import keyboard and pyautogui for playing the game
_bbox = (800, 200, 1200, 900)  # capture region: (left, top, right, bottom)
dead = False
def get():
    global dead
    screenshot = ImageGrab.grab(bbox=_bbox)
    # Check if dead: the death screen is mostly red, so look at the per-channel means
    channel_means = np.array(screenshot).mean(axis=(0, 1))  # (R, G, B)
    if channel_means[0] >= 190 and channel_means[1] < 50 and channel_means[2] < 15:
        dead = True  # edit
    return screenshot.copy()
def compute_discounted_rewards(rewards, gamma=0.99): # Edit
"""This function returns an error if used, so I don't use it further on. But I still wanted to show it here."""
discounted_rewards = []
R = 0
for r in reversed(rewards):
R = r + gamma * R
discounted_rewards.insert(0, R)
discounted_rewards = torch.tensor(discounted_rewards)
discounted_rewards = (discounted_rewards - discounted_rewards.mean()) / (discounted_rewards.std() + 1e-5)
return discounted_rewards
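# Example of how I intend to call it once the error is fixed (made-up rewards,
# commented out because I'm not using the function yet):
# discounted = compute_discounted_rewards([1.0, 1.0, 1.0, 0.0])
# print(discounted)  # four normalized returns, weighted towards the earlier steps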
# define preprocessing (resize/center-crop to 256, standard ImageNet mean/std normalization)
preprocess = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(256),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
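# preprocess(img) gives a 3 x 256 x 256 float tensor (I add a batch dimension before the model)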
# define model (am I doing this right?)
model = nn.Sequential(
nn.Conv2d(3,256,5),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256,512,5),
nn.MaxPool2d(2, 2),
nn.ReLU(),
nn.Flatten(),
    nn.Linear(512 * 61 * 61, 512),  # flatten size: 512 channels x 61 x 61 for a 256x256 input
nn.LeakyReLU(),
nn.Linear(512, 512),
nn.LeakyReLU(),
nn.Linear(512, 3),
nn.Softmax(1)
)
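# To sanity-check the flatten size going into the first Linear layer, I run a dummy
# 256x256 input through just the conv/pool/flatten part (indices 0-6 of the Sequential):
with torch.no_grad():
    dummy = torch.zeros(1, 3, 256, 256)
    print(model[:7](dummy).shape)  # torch.Size([1, 1905152]) -> 512 * 61 * 61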
# Everything below here is after the edit
optimizer = optim.Adam(model.parameters(), lr=1E-2)
ep_reward = list()
for j in range(1): # number of episodes
dead = False
i = 0
log_probs = list()
rewards = list()
while not dead: # episode ends after loop ends
        inputs = preprocess(get()).unsqueeze(0)  # add a batch dimension for the model
outputs = model(inputs)
# pred = torch.argmax(outputs.data[0]).item() # 0 = no moves, 1 = left, 2 = right
m = Categorical(outputs)
action = m.sample()
log_probs.append(m.log_prob(action))
rewards.append(1 if not dead else 0) # append the reward for action
i += 1
break # Temp
dead = False
ep_reward.append(sum(rewards))
# discounted_rewards = compute_discounted_rewards(rewards)
discounted_rewards = rewards
policy_loss = list() # model_loss
for log_prob, Gt in zip(log_probs, discounted_rewards):
policy_loss.append(-log_prob*Gt)
optimizer.zero_grad()
policy_loss = torch.cat(policy_loss).sum()
policy_loss.backward()
optimizer.step()
Most of this is code I've copied from an assortment of sources. I try to understand what it all does, though.
I don't know if this will work, so please let me know.
How does the optimizer affect the model? They aren't linked in any way I can tell. Is that something PyTorch does behind the scenes?
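For example, even in a minimal case like this (dummy input and a made-up loss, just to show what I mean), the only place the model and the optimizer ever touch is the model.parameters() call:

opt = optim.Adam(model.parameters(), lr=1e-2)
out = model(torch.zeros(1, 3, 256, 256))
loss = out.sum()
opt.zero_grad()
loss.backward()
opt.step()  # the model's weights change, even though `model` itself is never handed to opt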
Upvotes: 0
Views: 44