user1566490

Reputation: 75

IndexError: too many indices for array in reinforcement learning model using Pennylane and PyTorch

I am working on a quantum reinforcement learning model using PennyLane, PyTorch, and a stock trading environment from the finrl library. When I run my training function, I get the following error. Any help is highly appreciated.
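For reference, these are the imports my notebook uses for the snippets below (a sketch of my setup; the exact module paths may differ between finrl / stable-baselines3 versions, and processed / TECH_INDICATORS come from an earlier preprocessing step that I have omitted):

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pennylane as qml
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from stable_baselines3.common.vec_env import DummyVecEnv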

env = StockTradingEnv(
    df=processed,
    stock_dim=1,
    hmax=100,
    initial_amount=1000000,
    num_stock_shares=[0],
    buy_cost_pct=[0.001],
    sell_cost_pct=[0.001],
    reward_scaling=1e-4,
    state_space=len(TECH_INDICATORS) + 3,
    action_space=3,
    tech_indicator_list=TECH_INDICATORS
)

env_train = DummyVecEnv([lambda: env])
state_dim = env_train.observation_space.shape[0]

num_qubits = 4
dev = qml.device("default.qubit", wires=num_qubits)

@qml.qnode(dev, interface="torch")
def quantum_actor(inputs, weights):
    for i in range(num_qubits):
        qml.RY(float(inputs[i]), wires=i)
    
    for i in range(num_qubits):
        qml.RZ(float(weights[i]), wires=i)
        qml.RY(float(weights[i + num_qubits]), wires=i)
    
    for i in range(num_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    
    return [qml.expval(qml.PauliZ(i)) for i in range(num_qubits)]

class QuantumActor(nn.Module):
    def __init__(self, state_dim):
        super().__init__()
        self.state_dim = state_dim
        self.num_qubits = num_qubits
        self.weights = nn.Parameter(torch.randn(2 * num_qubits, dtype=torch.float32) * 0.1)
        self.classical_layer = nn.Linear(num_qubits, 3)
    
    def forward(self, state):
        quantum_input = state[:num_qubits]
        quantum_out = torch.tensor(quantum_actor(quantum_input, self.weights), dtype=torch.float32)
        action_logits = self.classical_layer(quantum_out)
        return torch.softmax(action_logits, dim=0)

class ClassicalCritic(nn.Module):
    def __init__(self, state_dim):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    
    def forward(self, state):
        return self.network(state)

def train_quantum_ac(env, actor, critic, actor_optimizer, critic_optimizer, episodes=100):
    gamma = 0.99

    for episode in range(episodes):
        state = env.reset()
        if isinstance(state, tuple):
            state = state[0]
        state = torch.tensor(state.squeeze(), dtype=torch.float32)
        
        done = False
        episode_reward = 0

        while not done:
            action_probs = actor(state)
            action_probs = torch.clamp(action_probs, min=1e-6)
            action = torch.multinomial(action_probs, 1).item()
            
            action_array = np.array([action], dtype=np.int32)
            next_state, reward, done, _, _ = env.step(action_array)
            next_state = torch.tensor(next_state.squeeze(), dtype=torch.float32)
            
            reward = reward.item() if isinstance(reward, np.ndarray) else reward
            value = critic(state)
            next_value = critic(next_state)
            advantage = reward + gamma * next_value * (1 - done) - value
            
            critic_loss = advantage.pow(2).mean()
            critic_optimizer.zero_grad()
            critic_loss.backward()
            critic_optimizer.step()
            
            log_prob = torch.log(action_probs[action])
            actor_loss = -log_prob * advantage.detach()
            actor_optimizer.zero_grad()
            actor_loss.backward()
            actor_optimizer.step()
            
            state = next_state
            episode_reward += reward
        
        print(f"Episode {episode + 1}, Reward: {episode_reward:.2f}")

actor = QuantumActor(state_dim)
critic = ClassicalCritic(state_dim)

actor_optimizer = optim.Adam(actor.parameters(), lr=0.001)
critic_optimizer = optim.Adam(critic.parameters(), lr=0.001)


train_quantum_ac(env_train, actor, critic, actor_optimizer, critic_optimizer, episodes=100)

obs = env_train.reset()
obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
done = False
total_reward = 0

while not done:
    action_probs = actor(obs)
    action_probs = torch.clamp(action_probs, min=1e-6)
    action = torch.multinomial(action_probs, 1).item()
    
    obs, reward, done, _ = env_train.step([action])
    obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
    total_reward += reward

print(f"Total Reward from Test: {total_reward}")

Error

Shape of DataFrame: (1509, 8)
Successfully added technical indicators
/usr/local/lib/python3.11/dist-packages/finrl/meta/env_stock_trading/env_stocktrading.py:317: DeprecationWarning: Calling nonzero on 0d arrays is deprecated, as it behaves surprisingly. Use atleast_1d(cond).nonzero() if the old behavior was intended. If the context of this warning is of the form arr[nonzero(cond)], just use arr[cond].
  sell_index = argsort_actions[: np.where(actions < 0)[0].shape[0]]

IndexError                                Traceback (most recent call last)
in <cell line: 0>()
    161
    162 # Train the model
--> 163 train_quantum_ac(env_train, actor, critic, actor_optimizer, critic_optimizer, episodes=100)
    164
    165 # Test the trained model

4 frames
/usr/local/lib/python3.11/dist-packages/pennylane/numpy/tensor.py in __getitem__(self, *args, **kwargs)
    185
    186     def __getitem__(self, *args, **kwargs):
--> 187         item = super().__getitem__(*args, **kwargs)
    188
    189         if not isinstance(item, tensor):

IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed
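For what it's worth, the message itself is the generic NumPy error raised when a 0-dimensional array is indexed with a single index, e.g.:

import numpy as np
x = np.array(1.0)   # 0-dimensional array
x[0]                # IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed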

Upvotes: 0

Views: 15

Answers (0)
