Trying to represent the Long Short-Term Memory (LSTM) model in PyTorch to improve understanding

Question

I am implementing Long Short-Term Memory (LSTM) models in Python with PyTorch to predict time series data, using a simple sine wave as a test case. This is a many-to-one problem: several past values (here, the five previous samples) are used to forecast the next value.

Does this representation accurately reflect the model in the following code?

The code:

from matplotlib import pyplot
import numpy
import torch


# Define the model class
class LSTM(torch.nn.Module):
    """Stacked LSTM followed by a linear layer for many-to-one prediction.

    The recurrent layers are built with ``batch_first=True``, so ``forward``
    expects input laid out as ``(batch, sequence, input_size)``. Only the
    output of the last time step is passed to the linear head, giving one
    prediction of width ``output_size`` per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden/cell state in every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence.
        dropout: Dropout probability handed to ``torch.nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the prediction for each sequence in the batch ``x``.

        Args:
            x: Tensor of shape ``(batch, sequence, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # Start every sequence from a zero state. Random initial states would
        # inject fresh noise on each call, making even eval-mode predictions
        # non-reproducible. ``new_zeros`` keeps the states on the same device
        # and in the same dtype as the input.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h0, c0))
        # Many-to-one: keep only the last time step of the top layer's output.
        return self.linear(out[:, -1, :])


# Hyper-parameters of the experiment
SEED = 0
SEQ_LENGTH = 5
EPOCHS = 500
HIDDEN_SIZE = 5
NUM_LAYERS = 3
NUM_FEATURES = 1
DROPOUT = 0.2
TRAIN_DATA_SIZE = 0.8
LEARNING_RATE = 0.001

# Fix the PyTorch random generator so runs are repeatable
torch.manual_seed(SEED)

# Sample sin(x) at 1000 evenly spaced points and store it as one column
x = numpy.linspace(0, 50, 1000)
fx = numpy.sin(x).reshape(-1, 1)

# Slide a window over the samples: SEQ_LENGTH consecutive values form one
# input sequence and the value right after the window is its target
# (X_t-5, X_t-4, X_t-3, X_t-2, X_t-1) => (Y_t)
window_starts = range(len(fx) - SEQ_LENGTH)
sequences = [fx[start:start + SEQ_LENGTH] for start in window_starts]
targets = [fx[start + SEQ_LENGTH] for start in window_starts]

# python.list => numpy.ndarray => torch.Tensor
X = torch.tensor(numpy.array(sequences), dtype=torch.float32)
y = torch.tensor(numpy.array(targets), dtype=torch.float32)

# Chronological split: the leading fraction trains, the remainder tests
train_size = int(TRAIN_DATA_SIZE * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Wrap the tensors in datasets; each loader yields its whole split as a
# single batch (full-batch), and only the training loader shuffles
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=len(y_train),
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=len(y_test),
    shuffle=False,
)

# Create the model, loss-function, and optimizer
model = LSTM(NUM_FEATURES, HIDDEN_SIZE, NUM_LAYERS, NUM_FEATURES, DROPOUT)
loss_function = torch.nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Evaluate the model before training.
# Predictions from every test batch are concatenated, so the result covers
# the whole test set regardless of the loader's batch size (overwriting a
# single variable per batch would silently keep only the last batch).
model.eval()
with torch.inference_mode():
    prediction_before_training = torch.cat(
        [model(features) for features, _ in test_dataloader]
    )

# Define lists to keep data of training process
epochs_list = []
train_loss_list = []
test_loss_list = []

# Train the model.
# The optimizer step runs once per batch, inside the batch loop. Computing
# the loss after the loop would train on the last batch only as soon as the
# loader stops being full-batch. Per-epoch losses are recorded as plain
# floats, averaged over all samples of the split.
for epoch in range(EPOCHS):
    epochs_list.append(epoch + 1)

    model.train()
    running_loss = 0.0
    for features, labels in train_dataloader:
        optimizer.zero_grad()
        loss = loss_function(model(features), labels)
        loss.backward()
        optimizer.step()
        # L1Loss returns the batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample average.
        running_loss += loss.item() * len(labels)
    train_loss_list.append(running_loss / len(train_dataset))

    model.eval()
    running_loss = 0.0
    with torch.inference_mode():
        for features, labels in test_dataloader:
            batch_loss = loss_function(model(features), labels)
            running_loss += batch_loss.item() * len(labels)
    test_loss_list.append(running_loss / len(test_dataset))

# Evaluate the model after training.
# Predictions from every test batch are concatenated so the curve covers the
# whole test set regardless of the loader's batch size.
model.eval()
with torch.inference_mode():
    prediction_after_training = torch.cat(
        [model(features) for features, _ in test_dataloader]
    )

# Visualize: loss curves on the left, test-set predictions on the right
figure = pyplot.figure(figsize=(16, 9))
axes = figure.subplots(1, 2)
axes[0].grid(True)
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Loss")
axes[0].plot(epochs_list, train_loss_list, label="Training")
axes[0].plot(epochs_list, test_loss_list, label="Testing")
axes[0].legend()
axes[1].grid(True)
axes[1].set_xlabel("x")
axes[1].set_ylabel("F(x) = sin(x)")
# Plot the test targets explicitly instead of relying on a variable leaked
# from an earlier loop.
axes[1].plot(y_test.numpy(), label="Actual values")
axes[1].plot(prediction_before_training.numpy(), label="Predictions before training")
axes[1].plot(prediction_after_training.numpy(), label="Predictions after training")
axes[1].legend()
figure.tight_layout()
pyplot.show()

The output graph should look like this:

Thanks for your answers.

Trying to represent the Long Short-Term Memory (LSTM) model in PyTorch to improve understanding

Answers (0)

Related Questions