Reputation: 1
I'm trying to create a convolutional autoencoder that will work with seismic waveforms. The problem is that my model doesn't seem to be learning anything from the data: when I test it by comparing a waveform against its reconstructed version, I get a straight line instead of something resembling the original waveform:
[Image: original vs. reconstructed waveform (blue is original, orange is reconstructed)]
In addition, the training and validation losses don't change at all across epochs on my loss plot.
I'm honestly not sure where to focus my debugging, but I suspect the problem lies with either the Autoencoder itself, the way my training function is structured, or the loss function I'm using (MSE).
I'm also fairly new to machine learning, so I might be missing something obvious or doing something totally wrong.
Here is my code that relates to this problem:
import glob
import numpy as np
import obspy as obs
import sklearn.model_selection
import torch
import torch.nn as nn
import torch.nn.functional
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import random
import sys
files = glob.glob('/loggerhead/coke/wf_Tony/trim/15_62.5/1108/DH1' + '/*.mseed')
# empty list to store the properly read waveforms
waves = []
# read all the files
for f in files:
    temp_wave = obs.read(f)
    A = temp_wave[0].data
    # normalization
    B = A/np.max(np.abs(A))
    # ensures every wave is size 3126
    waves.append(np.pad(B, (0, 3126 - B.size), 'constant'))
wave_arr = np.vstack(waves)
train_arr, test_arr = sklearn.model_selection.train_test_split(wave_arr, train_size=0.95)
train_torch = torch.tensor(train_arr, requires_grad=True).clone()
test_torch = torch.tensor(test_arr, requires_grad=True).clone()
train_waves = train_torch.unsqueeze_(1)
test_waves = test_torch.unsqueeze_(1)
k = 7
p = k//2
class AutoEncoder(nn.Module):
    def __init__(self):
        # make sure to always initialize the super class when using outside methods
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=k, padding=p), nn.LeakyReLU(),
            nn.Conv1d(64, 64, kernel_size=k, padding=p), nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(64, 128, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(128, 128, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(128, 256, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(256, 256, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(256, 512, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(512, 512, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(512, 1024, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(1024, 1024, kernel_size=k, padding=p),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(1024, 512, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(512, 256, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(256, 128, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(128, 64, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.Conv1d(64, 1, kernel_size=1, padding=p), nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = AutoEncoder()
loss_function_MSE = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')
model.to(device)
# Training function
def train_epoch(model, device, loss_fn, optimizer):
    # Set train mode for both the encoder and the decoder
    model.train()
    train_loss = []
    train_tester = train_waves.clone().detach()
    # shuffle the training dataset
    train_tester = train_tester[torch.randperm(train_tester.size()[0])]
    for wave in train_tester:
        wave = wave.to(device)
        output_thing = model(wave)
        loss = loss_fn(output_thing, wave)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print batch loss
        print('\t partial train loss (single batch): %f' % (loss.data))
        train_loss.append(loss.detach().cpu().numpy())
    return np.mean(train_loss)
# Testing function
def test_epoch(model, device, loss_fn):
    # Set evaluation mode for model
    model.eval()
    with torch.no_grad():  # No need to track the gradients
        # Define the lists to store the outputs for each batch
        conc_out = []
        conc_label = []
        for wave in test_waves:
            # Move tensor to the proper device
            wave = wave.to(device)
            # model data
            output_thing = model(wave)
            # Append the network output and the original image to the lists
            conc_out.append(output_thing.cpu())
            conc_label.append(wave.cpu())
        # Create a single tensor with all the values in the lists
        conc_out = torch.cat(conc_out)
        conc_label = torch.cat(conc_label)
        # Evaluate global loss
        val_loss = loss_fn(conc_out, conc_label)
    return val_loss.data
def plot_outputs(model):
    rand_num = random.randint(0, 4000)
    reconstructed = wave_torch_best[rand_num].to(device)
    reconstructed = model(reconstructed)
    new_numpy = reconstructed.detach().cpu().numpy()
    og = wave_torch_best[rand_num].detach().cpu().numpy()
    plt.plot(og[0, :])
    plt.plot(new_numpy[0, :])
    plt.savefig('/loggerhead/lwrigh89/Plots/Comparing Plots/reconstructed.png')
num_epochs = 4
diz_loss = {'train_loss':[],'val_loss':[]}
for epoch in range(num_epochs):
    train_loss = train_epoch(model, device, loss_function_MSE, optimizer)
    val_loss = test_epoch(model, device, loss_function_MSE)
    print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs, train_loss, val_loss))
    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(val_loss)
    if epoch == num_epochs + 1:
        torch.save(model.state_dict(), '/loggerhead/lwrigh89/Model/newmodel.pt')
        # plot og vs reconstructed
        plot_outputs(model)
plt.figure(figsize=(10, 8))
plt.semilogy(diz_loss['train_loss'], label='Train')
plt.semilogy(diz_loss['val_loss'], label='Valid')
plt.xlabel('Epoch')
plt.ylabel('Average Loss')
plt.legend()
plt.savefig('/loggerhead/lwrigh89/Plots/Epochs/epochgraph.png')
# exit program
sys.exit()
I'm using a GPU server with CUDA version 11.4, Python version 3.10.5, and PyTorch version 1.12.0.
I would appreciate any help/guidance given.
Upvotes: 0
Views: 230
Reputation: 82
I tried your code (with a few small changes) and it seems to work. I noticed a couple of things in your post:
(1) I am afraid the condition if epoch == num_epochs + 1: is never satisfied. Because you write for epoch in range(num_epochs):, the maximum value epoch reaches in the loop is num_epochs - 1. So it is possible that your output .png files are never updated; please check the modification time of the png files.
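As a quick illustration of that off-by-one (a minimal sketch, not from the original code), the saving branch only fires if you compare against num_epochs - 1:

num_epochs = 4
for epoch in range(num_epochs):
    # epoch takes the values 0, 1, 2, 3; it never equals num_epochs + 1 (5)
    if epoch == num_epochs + 1:
        print('never reached')               # this branch never runs
    if epoch == num_epochs - 1:
        print('last epoch, save/plot here')  # runs on the final iteration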
(2) I tried your code; here is the result. I trained the model on random data, so this says nothing about whether the accuracy is high, but at least the outputs are not all zeros. I attached the code below; I added the comment ### changed wherever I changed your original code.
import glob
import numpy as np
import obspy as obs
import sklearn.model_selection
import torch
import torch.nn as nn
import torch.nn.functional
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import random
import sys
# files = glob.glob('/loggerhead/coke/wf_Tony/trim/15_62.5/1108/DH1' + '/*.mseed') ### changed
# empty list to store the properly read waveforms
waves = []
# read all the files
for f in range(4):  # files: ### changed
    # temp_wave = obs.read(f) ### changed
    # A = temp_wave[0].data ### changed
    A = np.random.random([1, 3126])  ### changed
    # normalization
    B = A/np.max(np.abs(A))
    # ensures every wave is size 3126
    waves.append(np.pad(B, (0, 3126 - B.size), 'constant'))
wave_arr = np.vstack(waves)
train_arr, test_arr = sklearn.model_selection.train_test_split(wave_arr, train_size=0.95)
train_torch = torch.tensor(train_arr, requires_grad=True).clone().float()
test_torch = torch.tensor(test_arr, requires_grad=True).clone().float()
train_waves = train_torch.unsqueeze_(1)
test_waves = test_torch.unsqueeze_(1)
k = 7
p = k//2
class AutoEncoder(nn.Module):
    def __init__(self):
        # make sure to always initialize the super class when using outside methods
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=k, padding=p), nn.LeakyReLU(),
            nn.Conv1d(64, 64, kernel_size=k, padding=p), nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(64, 128, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(128, 128, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(128, 256, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(256, 256, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(256, 512, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(512, 512, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(512, 1024, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(1024, 1024, kernel_size=k, padding=p),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(1024, 512, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(512, 256, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(256, 128, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(128, 64, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.Conv1d(64, 1, kernel_size=1, padding=p), nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = AutoEncoder()
loss_function_MSE = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')
model.to(device)
# Training function
def train_epoch(model, device, loss_fn, optimizer):
    # Set train mode for both the encoder and the decoder
    model.train()
    train_loss = []
    train_tester = train_waves.clone().detach()
    # shuffle the training dataset
    train_tester = train_tester[torch.randperm(train_tester.size()[0])]
    for wave in train_tester:
        wave = wave.to(device)
        output_thing = model(wave)
        loss = loss_fn(output_thing, wave)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print batch loss
        print('\t partial train loss (single batch): %f' % (loss.data))
        train_loss.append(loss.detach().cpu().numpy())
    return np.mean(train_loss)
# Testing function
def test_epoch(model, device, loss_fn):
    # Set evaluation mode for model
    model.eval()
    with torch.no_grad():  # No need to track the gradients
        # Define the lists to store the outputs for each batch
        conc_out = []
        conc_label = []
        for wave in test_waves:
            # Move tensor to the proper device
            wave = wave.to(device)
            # model data
            output_thing = model(wave)
            # Append the network output and the original image to the lists
            conc_out.append(output_thing.cpu())
            conc_label.append(wave.cpu())
        # Create a single tensor with all the values in the lists
        conc_out = torch.cat(conc_out)
        conc_label = torch.cat(conc_label)
        # Evaluate global loss
        val_loss = loss_fn(conc_out, conc_label)
    return val_loss.data
def plot_outputs(model):
    rand_num = 0  # random.randint(0, 4000) ### changed
    wave_torch_best = torch.from_numpy(B).float().unsqueeze_(0)  ### changed
    reconstructed = wave_torch_best[rand_num].to(device)
    reconstructed = model(reconstructed)
    new_numpy = reconstructed.detach().cpu().numpy()
    og = wave_torch_best[rand_num].detach().cpu().numpy()
    plt.plot(og[0, :])
    plt.plot(new_numpy[0, :])
    plt.savefig('reconstructed.png')
num_epochs = 100 ### changed
diz_loss = {'train_loss':[],'val_loss':[]}
if(True):
    for epoch in range(num_epochs):
        train_loss = train_epoch(model, device, loss_function_MSE, optimizer)
        val_loss = test_epoch(model, device, loss_function_MSE)
        print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs, train_loss, val_loss))
        diz_loss['train_loss'].append(train_loss)
        diz_loss['val_loss'].append(val_loss)
        if epoch == num_epochs - 1:  ### changed
            print('********')
            torch.save(model.state_dict(), 'newmodel.pt')
            # plot og vs reconstructed
            plot_outputs(model)

plt.figure(figsize=(10, 8))
plt.semilogy(diz_loss['train_loss'], label='Train')
plt.semilogy(diz_loss['val_loss'], label='Valid')
plt.xlabel('Epoch')
plt.ylabel('Average Loss')
plt.legend()
plt.savefig('epochgraph.png')
# exit program
# sys.exit() ### changed
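One more change that is easy to miss: .float() was added when building train_torch and test_torch. With the random test data, np.random.random returns float64 arrays while the model's weights are float32, and PyTorch raises a dtype mismatch error in the first Conv1d if a double tensor is passed to a float model. A minimal standalone sketch of that issue (not from the original post):

import numpy as np
import torch
import torch.nn as nn

x = torch.tensor(np.random.random([1, 1, 8]))     # dtype is torch.float64
conv = nn.Conv1d(1, 1, kernel_size=3, padding=1)  # weights are torch.float32
# conv(x) would raise a dtype mismatch error; casting the input fixes it
y = conv(x.float())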
Upvotes: 0