saad sagheer

Reputation: 11

Model Not Saving After Training in PyCharm Virtual Environment

# I'm running this Python code on my Windows 10 PC in PyCharm 2024, inside a virtual environment:

```python
import os
import numpy as np
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Masking, LSTM, TimeDistributed
from tensorflow.keras.callbacks import ModelCheckpoint

def extract_features(file_path, sample_rate=22050, duration=4):
    try:
        audio, _ = librosa.load(file_path, sr=sample_rate, duration=duration, mono=True)
        mel_spec = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        return mel_spec
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

def load_dataset(directory, sample_rate=22050, duration=4):
    audio_extensions = ('.wav', '.mp3', '.flac', '.aac', '.m4a', '.ogg')
    features = []
    for root, _, files in os.walk(directory):
        for filename in files:
            if filename.lower().endswith(audio_extensions):
                file_path = os.path.join(root, filename)
                print(f"Processing file: {file_path}")
                mel_spec = extract_features(file_path, sample_rate, duration)
                if mel_spec is not None:
                    features.append(mel_spec)
                else:
                    print(f"Failed to extract features from {file_path}")
    if len(features) == 0:
        print("No valid audio files found in the directory.")
    return features

def pad_sequences(sequences, maxlen=None):
    if maxlen is None:
        maxlen = max(seq.shape[1] for seq in sequences)
    padded_sequences = []
    for seq in sequences:
        if seq.shape[1] < maxlen:
            pad_width = maxlen - seq.shape[1]
            padded_seq = np.pad(seq, ((0, 0), (0, pad_width)), mode='constant')
        else:
            padded_seq = seq[:, :maxlen]
        padded_sequences.append(padded_seq)
    return np.array(padded_sequences)

def create_sequence_autoencoder(input_shape):
    input_layer = Input(shape=input_shape)
    masked = Masking(mask_value=0.0)(input_layer)
    encoded = LSTM(128, activation='relu', return_sequences=True)(masked)
    encoded = LSTM(64, activation='relu', return_sequences=False)(encoded)
    repeated = tf.keras.layers.RepeatVector(input_shape[0])(encoded)
    decoded = LSTM(64, activation='relu', return_sequences=True)(repeated)
    decoded = LSTM(128, activation='relu', return_sequences=True)(decoded)
    decoded = TimeDistributed(Dense(input_shape[1], activation='sigmoid'))(decoded)
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder

# Training the model
qari_dataset_directory = r"E:\quran\Hindi\Hindi_Translation_Splitter\pythonProject1\pythonProject1\qari_voice\qari-dataset"  # Adjust the path as needed
X = load_dataset(qari_dataset_directory)

print("Loaded dataset shape:", [x.shape for x in X])

if len(X) > 0:
    max_length = max(x.shape[1] for x in X)
    X_padded = pad_sequences(X, maxlen=max_length)
    input_shape = (X_padded.shape[1], X_padded.shape[2])
    autoencoder = create_sequence_autoencoder(input_shape)

    # Save the best model
    while True:
        try:
            checkpoint_path = input("Enter the path to save the model checkpoint (e.g., qari_autoencoder.keras): ")
            checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, mode='min')
            autoencoder.fit(X_padded, X_padded, epochs=10, batch_size=16, validation_split=0.2, callbacks=[checkpoint])
            if os.path.exists(checkpoint_path):
                print(f"Model checkpoint saved at: {checkpoint_path}")
                break
            else:
                raise Exception("Checkpoint not saved.")
        except Exception as e:
            print(f"Failed to save the model checkpoint at: {checkpoint_path}, error: {e}")

# Load the trained model
if os.path.exists(checkpoint_path):
    autoencoder = load_model(checkpoint_path)
    print("Model loaded successfully.")
else:
    print(f"Model checkpoint not found at: {checkpoint_path}")
    exit(1)

def preprocess_audio(file_path, sample_rate=22050, duration=4):
    mel_spec = extract_features(file_path, sample_rate, duration)
    if mel_spec is None:
        raise ValueError(f"Failed to extract features from {file_path}")
    return mel_spec

def pad_and_reshape(mel_spec, max_length):
    if mel_spec.shape[1] < max_length:
        pad_width = max_length - mel_spec.shape[1]
        mel_spec_padded = np.pad(mel_spec, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_spec_padded = mel_spec[:, :max_length]
    return np.expand_dims(mel_spec_padded, axis=0)  # Reshape to match model input shape

# Example file to process
audio_file_path = r"E:\quran\Hindi\Hindi_Translation_Splitter\Output\114 MSTR.wav"

# Preprocess the audio
mel_spec = preprocess_audio(audio_file_path)
max_length = autoencoder.input_shape[1]
mel_spec_padded = pad_and_reshape(mel_spec, max_length)

# Predict using the autoencoder
output = autoencoder.predict(mel_spec_padded)

# Reshape and convert the output back to the original shape
output_mel_spec = output[0]

# Convert mel spectrogram back to audio
def mel_spec_to_audio(mel_spec, sample_rate=22050):
    mel_spec = librosa.db_to_power(mel_spec)
    audio = librosa.feature.inverse.mel_to_audio(mel_spec, sr=sample_rate)
    return audio

# Convert the output mel spectrogram back to audio
audio_without_qari_voice = mel_spec_to_audio(output_mel_spec)

# Save the audio without Qari voice
output_audio_path = r"E:\quran\Hindi\Hindi_Translation_Splitter\Output\Without_qari_output.wav"
os.makedirs(os.path.dirname(output_audio_path), exist_ok=True)
sf.write(output_audio_path, audio_without_qari_voice, 22050)
print(f"Processed audio saved at: {output_audio_path}")`

My model is not saving after training. What can I do? Please guide me: after completing 10 epochs this code is supposed to save the model, but it doesn't, even after an hour of training. I then thought about using Google Colab instead, but my dataset is large (more than 20,000 files, about 5 GB), so I can't upload it to Colab.


I tried to solve this issue with ChatGPT-4o.

Upvotes: 1

Views: 21

Answers (1)

saad sagheer

Reputation: 11

The problem: I had too much data, and the code I wrote was not compatible with it. The model was not saving because the loss during training was NaN, which means the model was not training correctly. To solve this, I used a for loop to train the model.
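For anyone hitting the same issue: with `save_best_only=True` and `monitor='val_loss'`, `ModelCheckpoint` never writes a file while the validation loss is NaN, so no checkpoint appears. Below is a minimal sketch of the for-loop idea, assuming the `autoencoder`, `X_padded`, and `checkpoint_path` objects from the question; the `chunk_size`, the gradient clipping (`clipnorm`), and the explicit `save()` after each epoch are illustrative assumptions, not the exact code I ran.

```python
import numpy as np
import tensorflow as tf

# Re-compile with gradient clipping; exploding gradients in the relu LSTMs are a
# common cause of the NaN losses described above (assumption, not from the question).
autoencoder.compile(optimizer=tf.keras.optimizers.Adam(clipnorm=1.0),
                    loss='mean_squared_error')

chunk_size = 500  # hypothetical chunk size; tune to the memory you have
stop_training = False
for epoch in range(10):
    for start in range(0, len(X_padded), chunk_size):
        chunk = X_padded[start:start + chunk_size]
        history = autoencoder.fit(chunk, chunk, epochs=1, batch_size=16, verbose=1)
        if np.isnan(history.history['loss'][-1]):
            print(f"NaN loss in epoch {epoch + 1}; stopping early.")
            stop_training = True
            break
    if stop_training:
        break
    # Save explicitly after every full pass, so a checkpoint always exists even
    # when ModelCheckpoint(save_best_only=True) would have skipped the save.
    autoencoder.save(checkpoint_path)
    print(f"Saved model after epoch {epoch + 1} to {checkpoint_path}")
```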

Upvotes: 0
