Reputation: 623
I've developed a Flutter app that records audio and sends it to a Python Flask API for classification. The API uses Librosa for feature extraction and a pre-trained ML model for audio classification. However, I'm experiencing inconsistencies in the predictions.
The audio is recorded in `.wav` format with a sample rate of 16 kHz.
/// Starts recording audio to a temporary WAV file and updates the UI state.
///
/// NOTE(review): a fresh [Record] instance is created here while
/// [_stopRecording] creates another one; with the `record` package each
/// instance manages its own session, so start/stop should normally share a
/// single instance (e.g. a `_record` field on the State). Confirm against
/// the package version in use.
Future<void> _startRecording() async {
  try {
    final Record record = Record();
    if (await record.hasPermission()) {
      print("startRecording() hasPermission ");
      final Directory tempDir = await getTemporaryDirectory();
      final String tempPath = '${tempDir.path}/audio.wav';
      await record.start(path: tempPath);
      setState(() {
        _isRecording = true;
        _audioPath = tempPath;
        print("tempPath $tempPath");
      });
      print("Start Recording - _audioPath: $_audioPath");
    } else {
      // New: the original silently did nothing when permission was denied.
      print("startRecording(): microphone permission denied");
    }
  } catch (e) {
    // Bug fix: the original printed "has no Permission" for ANY exception,
    // mislabeling unrelated errors (I/O, plugin failures) as permission ones.
    print("startRecording() failed: $e");
  }
}
/// Stops the current recording and uploads the captured file for grading.
///
/// NOTE(review): this creates a different [Record] instance than the one
/// used in [_startRecording]; the two methods should share one instance —
/// verify that stop() actually ends the session with the `record` package
/// version in use.
Future<void> _stopRecording() async {
  try {
    final Record record = Record();
    final String? path = await record.stop();
    if (path != null) {
      setState(() {
        _isRecording = false;
        _audioPath = path;
      });
      print("Stop Recording - _audioPath: $_audioPath");
      // Bug fix: upload directly with the known-good path. The original
      // fired a 1-second Timer OUTSIDE the null check (and outside the
      // try/catch), so it ran even when stop() failed and then crashed on
      // the `_audioPath!` null assertion.
      await uploadAudio(File(path), 'balla');
    }
  } catch (e) {
    print(e);
  }
}
/// Uploads [audioFile] plus the expected [inputWord] to the Flask
/// `/predict` endpoint and navigates to the Correct/InCorrect screen based
/// on the JSON result.
Future<void> uploadAudio(File audioFile, String inputWord) async {
  var request = http.MultipartRequest(
      'POST', Uri.parse('http://192.168.8.181:5000/predict'));
  request.fields['input_word'] = inputWord;
  request.files.add(http.MultipartFile.fromBytes(
      'audio_file', await audioFile.readAsBytes(),
      filename: 'audio.wav'));
  var response = await request.send();
  if (response.statusCode == 200) {
    var result = await http.Response.fromStream(response);
    print('Result: ${result.body}');
    var parsedJson = json.decode(result.body);
    final verdict = parsedJson['result'];
    if (verdict == "Correct Answer" || verdict == "Wrong Answer") {
      // Bug fix: the original called dispose() BEFORE pause(); calling any
      // method on a disposed player is invalid. Pause first, then dispose.
      audioPlayer.pause();
      audioPlayer.dispose();
      // De-duplicated: both branches did the same navigation, differing
      // only in the destination page.
      Navigator.push(
        context,
        MaterialPageRoute(
          builder: (context) =>
              verdict == "Correct Answer" ? Correct() : InCorrect(),
        ),
      );
    }
  } else {
    print('Failed to upload audio');
  }
}
Flask API
# Standard library
import logging
import os
import subprocess
import tempfile

# Third-party
import joblib
import librosa
import numpy as np
from flask import Flask, request, jsonify
from pydub import AudioSegment
app = Flask(__name__)
# Initialize logging to a file; predictions/errors below go through this.
logging.basicConfig(filename='app.log', level=logging.INFO)
# Fixed number of MFCC frames fed to the model: shorter clips are zero-padded
# and longer ones truncated in predict_class() so the flattened feature
# vector always has the size the scaler/model were fitted on.
max_length = 100
# Function to convert audio file bit rate
# Function to convert audio file bit rate
def convert_audio_bit_rate(audio_file_path, target_bit_rate=256000,
                           output_file_path="converted_audio.wav"):
    """Re-encode *audio_file_path* via ffmpeg at *target_bit_rate*.

    Args:
        audio_file_path: Path of the source audio file.
        target_bit_rate: Target bit rate in bits/second (ffmpeg ``-ab``).
            NOTE(review): ``-ab`` sets the BIT rate, not the sample rate.
            If the intent is to match 16 kHz training data, resampling
            needs ``-ar 16000`` instead — confirm what the model expects.
        output_file_path: Destination file. The default is a fixed name,
            which is unsafe under concurrent requests — pass a unique
            (e.g. tempfile-based) path per request.

    Returns:
        ``output_file_path`` on success, or ``None`` on any failure.
    """
    try:
        subprocess.run(
            [
                "ffmpeg",
                "-y",  # overwrite the output instead of pre-deleting it
                "-i", audio_file_path,
                "-ab", str(target_bit_rate),
                output_file_path,
            ],
            check=True,           # bug fix: original ignored ffmpeg's exit code
            capture_output=True,  # keep ffmpeg's chatter out of the server log
        )
    except Exception as e:
        logging.error(f"Error in converting audio: {e}")
        return None
    return output_file_path
# Cached ML artifacts: the original reloaded scaler, encoder, and model from
# disk on EVERY request, which is slow and a likely source of latency.
_ARTIFACTS = None


def _load_artifacts():
    """Return (scaler, label_encoder, model), loading them lazily once."""
    global _ARTIFACTS
    if _ARTIFACTS is None:
        _ARTIFACTS = (
            joblib.load('scaler.pkl'),
            joblib.load('label_encoder.pkl'),
            joblib.load('Student_audio_model.pkl'),
        )
    return _ARTIFACTS


def predict_class(audio_file_path):
    """Classify the audio at *audio_file_path*.

    Returns the predicted class label (original string label via the
    LabelEncoder), or ``None`` if anything fails; the error is logged.
    """
    try:
        scaler, le, model = _load_artifacts()
        # NOTE(review): sr=None keeps the file's native sample rate. If the
        # model was trained on 16 kHz audio, uploads at other rates shift the
        # MFCCs and hurt predictions — consider sr=16000 to force resampling.
        waveform, sample_rate = librosa.load(audio_file_path, sr=None)
        mfcc = librosa.feature.mfcc(y=waveform, sr=sample_rate)
        # Pad or truncate the time axis to exactly max_length frames so the
        # flattened vector matches what the scaler/model were fitted on.
        if mfcc.shape[1] < max_length:
            pad_width = max_length - mfcc.shape[1]
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)),
                          mode='constant')
        else:
            mfcc = mfcc[:, :max_length]
        features = scaler.transform(mfcc.reshape(1, -1))
        predicted_class = model.predict(features)
        # Map the integer prediction back to the original string label.
        return le.inverse_transform(predicted_class)[0]
    except Exception as e:
        logging.error(f"Error in prediction: {e}")
        return None
@app.route('/predict', methods=['POST'])
def predict():
    """Grade an uploaded recording against the expected word.

    Expects multipart form data with an ``audio_file`` file part and an
    ``input_word`` field. Responds with JSON ``{"result": ...}``:
    200 on success, 400 when the payload is incomplete, 500 on failure.
    """
    audio_file_path = None
    try:
        # Validate the payload up front: the original used request.files[...]
        # / request.form[...] whose KeyError fell into the generic 500.
        audio_file = request.files.get("audio_file")
        input_word = request.form.get("input_word")
        if audio_file is None or input_word is None:
            return jsonify({"result": "Missing audio_file or input_word"}), 400
        # Bug fix: the original saved every upload to the fixed name
        # "uploaded_audio.wav", so concurrent requests overwrote each other's
        # audio mid-prediction — a direct cause of inconsistent results.
        fd, audio_file_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        audio_file.save(audio_file_path)
        predicted_class = predict_class(audio_file_path)
        if predicted_class is None:
            return jsonify({"result": "Error in prediction"}), 500
        if input_word == predicted_class:
            return jsonify({"result": "Correct Answer"})
        return jsonify({"result": "Wrong Answer"})
    except Exception as e:
        logging.error(f"General error: {e}")
        return jsonify({"result": "An error occurred"}), 500
    finally:
        # Clean up the per-request temp file (the original's cleanup was
        # commented out, leaking one file per request).
        if audio_file_path and os.path.exists(audio_file_path):
            os.remove(audio_file_path)
@app.errorhandler(404)
def not_found(error):
    """Serve a JSON body for unknown routes instead of Flask's HTML page."""
    payload = {"error": "Not Found"}
    return jsonify(payload), 404


@app.errorhandler(500)
def internal_error(error):
    """Serve a JSON body for unhandled server errors."""
    payload = {"error": "Internal Server Error"}
    return jsonify(payload), 500
if __name__ == '__main__':
    # Listen on all interfaces so the Flutter client can reach the API over
    # the LAN (it posts to http://192.168.8.181:5000). This is Flask's
    # development server — use a WSGI server (gunicorn/waitress) in production.
    app.run(host="0.0.0.0", port=5000)
How do I solve this issue?
Upvotes: 0
Views: 157
Reputation: 75636
You wrote that your training data is 16 kHz, but I'd double-check that. For example, your code contains convert_audio_bit_rate()
with a bit rate of 256 kbps — note that bit rate (kbps) and sample rate (kHz) are different things. What is that method for? If you intended to convert the data before use (i.e. in predict()
), then you apparently forgot to call it.
BTW: if you want to use convert_audio_bit_rate()
, then note the current implementation uses fixed value output_file_path = "converted_audio.wav"
as write target, which will cause problems when you get multiple simultaneous requests.
Upvotes: 0