Reputation: 140
I'm trying to make an app that check accuracy percentage of pronunciation of users with an already recorded audio file , I have read about the subject and I got that the best way is to convert both audio files into MFCCs and then compare them using dynamic time warping (DTW) algorithm, but the results I get are always between 70 and 90 percent even if I recorded different sentences from the ones in the audio file in device, I've tried CosineDistance & EuclideanDistance algorithms, I also tried to normalize values for the MFCCs, but got nothing, still getting the same problem.
this is the class I use to check similarity percentage
class SimilarityChecker {
private val sampleRate = 16000
private val nMFCC = 13
private val bufferSize = 512
private val nFFT = 2048
private val hopLength = 512
private val nMels = 128
fun percentage(file1Path: String, file2Path: String): Double
{
val mfcc1 = extractMFCCs(file1Path)
val mfcc2 = extractMFCCs(file2Path)
val percentage = calculateDTWDistance(mfcc1 , mfcc2)
return (percentage * 100.0).roundToInt() / 100.0
}
private fun extractMFCCs(@NonNull path: String): MutableList<MutableList<Float>> {
Log.i("path from Main:", path)
var fileList = JLibrosa().loadAndRead(path, sampleRate, -1)
val MFCC = JLibrosa().generateMFCCFeatures(fileList, sampleRate, this.nMFCC, nFFT, nMels, hopLength)
.map { arr -> arr.toList() }
return MFCC.map { d -> d.toMutableList() }.toMutableList()
}
private fun calculateDTWDistance(
referenceFeatures: MutableList<MutableList<Float>>,
studentFeatures: MutableList<MutableList<Float>>
) : Double {
val dtw = DTW()
val ldTW = io.github.cawfree.dtw.DTW()
var distances: MutableList<Double> = mutableListOf()
for (i in referenceFeatures.indices) {
distances.add(
FastDTW.compare(
TimeSeriesBase(
referenceFeatures[i].mapIndexed { index, value ->
TimeSeriesItem(
index.toDouble(),
TimeSeriesPoint(doubleArrayOf(value.toDouble()))
)
}
),
TimeSeriesBase(
studentFeatures[i].mapIndexed { index, value ->
TimeSeriesItem(
index.toDouble(),
TimeSeriesPoint(doubleArrayOf(value.toDouble()))
)
}
),
10, Distances.EUCLIDEAN_DISTANCE
).distance
)
}
val distance = distances.average()
return if (distances.max() == 0.0) 100.0 else ((distances.max() - distance) / distances.max()) * 100
}
}
Upvotes: 1
Views: 142