Cooper
Cooper

Reputation: 140

How to compare the pronunciation accuracy of a recorded voice with an audio file already on the device and return a similarity percentage in Android (Kotlin)

I'm trying to make an app that checks the accuracy percentage of a user's pronunciation against an already recorded audio file. I have read about the subject, and from what I understand the best way is to convert both audio files into MFCCs and then compare them using the dynamic time warping (DTW) algorithm. However, the results I get are always between 70 and 90 percent, even if I record sentences that are different from the ones in the audio file on the device. I've tried the CosineDistance and EuclideanDistance algorithms, and I also tried normalizing the MFCC values, but nothing helped; I'm still getting the same problem.

This is the class I use to check the similarity percentage:


/**
 * Scores how closely two audio files match.
 *
 * Each file is loaded with JLibrosa and turned into the feature matrix returned by
 * `generateMFCCFeatures`. Matching rows of the two matrices are then compared with FastDTW and
 * the per-row distances are folded into a single percentage.
 */
class SimilarityChecker {
    private val sampleRate = 16000
    private val nMFCC = 13
    private val nFFT = 2048
    private val hopLength = 512
    private val nMels = 128

    /** Search radius handed to [FastDTW.compare]. */
    private val searchRadius = 10

    /**
     * Compares the audio at [file1Path] with the audio at [file2Path].
     *
     * @param file1Path path of the reference recording.
     * @param file2Path path of the recording to score against the reference.
     * @return the score produced by [calculateDTWDistance], rounded to two decimal places.
     * @throws IllegalArgumentException if no feature rows could be extracted, or if the two files
     *   yield a different number of feature rows.
     */
    fun percentage(file1Path: String, file2Path: String): Double {
        val referenceFeatures = extractMFCCs(file1Path)
        val studentFeatures = extractMFCCs(file2Path)

        val score = calculateDTWDistance(referenceFeatures, studentFeatures)

        return (score * 100.0).roundToInt() / 100.0
    }

    /**
     * Loads the audio file at [path] and returns its feature matrix as nested lists.
     *
     * The outer list has one entry per row returned by `generateMFCCFeatures`
     * (presumably one row per coefficient and one value per frame — confirm against JLibrosa).
     */
    private fun extractMFCCs(path: String): List<List<Float>> {
        Log.i("path from Main:", path)

        // One instance serves both the load and the feature-extraction call.
        val jLibrosa = JLibrosa()
        val samples = jLibrosa.loadAndRead(path, sampleRate, -1)

        return jLibrosa
            .generateMFCCFeatures(samples, sampleRate, nMFCC, nFFT, nMels, hopLength)
            .map { row -> row.toList() }
    }

    /**
     * Runs FastDTW on every pair of matching rows and condenses the distances into a percentage.
     *
     * @param referenceFeatures feature rows of the reference recording.
     * @param studentFeatures feature rows of the recording being scored.
     * @return `100.0` when every row distance is zero, otherwise `(max - mean) / max * 100`
     *   over the per-row distances.
     * @throws IllegalArgumentException if [referenceFeatures] is empty or the two inputs do not
     *   have the same number of rows.
     */
    private fun calculateDTWDistance(
        referenceFeatures: List<List<Float>>,
        studentFeatures: List<List<Float>>,
    ): Double {
        // Fail with a clear message instead of an opaque NoSuchElementException /
        // IndexOutOfBoundsException further down.
        require(referenceFeatures.isNotEmpty()) {
            "No features were extracted from the reference audio"
        }
        require(referenceFeatures.size == studentFeatures.size) {
            "Feature row count mismatch: reference=${referenceFeatures.size}, " +
                "student=${studentFeatures.size}"
        }

        val distances = referenceFeatures.zip(studentFeatures) { referenceRow, studentRow ->
            FastDTW.compare(
                toTimeSeries(referenceRow),
                toTimeSeries(studentRow),
                searchRadius,
                Distances.EUCLIDEAN_DISTANCE,
            ).distance
        }

        val maxDistance = distances.max()
        if (maxDistance == 0.0) return 100.0

        // NOTE(review): this ratio is unchanged when every distance is multiplied by the same
        // factor, so it reflects how uneven the per-row distances are relative to the largest
        // one, not how small they are. Unrelated recordings can therefore still score high.
        // Consider deriving the score from the absolute (length-normalised) DTW distance.
        return (maxDistance - distances.average()) / maxDistance * 100
    }

    /** Wraps [values] as a one-dimensional time series whose time axis is the element index. */
    private fun toTimeSeries(values: List<Float>): TimeSeriesBase =
        TimeSeriesBase(
            values.mapIndexed { index, value ->
                TimeSeriesItem(
                    index.toDouble(),
                    TimeSeriesPoint(doubleArrayOf(value.toDouble())),
                )
            }
        )
}

Upvotes: 1

Views: 142

Answers (0)

Related Questions