Reputation: 347184
I have some existing code which uses AVAudioEngine to take input from the microphone, downsample it, and write it to an AVAudioFile:
internal func setupNodeChain() {
    guard let audioEngine = audioEngine else { return } // Fatal error ?

    let engineInputNode = audioEngine.inputNode
    let bus = 0
    let engineInputNodeFormat = engineInputNode.outputFormat(forBus: bus)

    // This attempts to down sample the audio from the microphone
    let downSampleMixerNode = AVAudioMixerNode()
    let mixerOutputFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)

    // Input -> (volume) -> down sample -> (volume) -> Output
    let inputVolumeMixerNode = AVAudioMixerNode()
    inputVolumeMixerNode.volume = Float(10 * microphoneVolume)

    audioEngine.attach(inputVolumeMixerNode)
    audioEngine.attach(downSampleMixerNode)

    self.downSampleMixerNode = downSampleMixerNode
    self.inputVolumeMixerNode = inputVolumeMixerNode

    let silenceNode = AVAudioMixerNode()
    silenceNode.outputVolume = 0
    self.silenceNode = silenceNode

    audioEngine.connect(engineInputNode, to: inputVolumeMixerNode, format: engineInputNodeFormat)
    audioEngine.connect(inputVolumeMixerNode, to: downSampleMixerNode, format: engineInputNodeFormat)

    // Try and stop the microphone audio from going through to the speaker
    audioEngine.attach(silenceNode)
    audioEngine.connect(downSampleMixerNode, to: silenceNode, format: mixerOutputFormat)
    audioEngine.connect(silenceNode, to: audioEngine.outputNode, format: mixerOutputFormat)

    downSampleMixerNode.installTap(onBus: bus, bufferSize: 1024 * 16, format: mixerOutputFormat) { (buffer: AVAudioPCMBuffer, time: AVAudioTime) in
        guard let tap = self.audioTap else { return }
        // Write buffer to AVAudioFile
        tap.drip(buffer: buffer, time: time)
    }
}
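The audioTap referenced inside the tap block isn't shown above. As a rough, hypothetical sketch (the AudioFileTap name and its shape are my assumptions, not the actual implementation), the sink receiving drip(buffer:time:) might simply append each downsampled buffer to an AVAudioFile:

import AVFoundation

// Hypothetical sketch of the tap sink used above; the real `audioTap` type
// is not shown in the question, so the name and details are assumptions.
class AudioFileTap {
    private let audioFile: AVAudioFile

    init(audioFile: AVAudioFile) {
        self.audioFile = audioFile
    }

    func drip(buffer: AVAudioPCMBuffer, time: AVAudioTime) {
        do {
            // Append the 8 kHz mono buffer delivered by the mixer tap
            try audioFile.write(from: buffer)
        } catch {
            print("Failed to write buffer: \(error)")
        }
    }
}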
This mostly works, but I'm investigating replacing it with AudioKit and I'm having issues: I don't know how to create a mechanism to downsample the audio from the microphone before it reaches the recorder. Here's what I have so far with AudioKit:
AKSettings.enableEchoCancellation = true
AKSettings.allowAirPlay = true
AKSettings.useBluetooth = true
do {
    try AKSettings.setSession(category: .playAndRecord,
                              with: [
                                  .allowBluetoothA2DP,
                              ])
    AKSettings.defaultToSpeaker = true

    let audioFile = try self.makeAudioFile(named: "Recording")
    let mixerOutputFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)!

    let microphone = AKMicrophone()
    let microphoneBooster = AKBooster(microphone)
    microphoneBooster.gain = 0

    let recorder = try AKNodeRecorder(node: microphoneBooster)
    //recorder.recordFormat = mixerOutputFormat

    let silence = AKMixer(microphoneBooster)
    silence.volume = 0

    self.microphone = microphone
    self.microphoneBooster = microphoneBooster
    self.recorder = recorder
    self.silence = silence

    AKManager.output = silence

    log(debug: "Start")
    try AKManager.start()
    log(debug: "Record")
    try recorder.record()

    DispatchQueue.main.async {
        self.state = .recording
        self.plot?.node = microphone
        self.callButton.setImage(#imageLiteral(resourceName: "EndCall"), for: [])
    }
} catch let error {
    log(error: "Failed to establish play and record session: \(error)")
}
So, the question is: how would I go about creating a "down sampling" node/workflow which links the microphone to the "node" with the "default" format, and then links the "node" to the next node in the chain with the desired AVAudioFormat?
Microphone -> Down sample (default format)
Down sample -> Next node (target format) -> recorder
Upvotes: 0
Views: 439
Reputation: 347184
Essentially, I had to create my own "tap" to tap into the data.
First, I had a "converter". This basically takes audio coming from another mixer (via a "tap"), converts it to a target format, and writes it out to an audio file:
class TapConverter: NodeTapperDelegate {

    let audioConfig: AudioConfig

    internal var inputFormat: AVAudioFormat?
    internal var converter: AVAudioConverter?

    var onError: ((Error) -> Void)?

    init(audioConfig: AudioConfig) {
        self.audioConfig = audioConfig
    }

    func open(format: AVAudioFormat) throws {
        inputFormat = format
        converter = AVAudioConverter(from: format, to: audioConfig.audioFormat)
    }

    func drip(buffer: AVAudioPCMBuffer, time: AVAudioTime) {
        guard let converter = converter else {
            return
        }
        guard let inputFormat = inputFormat else {
            return
        }
        // Scale the output capacity by the sample-rate ratio so the converted
        // buffer is large enough to hold the down-sampled audio
        let inputBufferSize = inputFormat.sampleRate
        let sampleRateRatio = inputBufferSize / audioConfig.audioFormat.sampleRate
        let capacity = Int(Double(buffer.frameCapacity) / sampleRateRatio)
        let bufferPCM16 = AVAudioPCMBuffer(pcmFormat: audioConfig.audioFormat, frameCapacity: AVAudioFrameCount(capacity))!
        var error: NSError? = nil
        converter.convert(to: bufferPCM16, error: &error) { inNumPackets, outStatus in
            outStatus.pointee = AVAudioConverterInputStatus.haveData
            return buffer
        }
        if let error = error {
            // Handle the conversion error in some way
            onError?(error)
        } else {
            let audioFile = audioConfig.audioFile
            do {
                log(debug: "Write buffer")
                try audioFile.write(from: bufferPCM16)
            } catch let error {
                log(error: "Failed to write buffer to audio file: \(error)")
                onError?(error)
            }
        }
    }

    func close() {
        converter = nil
        inputFormat = nil
        // 🤞 we close the audio file
    }
}
AudioConfig is just a basic placeholder; it contains the audioFile which is being written to (it must already be created) and the target AVAudioFormat:
struct AudioConfig {
    let url: URL
    let audioFile: AVAudioFile
    let audioFormat: AVAudioFormat
}
Creation might look something like...
let settings: [String: Any] = [
    AVFormatIDKey: NSNumber(value: kAudioFormatMPEG4AAC),
    AVSampleRateKey: NSNumber(value: 8000),
    AVNumberOfChannelsKey: NSNumber(value: 1),
    AVEncoderBitRatePerChannelKey: NSNumber(value: 16),
    AVEncoderAudioQualityKey: NSNumber(value: AVAudioQuality.min.rawValue)
]
let audioFile = try AVAudioFile(forWriting: sourceURL, settings: settings)
let audioConfig = AudioConfig(url: sourceURL, audioFile: audioFile, audioFormat: audioFormat)
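The audioFormat passed in above (the converter's target format) isn't shown in that snippet; as an assumption on my part, an 8 kHz mono format matching the rest of the post could be created like this:

// Assumed construction of the target format referenced above - an 8 kHz,
// single-channel, standard (deinterleaved float) PCM format
let audioFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)!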
From there, I needed a way to tap the node (get its data) and pass it on to my converter; for that, I used something like...
import Foundation
import AVFoundation
import AudioKit

protocol NodeTapperDelegate: class {
    func open(format: AVAudioFormat) throws
    func drip(buffer: AVAudioPCMBuffer, time: AVAudioTime)
    func close()
}

class NodeTapper: NSObject {
    // MARK: - Properties

    // The node we record from
    private(set) var node: AKNode?

    /// True if we are recording.
    @objc private(set) dynamic var isTapping = false

    /// The bus to install the recording tap on. Default is 0.
    private var bus: Int = 0

    /// Used for fixing recordings being truncated
    private var recordBufferDuration: Double = 16_384 / AKSettings.sampleRate

    weak var delegate: NodeTapperDelegate?

    // MARK: - Initialization

    /// Initialize the node tapper
    ///
    /// Recording buffer size is defaulted to be AKSettings.bufferLength
    /// You can set a different value by setting an AKSettings.recordingBufferLength
    ///
    /// - Parameters:
    ///   - node: Node to record from
    ///   - bus: Integer index of the bus to use
    ///
    @objc init(node: AKNode? = AKManager.output,
               bus: Int = 0) throws {
        self.bus = bus
        self.node = node
    }

    // MARK: - Methods

    /// Start recording
    @objc func start() throws {
        if isTapping == true {
            return
        }
        guard let node = node else {
            return
        }
        guard let delegate = delegate else {
            return
        }

        let bufferLength: AVAudioFrameCount = AKSettings.recordingBufferLength.samplesCount
        isTapping = true

        // Note: if you install a tap on a bus that already has a tap it will crash your application.
        let nodeFormat = node.avAudioNode.outputFormat(forBus: 0)
        try delegate.open(format: nodeFormat)

        // note, format should be nil as per the documentation for installTap:
        // "If non-nil, attempts to apply this as the format of the specified output bus. This should
        // only be done when attaching to an output bus which is not connected to another node"
        // In most cases AudioKit nodes will be attached to something else.
        node.avAudioUnitOrNode.installTap(onBus: bus,
                                          bufferSize: bufferLength,
                                          format: nil, // Might need to be the input node's format :/
                                          block: process(buffer:time:))
    }

    private func process(buffer: AVAudioPCMBuffer, time: AVAudioTime) {
        guard let sink = delegate else { return }
        sink.drip(buffer: buffer, time: time)
    }

    /// Stop recording
    @objc func stop() {
        if isTapping == false {
            return
        }

        isTapping = false

        if AKSettings.fixTruncatedRecordings {
            // delay before stopping so the recording is not truncated.
            let delay = UInt32(recordBufferDuration * 1_000_000)
            usleep(delay)
        }
        node?.avAudioUnitOrNode.removeTap(onBus: bus)
        delegate?.close()
    }
}
And then, somehow, bind it all together:
let microphone = AKMicrophone()
microphone?.volume = 10 * volume
let monoToStereo = AKStereoFieldLimiter(microphone, amount: 1)
let microphoneMixer = AKMixer(monoToStereo)
// This is where we're converting the audio from
// the microphone and dripping it into the audio file
let converter = TapConverter(audioConfig: audioConfig)
// handleError is basically just a func in this case
converter.onError = handleError
// Here we tap the mixer/node and output to the converter
let tapper = try NodeTapper(node: microphoneMixer)
tapper.delegate = converter
// Silence the output from the microphone, so it's not
// fed back into the microphone
let silence = AKMixer(microphoneMixer)
silence.volume = 0
self.microphoneMixer = microphoneMixer
self.converter = converter
self.tapper = tapper
self.microphone = microphone
self.silence = silence
AKManager.output = silence
log(debug: "Start")
try AKManager.start()
log(debug: "Record")
try tapper.start()
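The snippet above only starts the tap; for completeness, a minimal teardown (my assumption of how stopping would look, mirroring the start-up sequence, not code from my actual project) might be:

// Assumed teardown: stop the tap first so the delegate's close() is called,
// then stop the AudioKit engine.
func stopRecording() {
    tapper?.stop() // removes the AVAudioNode tap and calls converter.close()
    do {
        try AKManager.stop()
    } catch let error {
        log(error: "Failed to stop AudioKit: \(error)")
    }
}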
Much of this came from scraps of ideas in different posts around the web, so is it the best option? I don't know, but it does what I need it to do.
Upvotes: 2