daniel
daniel

Reputation: 966

How do I convert an audio mp3 file to audio type raw in iOS using Swift?

What is the most effective way to convert an audio mp3 file to audio type raw with the following characteristics in iOS using Swift?

single-channel (monaural)
little-endian
unheadered
16-bit signed
PCM
sampled at 16000 Hz

Upvotes: 4

Views: 2448

Answers (1)

Arashk
Arashk

Reputation: 627

I don't know this will help or this is what you after.

There is a solution related to this question related to what you after here

I modified the answer for what you after, I'm not expert in this but I try my best

import AVFoundation

public final class AVAudioFileConverter {

  var rwAudioSerializationQueue: DispatchQueue!
  var asset:AVAsset!
  var assetReader:AVAssetReader!
  var assetReaderAudioOutput:AVAssetReaderTrackOutput!
  var assetWriter:AVAssetWriter!
  var assetWriterAudioInput:AVAssetWriterInput!
  var outputURL:URL
  var inputURL:URL

    public init?(inputFileURL: URL, outputFileURL: URL) {
    inputURL = inputFileURL
    outputURL = outputFileURL

    if (FileManager.default.fileExists(atPath: inputURL.absoluteString)) {
      print("Input file does not exist at file path \(inputURL.absoluteString)")
      return nil
    }
  }

  public func convert() {
    let rwAudioSerializationQueueDescription = " rw audio serialization queue"
    // Create the serialization queue to use for reading and writing the audio data.
    rwAudioSerializationQueue = DispatchQueue(label: rwAudioSerializationQueueDescription)
    assert(rwAudioSerializationQueue != nil, "Failed to initialize Dispatch Queue")

    asset = AVAsset(url: inputURL)
    assert(asset != nil, "Error creating AVAsset from input URL")
    print("Output file path -> ", outputURL.absoluteString)

    asset.loadValuesAsynchronously(forKeys: ["tracks"], completionHandler: {
      var success = true
      var localError:NSError?
      success = (self.asset.statusOfValue(forKey: "tracks", error: &localError) == AVKeyValueStatus.loaded)
      // Check for success of loading the assets tracks.
      if (success) {
        // If the tracks loaded successfully, make sure that no file exists at the output path for the asset writer.
        let fm = FileManager.default
        let localOutputPath = self.outputURL.path
        if (fm.fileExists(atPath: localOutputPath)) {
          do {
            try fm.removeItem(atPath: localOutputPath)
            success = true
          } catch {
            print("Error trying to remove output file at path -> \(localOutputPath)")
          }
        }
      }

      if (success) {
        success = self.setupAssetReaderAndAssetWriter()
      } else {
        print("Failed setting up Asset Reader and Writer")
      }
      if (success) {
        success = self.startAssetReaderAndWriter()
        return
      } else {
        print("Failed to start Asset Reader and Writer")
      }

    })
  }

  func setupAssetReaderAndAssetWriter() -> Bool {
    do {
      assetReader = try AVAssetReader(asset: asset)
    } catch {
      print("Error Creating AVAssetReader")
    }

    do {
      assetWriter = try AVAssetWriter(outputURL: outputURL, fileType: AVFileType.wav)
    } catch {
      print("Error Creating AVAssetWriter")
    }

    var assetAudioTrack:AVAssetTrack? = nil
    let audioTracks = asset.tracks(withMediaType: AVMediaType.audio)

    if (audioTracks.count > 0) {
      assetAudioTrack = audioTracks[0]
    }

    if (assetAudioTrack != nil) {

      let decompressionAudioSettings:[String : Any] = [
        AVFormatIDKey:Int(kAudioFormatLinearPCM)
      ]

      assetReaderAudioOutput = AVAssetReaderTrackOutput(track: assetAudioTrack!, outputSettings: decompressionAudioSettings)
      assert(assetReaderAudioOutput != nil, "Failed to initialize AVAssetReaderTrackOutout")
      assetReader.add(assetReaderAudioOutput)

      var channelLayout = AudioChannelLayout()
      memset(&channelLayout, 0, MemoryLayout<AudioChannelLayout>.size);
      channelLayout.mChannelLayoutTag = kAudioChannelLayoutTag_Stereo;

        let outputSettings:[String : Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 44100,
            AVNumberOfChannelsKey: 2,
            AVChannelLayoutKey: NSData(bytes:&channelLayout, length:  MemoryLayout.size(ofValue: AudioChannelLayout.self)),
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsNonInterleaved: false,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,

        ]

      assetWriterAudioInput = AVAssetWriterInput(mediaType: AVMediaType.audio, outputSettings: outputSettings)
      assert(rwAudioSerializationQueue != nil, "Failed to initialize AVAssetWriterInput")
      assetWriter.add(assetWriterAudioInput)

    }
    print("Finsihed Setup of AVAssetReader and AVAssetWriter")
    return true
  }

  func startAssetReaderAndWriter() -> Bool {
    print("STARTING ASSET WRITER")
    assetWriter.startWriting()
    assetReader.startReading()
    assetWriter.startSession(atSourceTime: CMTime.zero)

    assetWriterAudioInput.requestMediaDataWhenReady(on: rwAudioSerializationQueue, using: {

      while(self.assetWriterAudioInput.isReadyForMoreMediaData ) {
        var sampleBuffer = self.assetReaderAudioOutput.copyNextSampleBuffer()
        if(sampleBuffer != nil) {
          self.assetWriterAudioInput.append(sampleBuffer!)
          sampleBuffer = nil
        } else {
          self.assetWriterAudioInput.markAsFinished()
          self.assetReader.cancelReading()
          self.assetWriter.finishWriting {
            print("Asset Writer Finished Writing")
          }
          break
        }
      }
    })
    return true
  }
}

usage

let inputFilePath = URL(fileURLWithPath: "/path/to/file.mp3")
let outputFileURL = URL(fileURLWithPath: "/path/to/output/output.wav")

if let audioConverter = AVAudioFileConverter(inputFileURL: inputFilePath, 
    outputFileURL: outputFileURL) {
    audioConverter.convert()
}

Input file info

File:           /path/12.mp3
File type ID:   MPG3
Num Tracks:     1
----
Data format:     2 ch,  44100 Hz, '.mp3' (0x00000000) 0 bits/channel, 0 bytes/packet, 1152 frames/packet, 0 bytes/frame
                no channel layout.
estimated duration: 1.149375 sec
audio bytes: 18390
audio packets: 44
bit rate: 128000 bits per second
packet size upper bound: 1052
maximum packet size: 418
audio data file offset: 417
optimized
audio 48510 valid frames + 576 priming + 1602 remainder = 50688

Output file info

File type ID:   WAVE
Num Tracks:     1
----
Data format:     2 ch,  44100 Hz, 'lpcm' (0x0000000C) 16-bit little-endian signed integer
                no channel layout.
estimated duration: 1.136327 sec
audio bytes: 200448
audio packets: 50112
bit rate: 1411200 bits per second
packet size upper bound: 4
maximum packet size: 4
audio data file offset: 4096
optimized
source bit depth: I16

and this the Info from output file from metadata2go

File Name: output.wav
File Size: 200 kB
File Type: WAV
File Type Extension: wav
MimeType: audio/x-wav
Encoding: Microsoft PCM
Num Channels: 2
Sample Rate: 44100
Avg Bytes Per Sec: 176400
Bits Per Sample: 16
Duration: 1.16 s
Category: audio

If you want to change the settings just change outputSettings this portion

let outputSettings:[String : Any] = [
    AVFormatIDKey: Int(kAudioFormatLinearPCM),
    AVSampleRateKey: 44100,
    AVNumberOfChannelsKey: 2,
    AVChannelLayoutKey: NSData(bytes:&channelLayout, length:  MemoryLayout.size(ofValue: AudioChannelLayout.self)),
    AVLinearPCMBitDepthKey: 16,
    AVLinearPCMIsNonInterleaved: false,
    AVLinearPCMIsFloatKey: false,
    AVLinearPCMIsBigEndianKey: false,
]

I hope this will help.

Upvotes: 8

Related Questions