I am building a WebRTC streaming service in Swift.
I need to record video while streaming, so I save the recording with AVAssetWriter.
The problem is that if the app is left running and recording for a long time, the audio and video drift out of sync.
For example, a clip recorded right after launch has the audio and video in sync, but in a clip recorded after the app has been running for a day, the audio leads the video by 1-2 seconds: at the start the audio begins before the corresponding video, and at the end the video stops first while the audio keeps playing for another 1-2 seconds. The longer the app stays running, the larger the offset becomes. What am I doing wrong?
The video sample buffers come from the WebRTC (v122.0.0) library: each RTCVideoFrame delivered to the renderFrame method of the RTCVideoRenderer protocol is converted into a CMSampleBuffer.
Since the library provides no way to obtain an audio sample buffer, the audio is captured separately via the AVCaptureAudioDataOutputSampleBufferDelegate protocol, using the sampleBuffer passed to
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection).
extension FrameRenderer: RTCVideoRenderer {
    func setSize(_ size: CGSize) {
        // print("=================== \(#function) size: \(size) ===================")
    }

    func renderFrame(_ frame: RTCVideoFrame?) {
        guard let videoFrame = frame,
              let videoPixelBuffer = convertRTCVideoFrameToPixelBuffer(videoFrame),
              let videoBuffer = createSampleBuffer(pixelBuffer: videoPixelBuffer, frame: videoFrame)
        else {
            Log.debug("=================== \(#function) Failed to convert videoPixelBuffer or videoBuffer ===================")
            return
        }

        let frame = calculateSNR(videoBuffer)
        frameSubject.onNext(frame)

        switch captureStatus {
        case .idle:
            return
        case .start:
            Log.debug("recording start")
            setAudioCaptureSession()
            setAssetWriter(sampleBuffer: videoBuffer)
        case .capturing:
            appendSampleBuffer(sampleBuffer: videoBuffer, isVideo: true)
        case .end:
            Observable<Int>
                .just(1)
                .subscribe(onNext: { [weak self] _ in
                    guard let self = self else { return }
                    self.finishRecording()
                })
                .disposed(by: disposeBag)
        }
    }
    private func calculateSNR(_ sampleBuffer: CMSampleBuffer) -> Double {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return 0
        }
        let attachments = pixelBuffer.attachments
        let propagated = attachments.propagated
        guard let metadata = propagated["MetadataDictionary"] as? [String: Any] else {
            print("There is no metadata in sampleBuffer")
            return 0
        }
        guard let snr = metadata["SNR"] as? Double else {
            print("can't cast snr's type")
            return 0
        }
        return snr
    }
    /// get pixel data from RTCVideoFrame
    func convertRTCVideoFrameToPixelBuffer(_ rtcVideoFrame: RTCVideoFrame) -> CVPixelBuffer? {
        guard let rtcCVPixelBuffer = rtcVideoFrame.buffer as? RTCCVPixelBuffer else {
            return nil
        }
        return rtcCVPixelBuffer.pixelBuffer
    }

    func createSampleBuffer(pixelBuffer: CVPixelBuffer, frame: RTCVideoFrame) -> CMSampleBuffer? {
        var sampleBuffer: CMSampleBuffer?

        let rtcTimeStamp = frame.timeStampNs
        let timescale: CMTimeScale = 1_000_000_000
        let presentationTimeStamp = CMTime(value: rtcTimeStamp, timescale: timescale)
        let duration = CMTime.zero
        var timingInfo = CMSampleTimingInfo(duration: duration,
                                            presentationTimeStamp: presentationTimeStamp,
                                            decodeTimeStamp: presentationTimeStamp)

        var videoInfo: CMVideoFormatDescription?
        CMVideoFormatDescriptionCreateForImageBuffer(allocator: kCFAllocatorDefault,
                                                     imageBuffer: pixelBuffer,
                                                     formatDescriptionOut: &videoInfo)

        CMSampleBufferCreateReadyWithImageBuffer(allocator: kCFAllocatorDefault,
                                                 imageBuffer: pixelBuffer,
                                                 formatDescription: videoInfo!,
                                                 sampleTiming: &timingInfo,
                                                 sampleBufferOut: &sampleBuffer)
        return sampleBuffer
    }
}
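To see whether the two timestamp sources even share a clock, I can add something like the diagnostic below. This is only a rough sketch: logClockGap is a name I made up and print stands in for the project's logger. As far as I understand, AVCaptureSession stamps its sample buffers with the host clock, while timeStampNs comes from WebRTC's own clock.

import CoreMedia
import WebRTC

/// Hypothetical helper: logs the gap between the PTS derived from a WebRTC frame
/// and the current host time (the clock the audio capture timestamps are based on).
func logClockGap(for frame: RTCVideoFrame) {
    // PTS the recorder derives from the WebRTC frame (nanoseconds on WebRTC's clock)
    let videoPTS = CMTime(value: frame.timeStampNs, timescale: 1_000_000_000)
    // Current host time, which AVCaptureSession uses when stamping audio buffers
    let hostNow = CMClockGetTime(CMClockGetHostTimeClock())
    print("video PTS: \(videoPTS.seconds), host now: \(hostNow.seconds), gap: \(hostNow.seconds - videoPTS.seconds)")
}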
// start
private func setAudioCaptureSession() {
    if self.captureSession == nil {
        self.captureSession = AVCaptureSession()
        self.captureSession?.beginConfiguration()

        let audioDevice = AVCaptureDevice.default(for: .audio)
        let audioOutput = AVCaptureAudioDataOutput()

        do {
            let audioInput = try AVCaptureDeviceInput(device: audioDevice!)
            if self.captureSession!.canAddInput(audioInput) {
                self.captureSession?.addInput(audioInput)
            }
        } catch {
            ErrorReportManager.shared.log(
                message: "\(self)_\(#function): \(DGError.DGInternalError.audioInputCreateFail.messageDescription) \n reason: \(error.localizedDescription)"
            )
        }

        guard self.captureSession!.canAddOutput(audioOutput) else { return }
        self.captureSession?.addOutput(audioOutput)
        audioOutput.setSampleBufferDelegate(self, queue: audioQueue)

        self.captureSession?.commitConfiguration()
    }

    if self.captureSession?.isRunning == false {
        self.captureSession?.startRunning()
    }
}
// capturing
private func appendSampleBuffer(sampleBuffer: CMSampleBuffer, isVideo: Bool) {
    videoQueue.async {
        if isVideo {
            // Video buffers are only appended once the writer session has started
            if self.isStartedSession == true {
                if self.assetWriterVideoInput?.isReadyForMoreMediaData == true {
                    Log.info("Video Buffer: \(sampleBuffer.presentationTimeStamp.seconds)")
                    self.assetWriterVideoInput?.append(sampleBuffer)
                }
            }
        } else {
            if self.assetWriterAudioInput?.isReadyForMoreMediaData == true {
                // The writer session starts at the PTS of the first audio buffer
                if self.isStartedSession == false {
                    self.assetWriter?.startSession(atSourceTime: sampleBuffer.presentationTimeStamp)
                    self.isStartedSession = true
                }
                self.assetWriterAudioInput?.append(sampleBuffer)
                Log.network("Audio Buffer: \(sampleBuffer.presentationTimeStamp.seconds)")
            }
        }
    }
}
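For what it's worth, with this flow the AVAssetWriter session starts at the presentation timestamp of the first audio buffer (stamped by the capture session), every video buffer carries a timestamp derived from the WebRTC frame's timeStampNs, and video buffers that arrive before the session has started are dropped.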
// Get audio sampleBuffer from AVCaptureAudioDataOutputSampleBufferDelegate
extension FrameRenderer: AVCaptureAudioDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        switch captureStatus {
        case .capturing:
            appendSampleBuffer(sampleBuffer: sampleBuffer, isVideo: false)
        default:
            return
        }
    }
}
// end
func finishRecording() {
    guard assetWriter?.status != .unknown else { return }

    assetWriterVideoInput?.markAsFinished()
    assetWriterAudioInput?.markAsFinished()
    assetWriter?.finishWriting {
        Log.debug("finish recording")
    }

    self.isStartedSession = false
    self.assetWriter = nil
    self.assetWriterVideoInput = nil
    self.assetWriterAudioInput = nil
    self.captureStatus = .idle

    if self.captureSession?.isRunning == true {
        self.captureSession?.stopRunning()
    }

    remainTime = FrameRenderer.maximumRecordingTime
    isMoved = false
    self.videoDuration = videoDurationCount
    isManualRecording = false
    uploadVideo(self.fileURL!, self.fileName, isManualRecording: isManualRecording)
}
Below are the sampleBuffer.presentationTimeStamp.seconds logs for audio and video when they are out of sync.
- first recording log
start
Audio Buffer: 607505.8207029478
Video Buffer: 607504.618464
Audio Buffer: 607505.8420181406
Audio Buffer: 607505.8633560091
Audio Buffer: 607505.8846938775
Video Buffer: 607504.667967
end
Video Buffer: 607556.333763
Audio Buffer: 607557.5753514739
Audio Buffer: 607557.5966893424
Video Buffer: 607556.375038
Audio Buffer: 607557.6180272109
Audio Buffer: 607557.6393650793
- a day later
start
Audio Buffer: 627125.3272789116
Audio Buffer: 627125.3485941043
Video Buffer: 627123.609696
Audio Buffer: 627125.3699319728
Audio Buffer: 627125.3912698412
Video Buffer: 627123.642925
end
Audio Buffer: 627215.0126077097
Audio Buffer: 627215.0339455783
Video Buffer: 627213.269289
Audio Buffer: 627215.055260771
Video Buffer: 627213.302609
Audio Buffer: 627215.0765986395
Audio Buffer: 627215.0979365079
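From these logs, the audio PTS leads the video PTS by about 607505.821 - 607504.618 ≈ 1.20 s at the start of the first recording, and by about 627125.327 - 627123.610 ≈ 1.72 s a day later, so the offset between the two timestamp streams grows while the app stays running.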