I am trying to implement text recognition on a live camera feed. I used Google's ML Kit quickstart example (https://github.com/googlesamples/mlkit/blob/master/ios/quickstarts/vision/VisionExample/CameraViewController.swift) as a reference, but every time I start text recognition I get a blank white screen instead of the camera preview. I have already added the necessary camera permissions, so I don't believe that's the issue. Could anyone help me troubleshoot this? My code is below:
import UIKit
import SwiftUI
import AVFoundation
import MLKitTextRecognition
import MLKitVision
import FirebaseFirestore
import FirebaseCore
class ScannerViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
  var codes: [String] = []
  var bgColor: Binding<Color>!
  var greenScans: Binding<Int>!
  var scannedCode: Binding<String?>!
  var audioPlayer: AVAudioPlayer?
  var invalidAudioPlayer: AVAudioPlayer?
  private var textRecognizer: TextRecognizer!
  private var scanBuffer: [String] = []
  private var previewLayer: AVCaptureVideoPreviewLayer!
  private lazy var captureSession = AVCaptureSession()
  private var isProcessingBuffer = false
  private var isProcessingFrame = false
  var isUsingFrontCamera = false
  private let sessionQueue = DispatchQueue(label: "sessionQueue")
  private var lastFrame: CMSampleBuffer?
  private var cameraView: UIView!

  private lazy var annotationOverlayView: UIView = {
    precondition(isViewLoaded)
    let annotationOverlayView = UIView(frame: .zero)
    annotationOverlayView.translatesAutoresizingMaskIntoConstraints = false
    return annotationOverlayView
  }()

  private lazy var previewOverlayView: UIImageView = {
    precondition(isViewLoaded)
    let previewOverlayView = UIImageView(frame: .zero)
    previewOverlayView.contentMode = UIView.ContentMode.scaleAspectFill
    previewOverlayView.translatesAutoresizingMaskIntoConstraints = false
    return previewOverlayView
  }()

  override func viewDidLoad() {
    super.viewDidLoad()
    prepareAudioPlayer()
    // setupCameraView() creates the preview layer and attaches it to cameraView,
    // so it is not re-created (and orphaned) here.
    setupCameraView()
    let latinOptions = TextRecognizerOptions()
    textRecognizer = TextRecognizer.textRecognizer(options: latinOptions)
    setUpCaptureSessionOutput()
    setUpCaptureSessionInput()
  }

  override func viewDidAppear(_ animated: Bool) {
    super.viewDidAppear(animated)
    startSession()
    previewLayer.frame = cameraView.bounds
  }

  private func setupCameraView() {
    cameraView = UIView()
    cameraView.translatesAutoresizingMaskIntoConstraints = false
    view.addSubview(cameraView)
    // Set up constraints so cameraView fills the screen.
    NSLayoutConstraint.activate([
      cameraView.leadingAnchor.constraint(equalTo: view.leadingAnchor),
      cameraView.trailingAnchor.constraint(equalTo: view.trailingAnchor),
      cameraView.topAnchor.constraint(equalTo: view.topAnchor),
      cameraView.bottomAnchor.constraint(equalTo: view.bottomAnchor)
    ])
    // Initialize the previewLayer after cameraView has been created and attach it.
    previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
    previewLayer.videoGravity = .resizeAspectFill
    previewLayer.frame = cameraView.bounds
    cameraView.layer.addSublayer(previewLayer)
  }

  override func viewDidLayoutSubviews() {
    super.viewDidLayoutSubviews()
    // Keep previewLayer's frame in sync with cameraView's bounds.
    previewLayer.frame = cameraView.bounds
  }

  override func viewDidDisappear(_ animated: Bool) {
    super.viewDidDisappear(animated)
    stopSession()
  }

  private func setUpCaptureSessionOutput() {
    weak var weakSelf = self
    sessionQueue.async {
      guard let strongSelf = weakSelf else {
        print("Self is nil!")
        return
      }
      strongSelf.captureSession.beginConfiguration()
      strongSelf.captureSession.sessionPreset = .medium
      let output = AVCaptureVideoDataOutput()
      output.videoSettings = [
        (kCVPixelBufferPixelFormatTypeKey as String): kCVPixelFormatType_32BGRA
      ]
      output.alwaysDiscardsLateVideoFrames = true
      let outputQueue = DispatchQueue(label: "videoDataOutputQueue")
      output.setSampleBufferDelegate(strongSelf, queue: outputQueue)
      guard strongSelf.captureSession.canAddOutput(output) else {
        print("Failed to add capture session output.")
        return
      }
      strongSelf.captureSession.addOutput(output)
      strongSelf.captureSession.commitConfiguration()
    }
  }

  private func setUpCaptureSessionInput() {
    weak var weakSelf = self
    sessionQueue.async {
      guard let strongSelf = weakSelf else {
        print("Self is nil!")
        return
      }
      let cameraPosition: AVCaptureDevice.Position = strongSelf.isUsingFrontCamera ? .front : .back
      guard let device = strongSelf.captureDevice(forPosition: cameraPosition) else {
        print("Failed to get capture device for camera position: \(cameraPosition)")
        return
      }
      do {
        strongSelf.captureSession.beginConfiguration()
        let currentInputs = strongSelf.captureSession.inputs
        for input in currentInputs {
          strongSelf.captureSession.removeInput(input)
        }
        let input = try AVCaptureDeviceInput(device: device)
        guard strongSelf.captureSession.canAddInput(input) else {
          print("Failed to add capture session input.")
          return
        }
        strongSelf.captureSession.addInput(input)
        strongSelf.captureSession.commitConfiguration()
      } catch {
        print("Failed to create capture device input: \(error.localizedDescription)")
      }
    }
  }

  private func captureDevice(forPosition position: AVCaptureDevice.Position) -> AVCaptureDevice? {
    // Note: AVCaptureDevice.devices(for:) is deprecated in favor of
    // AVCaptureDevice.DiscoverySession, but it still returns the built-in cameras.
    let devices = AVCaptureDevice.devices(for: .video)
    return devices.first(where: { $0.position == position })
  }

  private func startSession() {
    weak var weakSelf = self
    sessionQueue.async {
      guard let strongSelf = weakSelf else {
        print("Self is nil!")
        return
      }
      strongSelf.captureSession.startRunning()
    }
  }

  private func stopSession() {
    weak var weakSelf = self
    sessionQueue.async {
      guard let strongSelf = weakSelf else {
        print("Self is nil!")
        return
      }
      strongSelf.captureSession.stopRunning()
    }
  }

  func captureOutput(
    _ output: AVCaptureOutput,
    didOutput sampleBuffer: CMSampleBuffer,
    from connection: AVCaptureConnection
  ) {
    guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
      print("Failed to get image buffer from sample buffer.")
      return
    }
    lastFrame = sampleBuffer
    let visionImage = VisionImage(buffer: sampleBuffer)
    let orientation = UIUtilities.imageOrientation(
      fromDevicePosition: isUsingFrontCamera ? .front : .back
    )
    visionImage.orientation = orientation
    // Note: inputImage is created here but is not used by recognizeTextOnDevice(in:) below,
    // which works on the VisionImage.
    guard let inputImage = MLImage(sampleBuffer: sampleBuffer) else {
      print("Failed to create MLImage from sample buffer.")
      return
    }
    inputImage.orientation = orientation
    recognizeTextOnDevice(
      in: visionImage,
      width: CGFloat(CVPixelBufferGetWidth(imageBuffer)),
      height: CGFloat(CVPixelBufferGetHeight(imageBuffer))
    )
  }

  private func recognizeTextOnDevice(
    in image: VisionImage, width: CGFloat, height: CGFloat
  ) {
    var recognizedText: MLKitTextRecognition.Text?
    let latinOptions = TextRecognizerOptions()
    var detectionError: Error?
    do {
      recognizedText = try TextRecognizer.textRecognizer(options: latinOptions)
        .results(in: image)
    } catch let error {
      detectionError = error
    }
    weak var weakSelf = self
    DispatchQueue.main.sync {
      guard let strongSelf = weakSelf else {
        print("Self is nil!")
        return
      }
      strongSelf.updatePreviewOverlayViewWithLastFrame()
      if let detectionError = detectionError {
        print("Failed to recognize text with error: \(detectionError.localizedDescription).")
        return
      }
      guard let recognizedText = recognizedText else {
        print("Text recognition returned no results.")
        return
      }
      // Blocks.
      for block in recognizedText.blocks {
        let points = strongSelf.convertedPoints(
          from: block.cornerPoints, width: width, height: height)
        UIUtilities.addShape(
          withPoints: points,
          to: strongSelf.annotationOverlayView,
          color: UIColor.purple
        )
        // Lines.
        for line in block.lines {
          let points = strongSelf.convertedPoints(
            from: line.cornerPoints, width: width, height: height)
          UIUtilities.addShape(
            withPoints: points,
            to: strongSelf.annotationOverlayView,
            color: UIColor.orange
          )
          // Elements.
          for element in line.elements {
            let normalizedRect = CGRect(
              x: element.frame.origin.x / width,
              y: element.frame.origin.y / height,
              width: element.frame.size.width / width,
              height: element.frame.size.height / height
            )
            let convertedRect = strongSelf.previewLayer.layerRectConverted(
              fromMetadataOutputRect: normalizedRect
            )
            UIUtilities.addRectangle(
              convertedRect,
              to: strongSelf.annotationOverlayView,
              color: UIColor.green
            )
            let label = UILabel(frame: convertedRect)
            label.text = element.text
            label.adjustsFontSizeToFitWidth = true
            strongSelf.rotate(label, orientation: image.orientation)
            strongSelf.annotationOverlayView.addSubview(label)
          }
        }
      }
    }
  }

  private func removeDetectionAnnotations() {
    for annotationView in annotationOverlayView.subviews {
      annotationView.removeFromSuperview()
    }
  }

  private func rotate(_ view: UIView, orientation: UIImage.Orientation) {
    var degree: CGFloat = 0.0
    switch orientation {
    case .up, .upMirrored:
      degree = 90.0
    case .rightMirrored, .left:
      degree = 180.0
    case .down, .downMirrored:
      degree = 270.0
    case .leftMirrored, .right:
      degree = 0.0
    @unknown default:
      degree = 0.0
    }
    view.transform = CGAffineTransform(rotationAngle: degree * .pi / 180)
  }

  private func updatePreviewOverlayViewWithLastFrame() {
    guard let lastFrame = lastFrame,
      let imageBuffer = CMSampleBufferGetImageBuffer(lastFrame)
    else {
      return
    }
    self.updatePreviewOverlayViewWithImageBuffer(imageBuffer)
    self.removeDetectionAnnotations()
  }

  private func updatePreviewOverlayViewWithImageBuffer(_ imageBuffer: CVImageBuffer?) {
    guard let imageBuffer = imageBuffer else {
      return
    }
    let orientation: UIImage.Orientation = isUsingFrontCamera ? .leftMirrored : .right
    let image = UIUtilities.createUIImage(from: imageBuffer, orientation: orientation)
    previewOverlayView.image = image
  }

  func processImage(_ image: UIImage) {
    let visionImage = VisionImage(image: image)
    textRecognizer.process(visionImage) { result, error in
      guard error == nil, let result = result else {
        print("Text recognition failed with error: \(error?.localizedDescription ?? "Unknown error")")
        return
      }
      self.processRecognizedText(result)
    }
  }
}
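
Since I don't think permissions are the problem, one thing I'm considering is checking the camera authorization status explicitly before configuring the session. This is not in the controller above; it's just a minimal sketch of the check I would add (the function name is mine):

  // Not part of the controller above – a minimal runtime authorization check (name is illustrative).
  func checkCameraAuthorization(completion: @escaping (Bool) -> Void) {
    switch AVCaptureDevice.authorizationStatus(for: .video) {
    case .authorized:
      completion(true)
    case .notDetermined:
      // Request access up front so the session is only configured once access is granted.
      AVCaptureDevice.requestAccess(for: .video) { granted in
        completion(granted)
      }
    case .denied, .restricted:
      completion(false)
    @unknown default:
      completion(false)
    }
  }

In case it matters, this is roughly how I present the controller from SwiftUI (simplified sketch; the real wrapper has more in it and the struct name here is illustrative):

  struct ScannerView: UIViewControllerRepresentable {
    @Binding var bgColor: Color
    @Binding var greenScans: Int
    @Binding var scannedCode: String?

    func makeUIViewController(context: Context) -> ScannerViewController {
      let controller = ScannerViewController()
      controller.bgColor = $bgColor
      controller.greenScans = $greenScans
      controller.scannedCode = $scannedCode
      return controller
    }

    func updateUIViewController(_ uiViewController: ScannerViewController, context: Context) {}
  }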