Jake3231

Reputation: 713

Vision + ARKit Object Tracking Returns No Results

I am working on a Vision app that incorporates ARKit for measuring distances. I've been running into an error when using Vision to track objects: the results coming from a VNSequenceRequestHandler are empty. The view controller's code is included below. Note that much of it is based on sample code available online.

import UIKit
import SceneKit
import ARKit
import Vision

class ViewController: UIViewController, ARSCNViewDelegate {
    @IBOutlet weak var highlightView: UIView!
    var sceneSphere = SCNNode()

    @IBOutlet var sceneView: ARSCNView!
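    // The most recent tracking result; fed into the next VNTrackObjectRequest each frame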
    private var lastObservation: VNDetectedObjectObservation?
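    // Reused across frames so Vision can carry tracking state from one request to the next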
    private let visionSequenceHandler = VNSequenceRequestHandler()

    override func viewDidLoad() {
        super.viewDidLoad()

        // Set the view's delegate
        sceneView.delegate = self
        // Show statistics such as fps and timing information
        sceneView.showsStatistics = true

        // Create a new scene
        let scene = SCNScene(named: "art.scnassets/VisionScene.scn")!

        // Set the scene to the view
        sceneView.scene = scene
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)

        // Create a session configuration
        let configuration = ARWorldTrackingConfiguration()

        // Run the view's session
        sceneView.session.run(configuration)
        sceneSphere = sceneView.scene.rootNode.childNode(withName: "sphere", recursively: false)!
        sceneSphere.scale = SCNVector3(0.5, 0.5, 0.5)
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)

        // Pause the view's session
        sceneView.session.pause()
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Release any cached data, images, etc that aren't in use.
    }

    // MARK: - ARSCNViewDelegate

/*
    // Override to create and configure nodes for anchors added to the view's session.
    func renderer(_ renderer: SCNSceneRenderer, nodeFor anchor: ARAnchor) -> SCNNode? {
        let node = SCNNode()

        return node
    }
*/
    @IBAction func userTapped(_ sender: UITapGestureRecognizer) {
        //print(sceneView.hitTest(sender.location(in: sceneView), types: ARHitTestResult.ResultType.featurePoint))
        print(sender.location(ofTouch: 0, in: view))
        let tapLocation = sender.location(ofTouch: 0, in: sceneView)
        let newRect = CGRect(x: tapLocation.x, y: tapLocation.y, width: 100, height: 100)
        highlightView.frame = newRect
        let newObservation = VNDetectedObjectObservation(boundingBox: newRect)
        sceneSphere.position.x = Float(newObservation.boundingBox.midX)
        sceneSphere.position.y = Float(newObservation.boundingBox.midY)
        self.lastObservation = newObservation
    }

    func session(_ session: ARSession, didFailWithError error: Error) {
        // Present an error message to the user

    }

    func sessionWasInterrupted(_ session: ARSession) {
        // Inform the user that the session has been interrupted, for example, by presenting an overlay

    }

    func sessionInterruptionEnded(_ session: ARSession) {
        // Reset tracking and/or remove existing anchors if consistent tracking is required

    }
}
// ARSCNViewDelegate already refines SCNSceneRendererDelegate, so declaring the conformance again would be a redundant-conformance error
extension ViewController {

    func renderer(_ renderer: SCNSceneRenderer, updateAtTime time: TimeInterval) {
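        // Called once per frame on the SceneKit rendering queue, so this issues a new tracking request for every rendered frame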
        //print("updateFrame1")
        guard
            // make sure the pixel buffer can be converted
            let pixelBuffer = (sceneView.session.currentFrame?.capturedImage),
            // make sure that there is a previous observation we can feed into the request
            let lastObservation = self.lastObservation
            else { print("nolastrequest"); return }
        print("lO2:\(lastObservation)")
        print("lO3:\(lastObservation.boundingBox)")
        print(pixelBuffer)
        let request = VNTrackObjectRequest(detectedObjectObservation: lastObservation, completionHandler: self.handleVisionRequestUpdate)
        // set the accuracy to high
        // this is slower, but it works a lot better
        request.trackingLevel = .accurate
        do {
            print(pixelBuffer)
            print(request)
            try self.visionSequenceHandler.perform([request], on: pixelBuffer)
        } catch {
            print("Throws: \(error)")
        }
    }
    private func handleVisionRequestUpdate(_ request: VNRequest, error: Error?) {
        print("HandleVisionRequest")
        print(request.results?.first)
        print(request)
        // Dispatch to the main queue because we are touching non-atomic, non-thread safe properties of the view controller
        DispatchQueue.main.async {
            // make sure we have an actual result
            print("lO1:\(request.results?.first)")
            print("nORC: \(request.results)")
            // prepare for the next loop: keep the new observation only if the tracker returned a result
            if let newObservation = request.results?.first as? VNDetectedObjectObservation {
                self.lastObservation = newObservation
            }

            // check the confidence level before updating the UI
           /* guard newObservation.confidence >= 0.3 else {
                // hide the rectangle when we lose accuracy so the user knows something is wrong
                self.highlightView?.frame = .zero
                return
            }*/

            // calculate view rect
            //var transformedRect = newObservation.boundingBox
            //transformedRect.origin.y = 1 - transformedRect.origin.y
         //   let convertedRect = self.cameraLayer.layerRectConverted(fromMetadataOutputRect: transformedRect)
            /*// move the highlight view
            let xChange = ((self.highlightView?.frame.midX)!-convertedRect.midX)
            let yChange = ((self.highlightView?.frame.midY)!-convertedRect.midY)
            var inchChange = ((xChange + yChange)/72)
            inchChange = abs(round(inchChange*60))
            print(inchChange)
            self.speedLabal.text = "\(inchChange) In/sec"*/
            //self.highlightView?.frame = convertedRect
        }
    }

}

If I disable the line self.lastObservation = newObservation in handleVisionRequestUpdate, this error does not occur, but then the application does not function as intended.

Whenever the issue occurs, the result of print("nORC: \(request.results)") is nil. I can upload the entire project if that would be helpful. I would appreciate any feedback on this. Thank you in advance!

Upvotes: 1

Views: 1044

Answers (1)

SilentK

Reputation: 652

The Vision framework uses a different coordinate system than UIKit. UIKit's origin is in the top-left corner, and widths and heights are measured in points up to the screen size. Vision's origin is in the bottom-left corner, and its coordinates are normalized, so x, y, width, and height all run from 0 to 1. This is why your code is not working: you are passing a rect measured in UIKit points (a 100x100 box at the tap location) where Vision expects a normalized rect.
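For illustration, here is a minimal sketch of the two conversions (the helper names are mine, not part of any framework, and this assumes the rect lives in the view whose size you pass in; a real app would also account for the captured image's resolution and orientation):

import UIKit

// Hypothetical helpers, not part of Vision or UIKit.
// UIKit rect (origin top-left, in points) -> Vision rect (origin bottom-left, normalized to 0...1)
func visionRect(fromUIKitRect rect: CGRect, in viewSize: CGSize) -> CGRect {
    return CGRect(x: rect.origin.x / viewSize.width,
                  y: 1 - rect.maxY / viewSize.height,
                  width: rect.width / viewSize.width,
                  height: rect.height / viewSize.height)
}

// Vision rect (normalized, origin bottom-left) -> UIKit rect (origin top-left, in points)
func uiKitRect(fromVisionRect rect: CGRect, in viewSize: CGSize) -> CGRect {
    return CGRect(x: rect.origin.x * viewSize.width,
                  y: (1 - rect.maxY) * viewSize.height,
                  width: rect.width * viewSize.width,
                  height: rect.height * viewSize.height)
}

Vision also provides VNNormalizedRectForImageRect and VNImageRectForNormalizedRect for the scaling part, but note that they do not flip the y-axis for you.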

Convert the tap rect from UIKit coordinates to Vision's normalized coordinate space before creating the VNDetectedObjectObservation you hand to the VNTrackObjectRequest, and convert each result's boundingBox back to UIKit coordinates before updating your views.
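Applied to your userTapped(_:) handler, the fix might look like this (a sketch reusing the hypothetical visionRect(fromUIKitRect:in:) helper above, keeping the 100x100-point box from your code):

@IBAction func userTapped(_ sender: UITapGestureRecognizer) {
    let tapLocation = sender.location(ofTouch: 0, in: sceneView)
    let uiKitBox = CGRect(x: tapLocation.x, y: tapLocation.y, width: 100, height: 100)
    highlightView.frame = uiKitBox
    // Seed the tracker with a normalized rect, not the raw UIKit one
    let normalizedBox = visionRect(fromUIKitRect: uiKitBox, in: sceneView.bounds.size)
    lastObservation = VNDetectedObjectObservation(boundingBox: normalizedBox)
}

In handleVisionRequestUpdate, do the reverse: run newObservation.boundingBox back through uiKitRect(fromVisionRect:in:) before assigning it to highlightView.frame.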

Upvotes: 1
