Ibroad

Reputation: 39

Having trouble integrating an ARViewContainer into a CoreML playground

I tried to integrate an ARViewContainer, which turns a 2D frame into a 3D coordinate, into a CoreML playground. Before the integration, both parts work on their own: the ARViewContainer takes a CGRect frame as input and outputs the 3D coordinate of its middle point, while the CoreML playground runs a YOLO model and draws a frame around each identified object. My expected behavior after the integration is that we get the 3D coordinate of the identified object. But my preliminary result is that the ARViewContainer works well, while the CoreML model doesn't seem to work.

My ARViewContainer looks like this:

import ARKit
import SceneKit
import SwiftUI

struct ARViewContainer: UIViewRepresentable {

    @Binding var frameInput: CGRect? // Use Binding to trigger updates

    func makeUIView(context: Context) -> ARSCNView {
        let arView = ARSCNView()
        let configuration = ARWorldTrackingConfiguration()
        configuration.planeDetection = [.horizontal, .vertical]
        arView.session.run(configuration)
        arView.delegate = context.coordinator

        let tapGesture = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap))
        arView.addGestureRecognizer(tapGesture)

        return arView
    }

    func updateUIView(_ uiView: ARSCNView, context: Context) {
        if let frame = frameInput {
            context.coordinator.handleFrameInput(frame: frame, in: uiView)
        }
    }

    func makeCoordinator() -> Coordinator {
        return Coordinator()
    }

    class Coordinator: NSObject, ARSCNViewDelegate {
        // Handle single tap
        @objc func handleTap(recognizer: UITapGestureRecognizer) {
            guard let view = recognizer.view as? ARSCNView else { return }
            let location = recognizer.location(in: view)
            performRaycast(at: [location], in: view)
            print("Trigger Hand Input")

        }

        // Handle 2D frame input
        func handleFrameInput(frame: CGRect, in view: ARSCNView) {
            let samplePoints = generateSamplePoints(in: frame, sampleCount: 10)
            performRaycast(at: samplePoints, in: view)
            print("Trigger Fram Input")
        }

        // Perform raycasting for multiple points
        private func performRaycast(at points: [CGPoint], in view: ARSCNView) {
            for point in points {
                guard let raycastQuery = view.raycastQuery(from: point, allowing: .estimatedPlane, alignment: .any),
                      let raycastResult = view.session.raycast(raycastQuery).first else {
                    print("No valid surface at point \(point)")
                    continue
                }

                let position = raycastResult.worldTransform.columns.3
                print("3D position: (\(position.x), \(position.y), \(position.z))")

                // Optional: Add ARAnchor at this position
                let anchor = ARAnchor(transform: raycastResult.worldTransform)
                view.session.add(anchor: anchor)
            }
        }

        // Generate sample points within a given CGRect
        private func generateSamplePoints(in frame: CGRect, sampleCount: Int) -> [CGPoint] {
            print("Trigger Sample Generation")

            var points = [CGPoint]()
            let stepX = frame.width / CGFloat(sampleCount)
            let stepY = frame.height / CGFloat(sampleCount)

            for i in 0..<sampleCount {
                for j in 0..<sampleCount {
                    let x = frame.minX + CGFloat(i) * stepX
                    let y = frame.minY + CGFloat(j) * stepY
                    points.append(CGPoint(x: x, y: y))
                }
            }
            return points
        }

        // Render a visual marker for the anchor (optional)
        func renderer(_ renderer: SCNSceneRenderer, didAdd node: SCNNode, for anchor: ARAnchor) {
            let sphere = SCNSphere(radius: 0.02)
            let sphereNode = SCNNode(geometry: sphere)
            sphereNode.geometry?.firstMaterial?.diffuse.contents = UIColor.green
            node.addChildNode(sphereNode)
        }
    }
}
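
For reference, the container works on its own before integration; a minimal SwiftUI harness along these lines is enough to exercise it (RaycastTestView and the hard-coded rect are placeholders for this sketch, not part of the actual project):

import SwiftUI

// Standalone harness for the container above (sketch only).
struct RaycastTestView: View {
    @State private var frameInput: CGRect? = nil

    var body: some View {
        ZStack {
            ARViewContainer(frameInput: $frameInput)
            VStack {
                Spacer()
                Button("Raycast a test region") {
                    // Feed a hard-coded screen-space rect; updateUIView then
                    // raycasts the sampled points inside it.
                    frameInput = CGRect(x: 150, y: 350, width: 100, height: 100)
                }
                .padding(.bottom, 40)
            }
        }
        .ignoresSafeArea()
    }
}

Tapping the button pushes a screen-space rect through the binding, updateUIView fires, and the sampled points inside the rect get raycast and anchored.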

The CoreML playground is attached here: https://drive.google.com/file/d/1PddUgvLvtUmCVX9z0T2X0p_zbZzDvitv/view?usp=sharing

My current approach is described below.

The original CoreML playground mainly runs ObjectDetectionView (in ObjectDetectionView.swift), which looks like this:

import ARKit
import SwiftUI

struct ObjectDetectionView {
    @State private var state = ObjectDetectionViewState()
    @State private var session = ARSession()
    private let configuration: AROrientationTrackingConfiguration = {
        let configuration = AROrientationTrackingConfiguration()
        return configuration
    }()

    private var imageResolution: CGSize { self.configuration.videoFormat.imageResolution }
    private var cameraFPS: Double { Double(self.configuration.videoFormat.framesPerSecond) }

    private func startSession() {
        self.session.run(self.configuration)
    }

    private func stopSession() {
        self.session.pause()
    }
}

extension ObjectDetectionView: View {
    var body: some View {
        ZStack {
            if self.state.isLoading {
                HStack(spacing: 5) {
                    ProgressView()
                    Text("Loading a model...")
                }
            } else {
                self.realtimePreview
            }
        }
        .task {
            self.session.delegate = self.state
            try? await self.state.loadModel()
        }
        .onAppear {
            self.startSession()
        }
        .onDisappear {
            self.stopSession()
        }
    }

    private var realtimePreview: some View {
        ZStack {
            ARViewContainer(session: self.session)
            OverlayView(frameData: self.state.frameData, imageResolution: self.imageResolution)
        }
        .ignoresSafeArea()
    }
}
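
The playground's own ARViewContainer (its source is in the linked project) is, as far as I can tell from the call site ARViewContainer(session:) above, just a thin wrapper that displays the shared ARSession, roughly like this (a reconstruction for context, not the actual playground code):

import ARKit
import SwiftUI

// Rough reconstruction of the playground's ARViewContainer (sketch only):
// it simply shows the camera feed of the session owned by ObjectDetectionView.
struct ARViewContainer: UIViewRepresentable {
    let session: ARSession

    func makeUIView(context: Context) -> ARSCNView {
        let arView = ARSCNView()
        arView.session = session   // reuse the shared session; no extra configuration here
        return arView
    }

    func updateUIView(_ uiView: ARSCNView, context: Context) {}
}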

Since that ARViewContainer does nothing special, I simply replaced it with my own ARViewContainer, as follows:

import ARKit
import SwiftUI

struct ObjectDetectionView {
    @State private var frameInput: CGRect? = nil
    @State private var state = ObjectDetectionViewState()
    @State private var session = ARSession()
    private let configuration: AROrientationTrackingConfiguration = {
        let configuration = AROrientationTrackingConfiguration()
        return configuration
    }()

    private var imageResolution: CGSize { self.configuration.videoFormat.imageResolution }
    private var cameraFPS: Double { Double(self.configuration.videoFormat.framesPerSecond) }

    private func startSession() {
        self.session.run(self.configuration)
    }

    private func stopSession() {
        self.session.pause()
    }
}    

extension ObjectDetectionView: View {
    var body: some View {
        ZStack {
            if self.state.isLoading {
                HStack(spacing: 5) {
                    ProgressView()
                    Text("Loading a model...")
                }
            } else {
                self.realtimePreview
            }
        }
        .task {
            self.session.delegate = self.state
            try? await self.state.loadModel()
        }
        .onAppear {
            self.startSession()
        }
        .onDisappear {
            self.stopSession()
        }
        .onChange(of: self.state.frameData) {
            if let bbox = self.state.frameData?.detections.first?.bbox {
                self.frameInput = bbox
            }
        }
    }

    private var realtimePreview: some View {
        ZStack {
            ARViewContainer(frameInput: $frameInput)
            OverlayView(frameData: self.state.frameData, imageResolution: self.imageResolution)
        }
        .ignoresSafeArea()
    }
}

The result is that the ARViewContainer still works well, but the CoreML model no longer seems to work: there is no object detection anymore.
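
To narrow down whether the detection session even receives camera frames after the change, a throwaway delegate like the one below could temporarily be swapped in for state (FrameCounter is just a placeholder for this sketch, not part of the playground):

import ARKit

// Minimal stand-in delegate to count incoming camera frames (sketch only).
final class FrameCounter: NSObject, ARSessionDelegate {
    private var frameCount = 0

    func session(_ session: ARSession, didUpdate frame: ARFrame) {
        frameCount += 1
        if frameCount % 60 == 0 {
            print("Detection session has received \(frameCount) frames")
        }
    }
}

// Usage while debugging, in place of `self.session.delegate = self.state`:
// let counter = FrameCounter()
// session.delegate = counter
// (keep a strong reference to `counter`; ARSession holds its delegate weakly)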

I'd appreciate any insights on how to resolve this. The deadline is approaching, lol :)

Upvotes: 0

Views: 16

Answers (0)
