I am trying to integrate an ARViewContainer, which derives a 3D coordinate from a 2D frame, into a CoreML playground. Before the integration, both parts work on their own: the ARViewContainer takes a CGRect frame as input and outputs the 3D coordinate of its middle point, while the CoreML playground runs a YOLO model and draws a frame around each identified object. My expected behavior after the integration is to get the 3D coordinate of the identified object. My preliminary result, however, is that the ARViewContainer works well but the CoreML model no longer seems to work.
My ARViewContainer looks like this (a minimal standalone usage sketch follows the code):
import ARKit
import SceneKit
import SwiftUI

struct ARViewContainer: UIViewRepresentable {
    @Binding var frameInput: CGRect? // Use Binding to trigger updates

    func makeUIView(context: Context) -> ARSCNView {
        let arView = ARSCNView()
        let configuration = ARWorldTrackingConfiguration()
        configuration.planeDetection = [.horizontal, .vertical]
        arView.session.run(configuration)
        arView.delegate = context.coordinator

        let tapGesture = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap))
        arView.addGestureRecognizer(tapGesture)
        return arView
    }

    func updateUIView(_ uiView: ARSCNView, context: Context) {
        if let frame = frameInput {
            context.coordinator.handleFrameInput(frame: frame, in: uiView)
        }
    }

    func makeCoordinator() -> Coordinator {
        return Coordinator()
    }

    class Coordinator: NSObject, ARSCNViewDelegate {
        // Handle single tap
        @objc func handleTap(recognizer: UITapGestureRecognizer) {
            guard let view = recognizer.view as? ARSCNView else { return }
            let location = recognizer.location(in: view)
            performRaycast(at: [location], in: view)
            print("Trigger Hand Input")
        }

        // Handle 2D frame input
        func handleFrameInput(frame: CGRect, in view: ARSCNView) {
            let samplePoints = generateSamplePoints(in: frame, sampleCount: 10)
            performRaycast(at: samplePoints, in: view)
            print("Trigger Frame Input")
        }

        // Perform raycasting for multiple points
        private func performRaycast(at points: [CGPoint], in view: ARSCNView) {
            for point in points {
                guard let raycastQuery = view.raycastQuery(from: point, allowing: .estimatedPlane, alignment: .any),
                      let raycastResult = view.session.raycast(raycastQuery).first else {
                    print("No valid surface at point \(point)")
                    continue
                }
                let position = raycastResult.worldTransform.columns.3
                print("3D position: (\(position.x), \(position.y), \(position.z))")
                // Optional: Add ARAnchor at this position
                let anchor = ARAnchor(transform: raycastResult.worldTransform)
                view.session.add(anchor: anchor)
            }
        }

        // Generate sample points within a given CGRect
        private func generateSamplePoints(in frame: CGRect, sampleCount: Int) -> [CGPoint] {
            print("Trigger Sample Generation")
            var points = [CGPoint]()
            let stepX = frame.width / CGFloat(sampleCount)
            let stepY = frame.height / CGFloat(sampleCount)
            for i in 0..<sampleCount {
                for j in 0..<sampleCount {
                    let x = frame.minX + CGFloat(i) * stepX
                    let y = frame.minY + CGFloat(j) * stepY
                    points.append(CGPoint(x: x, y: y))
                }
            }
            return points
        }

        // Render a visual marker for the anchor (optional)
        func renderer(_ renderer: SCNSceneRenderer, didAdd node: SCNNode, for anchor: ARAnchor) {
            let sphere = SCNSphere(radius: 0.02)
            let sphereNode = SCNNode(geometry: sphere)
            sphereNode.geometry?.firstMaterial?.diffuse.contents = UIColor.green
            node.addChildNode(sphereNode)
        }
    }
}
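For reference, before the integration I drove this container on its own with a hard-coded rect, roughly like the sketch below (the ARViewContainerDemo name and the rect values are placeholders, not part of the playground):

import SwiftUI

// Minimal standalone harness (illustrative only): feed one fixed screen-space
// rect to the container; its coordinator raycasts sample points inside it and
// prints the resulting 3D positions.
struct ARViewContainerDemo: View {
    @State private var frameInput: CGRect? = nil

    var body: some View {
        ARViewContainer(frameInput: $frameInput)
            .ignoresSafeArea()
            .onAppear {
                // Placeholder rect in view coordinates; setting it triggers
                // updateUIView, which calls Coordinator.handleFrameInput.
                frameInput = CGRect(x: 100, y: 300, width: 150, height: 150)
            }
    }
}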
The CoreML playground is attached here: https://drive.google.com/file/d/1PddUgvLvtUmCVX9z0T2X0p_zbZzDvitv/view?usp=sharing
My current approach is described below. The original CoreML playground mainly runs ObjectDetectionView (in ObjectDetectionView.swift), which looks like this:
import ARKit
import SwiftUI

struct ObjectDetectionView {
    @State private var state = ObjectDetectionViewState()
    @State private var session = ARSession()

    private let configuration: AROrientationTrackingConfiguration = {
        let configuration = AROrientationTrackingConfiguration()
        return configuration
    }()

    private var imageResolution: CGSize { self.configuration.videoFormat.imageResolution }
    private var cameraFPS: Double { Double(self.configuration.videoFormat.framesPerSecond) }

    private func startSession() {
        self.session.run(self.configuration)
    }

    private func stopSession() {
        self.session.pause()
    }
}

extension ObjectDetectionView: View {
    var body: some View {
        ZStack {
            if self.state.isLoading {
                HStack(spacing: 5) {
                    ProgressView()
                    Text("Loading a model...")
                }
            } else {
                self.realtimePreview
            }
        }
        .task {
            self.session.delegate = self.state
            try? await self.state.loadModel()
        }
        .onAppear {
            self.startSession()
        }
        .onDisappear {
            self.stopSession()
        }
    }

    private var realtimePreview: some View {
        ZStack {
            ARViewContainer(session: self.session)
            OverlayView(frameData: self.state.frameData, imageResolution: self.imageResolution)
        }
        .ignoresSafeArea()
    }
}
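The playground's own ARViewContainer is, as far as I can tell, just a thin wrapper that renders the shared session. I have not pasted it verbatim (it is in the linked project); it is roughly something like the following sketch, where the name PlaygroundARViewContainer and the choice of ARSCNView are my assumptions:

import ARKit
import SwiftUI

// Assumed shape of the playground's original container (not verbatim): it only
// displays the ARSession owned by ObjectDetectionView and adds no behavior.
struct PlaygroundARViewContainer: UIViewRepresentable {
    let session: ARSession

    func makeUIView(context: Context) -> ARSCNView {
        let view = ARSCNView()
        view.session = session // reuse the shared session; its delegate feeds the CoreML model
        return view
    }

    func updateUIView(_ uiView: ARSCNView, context: Context) {}
}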
Since that ARViewContainer basically does nothing special, I just replaced it with my own ARViewContainer, as follows:
import ARKit
import SwiftUI

struct ObjectDetectionView {
    @State private var frameInput: CGRect? = nil
    @State private var state = ObjectDetectionViewState()
    @State private var session = ARSession()

    private let configuration: AROrientationTrackingConfiguration = {
        let configuration = AROrientationTrackingConfiguration()
        return configuration
    }()

    private var imageResolution: CGSize { self.configuration.videoFormat.imageResolution }
    private var cameraFPS: Double { Double(self.configuration.videoFormat.framesPerSecond) }

    private func startSession() {
        self.session.run(self.configuration)
    }

    private func stopSession() {
        self.session.pause()
    }
}

extension ObjectDetectionView: View {
    var body: some View {
        ZStack {
            if self.state.isLoading {
                HStack(spacing: 5) {
                    ProgressView()
                    Text("Loading a model...")
                }
            } else {
                self.realtimePreview
            }
        }
        .task {
            self.session.delegate = self.state
            try? await self.state.loadModel()
        }
        .onAppear {
            self.startSession()
        }
        .onDisappear {
            self.stopSession()
        }
        .onChange(of: self.state.frameData) {
            if let bbox = self.state.frameData?.detections.first?.bbox {
                self.frameInput = bbox
            }
        }
    }

    private var realtimePreview: some View {
        ZStack {
            ARViewContainer(frameInput: $frameInput)
            OverlayView(frameData: self.state.frameData, imageResolution: self.imageResolution)
        }
        .ignoresSafeArea()
    }
}
The result is that the ARViewContainer still works well, but the CoreML model no longer seems to work: there is no object identification anymore.
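To narrow down whether the model has really stopped producing detections, as opposed to the bbox simply never reaching the container, the onChange block above can be instrumented with a couple of prints, roughly like this sketch (the prints are not in the project):

.onChange(of: self.state.frameData) {
    // Debug sketch: check whether detections still arrive after the swap.
    let count = self.state.frameData?.detections.count ?? 0
    print("frameData updated, detections: \(count)")
    if let bbox = self.state.frameData?.detections.first?.bbox {
        print("first bbox: \(bbox)")
        self.frameInput = bbox
    }
}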
I'd appreciate any insights on how to resolve this. The deadline is approaching, lol :)