Purple_Ad
Purple_Ad

Reputation: 87

Parallel processing lidar point clouds to find best fit plane

I want to calculate the surface roughness of a lidar point cloud. For that I need to find the best-fit plane through the neighbors of each point and calculate the distance from that plane to the point. This process takes a long time when the radius is increased. I tried to use concurrent.futures, but this made it slower than the code without parallel processing.

import numpy as np
import laspy
from concurrent.futures import ProcessPoolExecutor
from scipy.spatial import cKDTree as KDTree
from skspatial.objects import Plane

def process_point(point_idx, point, lidar_data, neighborhood_radius, tree):
    """Compute the roughness value for a single point.

    Collects every point within ``neighborhood_radius`` of ``point`` using
    ``tree.query_ball_point``, fits a plane to those neighbors with
    ``Plane.best_fit`` and measures ``point`` against that plane with
    ``calculate_distance_to_plane`` (not defined in this snippet).

    Args:
        point_idx: Index of ``point``; returned unchanged so the caller can
            place the result.
        point: Coordinates of the query point.
        lidar_data: Array indexed by the neighbor indices the tree returns.
        neighborhood_radius: Search radius passed to the tree query.
        tree: Object exposing ``query_ball_point(point, radius)``.

    Returns:
        A ``(point_idx, roughness)`` tuple. ``roughness`` is ``np.nan`` when
        fewer than three neighbors are found.
    """
    hits = tree.query_ball_point(point, neighborhood_radius)
    nearby = lidar_data[hits]

    # Fewer than three neighbors: no plane is fitted, report NaN instead.
    if nearby.shape[0] < 3:
        return point_idx, np.nan

    fitted = Plane.best_fit(nearby)
    return point_idx, calculate_distance_to_plane(point, fitted)

# Parallel processing wrapper function
def calculate_roughness_parallel(lidar_data, neighborhood_radius, tree):
    """Compute a roughness value for every point using a process pool.

    One ``process_point`` task is submitted per point. Each task receives
    the point's index, the point itself, the whole ``lidar_data`` array,
    ``neighborhood_radius`` and ``tree``.

    Args:
        lidar_data: Sequence of points; iterated with ``enumerate``.
        neighborhood_radius: Radius forwarded to ``process_point``.
        tree: Spatial index forwarded to ``process_point``.

    Returns:
        A float array of length ``len(lidar_data)`` holding, at each index,
        the roughness reported for that point.
    """
    roughness_values = np.zeros(len(lidar_data))
    with ProcessPoolExecutor() as pool:
        pending = []
        for idx, pt in enumerate(lidar_data):
            task = pool.submit(
                process_point, idx, pt, lidar_data, neighborhood_radius, tree
            )
            pending.append(task)

        # Results are read in submission order; each one carries its own
        # index, so it is written straight to the matching slot.
        for task in pending:
            idx, value = task.result()
            roughness_values[idx] = value
    return roughness_values


if __name__ == "__main__":
    # Read the point cloud and stack x, y, z so that each row is one point.
    las = laspy.read("las0.laz")
    in_point = np.vstack((las.x, las.y, las.z)).T
    # Build the spatial index once; it is handed to every neighbor query.
    tree = KDTree(in_point)
    roughness_result = calculate_roughness_parallel(
        lidar_data=in_point,
        neighborhood_radius=1,
        tree=tree,
    )

Is there any other way to make this faster?

Upvotes: 0

Views: 107

Answers (1)

SomeDude
SomeDude

Reputation: 26

import numpy as np
from concurrent.futures import ProcessPoolExecutor
from scipy.spatial import cKDTree as KDTree
from skspatial.objects import Plane

def process_batch(batch, lidar_data, neighborhood_radius, tree):
    """Compute roughness values for a batch of points.

    For each ``(point_idx, point)`` pair, the neighbors within
    ``neighborhood_radius`` are looked up through ``tree.query_ball_point``,
    a plane is fitted to them with ``Plane.best_fit`` and the point is
    measured against that plane with ``calculate_distance_to_plane`` (not
    defined in this snippet).

    Args:
        batch: Iterable of ``(point_idx, point)`` pairs.
        lidar_data: Array indexed by the neighbor indices the tree returns.
        neighborhood_radius: Search radius passed to the tree query.
        tree: Object exposing ``query_ball_point(point, radius)``.

    Returns:
        A list of ``(point_idx, roughness)`` tuples in batch order.
        ``roughness`` is ``np.nan`` for points with fewer than three
        neighbors.
    """
    results = []
    for point_idx, point in batch:
        hits = tree.query_ball_point(point, neighborhood_radius)
        nearby = lidar_data[hits]

        # Fewer than three neighbors: no plane is fitted, record NaN.
        if nearby.shape[0] < 3:
            results.append((point_idx, np.nan))
            continue

        fitted = Plane.best_fit(nearby)
        results.append((point_idx, calculate_distance_to_plane(point, fitted)))
    return results

def calculate_roughness_parallel(lidar_data, neighborhood_radius, tree, batch_size=100):
    """Compute a roughness value for every point, one pool task per batch.

    The points are split into consecutive batches of at most ``batch_size``
    points. Each batch is handed to ``process_batch`` as a list of
    ``(global_index, point)`` pairs, so the indices that come back address
    ``roughness_values`` directly.

    Note that every submitted task still receives ``lidar_data`` and
    ``tree`` as arguments, so larger batches mean fewer transfers of those
    objects to the worker processes.

    Args:
        lidar_data: Sequence of points supporting ``len`` and slicing.
        neighborhood_radius: Radius forwarded to ``process_batch``.
        tree: Spatial index forwarded to ``process_batch``.
        batch_size: Maximum number of points per task; must be positive.

    Returns:
        A float array of length ``len(lidar_data)`` holding, at each index,
        the roughness reported for that point.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    # A zero step makes range() fail and a negative one yields no batches at
    # all (silently returning zeros), so reject both up front.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    n_points = len(lidar_data)
    roughness_values = np.zeros(n_points)
    if n_points == 0:
        # Nothing to compute; avoid creating a process pool for no work.
        return roughness_values

    with ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(
                process_batch,
                # start=start keeps the index global to lidar_data rather
                # than local to the slice, so batches do not overwrite each
                # other's results.
                list(enumerate(lidar_data[start:start + batch_size], start=start)),
                lidar_data,
                neighborhood_radius,
                tree,
            )
            for start in range(0, n_points, batch_size)
        ]

        for future in futures:
            for point_idx, roughness in future.result():
                roughness_values[point_idx] = roughness

    return roughness_values

if __name__ == "__main__":
    # Read the point cloud and stack x, y, z so that each row is one point.
    las = laspy.read("path/to/your/lasfile.las")
    lidar_data = np.vstack((las.x, las.y, las.z)).T
    # Build the spatial index once; it is handed to every neighbor query.
    tree = KDTree(lidar_data)
    roughness_result = calculate_roughness_parallel(
        lidar_data=lidar_data,
        neighborhood_radius=1,
        tree=tree,
    )
  • The process_batch function now takes a batch of points and returns a list of results. Each element in the list corresponds to a tuple containing the point index and the calculated roughness (or np.nan if the conditions aren't met).
  • The calculate_roughness_parallel function now divides the input data into batches and submits each batch as a separate task to the executor. This significantly reduces the overhead associated with task submission and management, because far fewer tasks are created.
  • I've used a placeholder function calculate_distance_to_plane, assuming you have this function defined elsewhere.
  • The optimal batch size (batch_size=100) is something you would need to experiment with.

Upvotes: 0

Related Questions