Reputation: 336
So let's say I have a point cloud of data in the form of Z = f(X, Y)
The problem is that I have millions of points, with data that is extremely fine in some (X,Y) regions and extremely sparse in other regions.
Ideally the interpolated solution needs to be continuous, and as smooth as possible. The application is for finite element analysis.
I've tried:
I'm thinking the best way is some sort of hacked-up combination of averaging the nearest nodes found with a KDTree and then some sort of interpolation for far-away points, but I would have thought that interpolating millions of points ought to be a solved problem...
Anybody have any good ideas on what to do?
Upvotes: 1
Views: 2396
Reputation: 336
So to interpolate arbitrarily large point clouds I wrote a piece of code to partition data into smaller chunks. It's not the best piece of code but will be available for those too lazy to write their own.
from functools import wraps

import numpy as np
import scipy.interpolate
from scipy.interpolate import griddata
from scipy.spatial.qhull import QhullError
def lazy_property(fn):
    """
    Version of lazy_property by John Huang.

    Decorator that exposes a zero-argument method as a read-only property
    whose value is computed on first access and then cached in a
    per-instance dictionary, keyed by the method name.

    The original author notes that the cache can be cleared using
    ``clean_lazy_properties``; that helper is not part of this section.

    Parameters
    ----------
    fn : callable
        Method taking only ``self`` whose result should be cached.

    Returns
    -------
    property
        Property object wrapping ``fn`` with per-instance caching.
    """
    # The name of the instance attribute holding the cache is normally given
    # by the module-level constant `_data_holder_attr`. Fall back to a
    # default name when that constant is not defined so the decorator still
    # works on its own.
    try:
        cache_name = _data_holder_attr
    except NameError:
        cache_name = '_lazy_properties'
    attr_name = fn.__name__

    def get_cache(instance):
        # Create the per-instance cache dictionary on first use.
        if not hasattr(instance, cache_name):
            setattr(instance, cache_name, {})
        return getattr(instance, cache_name)

    @property
    @wraps(fn)
    def get_attr(self):
        cache = get_cache(self)
        if attr_name not in cache:
            cache[attr_name] = fn(self)
        return cache[attr_name]

    return get_attr


class Interp2P(object):
    """
    Partitioned interpolation for 2d applications.

    This class is used to avoid memory errors when interpolating very
    large point clouds. The scipy interpolators have memory problems for
    large numbers of points, so the data is divided into a grid of
    px * py overlapping blocks and one interpolator is built per block.
    Partitioning is controlled by the parameters px, py, pe and blockpts.

    Parameters
    ----------
    points : array shape (a, 2)
        table of point coordinates describing z = f(x,y) where

        - column 0 = x
        - column 1 = y

    values : array of shape (a, b)
        Corresponding values z = f(x, y).
        values may possibly have multiple columns,
        depending on the interpolator kind used.
    kind : str
        Interpolation method. Can be

        - 'nearest'
        - 'linear'
        - 'cubic'
        - 'rbf' (requires a module-level ``Rbf_wrapper``, which is not
          defined in this section)

    px : int or None
        Number of partitions in x-direction. If None, a default is
        calculated according to blockpts.
    py : int or None
        Number of partitions in y-direction. If None, a default is
        calculated according to blockpts.
    pe : scalar
        Proportion of block length to overlap on other blocks.
        For example, if pe=0.25, the block will be extended 25% on both
        the left and right sides to overlap on neighbouring blocks.
    blockpts : int
        Approximate number of interpolation points within each partition
        block. Defaults to 300*300. blockpts is used to automatically size
        px and/or py if these are set to None.
    **kwargs
        Extra keyword arguments forwarded to the underlying interpolator
        constructor.
    """

    def __init__(self, points, values, kind='linear',
                 px=None, py=None, pe=0.5, blockpts=300*300,
                 **kwargs):
        points = np.array(points)
        self.x = points[:, 0]
        self.y = points[:, 1]
        self.z = np.array(values)
        self.points = points
        self.values = np.array(self.z)
        self.kind = kind
        self.kwargs = kwargs

        self.px = px
        self.py = py
        self.pe = pe
        self.blockpts = blockpts
        self._set_partitions()

    def _set_partitions(self):
        """Calculate the number of partitions and the block boundaries."""
        ptnum = len(self.x)
        blocknum = ptnum / self.blockpts + 1

        # Fill in whichever partition counts were left as None so that
        # px * py is roughly the number of blocks required.
        if self.px is None:
            if self.py is None:
                self.px = int(np.sqrt(blocknum))
                self.py = int(blocknum / self.px)
            else:
                self.px = int(blocknum / self.py)
        if self.py is None:
            self.py = int(blocknum / self.px)

        # Always use at least one block per direction.
        self.px = max(self.px, 1)
        self.py = max(self.py, 1)

        self.xmax = np.max(self.x)
        self.xmin = np.min(self.x)
        self.xlen = self.xmax - self.xmin
        self.xp = self.xlen / self.px   # block x length
        self.xe = self.xp * self.pe     # block x overlap length

        self.ymax = np.max(self.y)
        self.ymin = np.min(self.y)
        self.ylen = self.ymax - self.ymin
        self.yp = self.ylen / self.py   # block y length
        self.ye = self.yp * self.pe     # block y overlap length

        # Widen the outer limits slightly so that points lying exactly on
        # the data bounds fall strictly inside the first/last block.
        xfudge = (self.xmax - self.xmin) / 1000.
        yfudge = (self.ymax - self.ymin) / 1000.

        # Construct block upper/lower limits
        xl = self.xmin - xfudge
        xu = self.xmax + xfudge
        yl = self.ymin - yfudge
        yu = self.ymax + yfudge

        # Construct block edges
        self.xblocks = np.linspace(xl, xu, self.px + 1)
        self.yblocks = np.linspace(yl, yu, self.py + 1)

    def _choose_block(self, x, y):
        """
        Calculate which interpolation block to use for the given
        coordinates (x, y).

        Points outside the block grid get an index of -1 or px (py), which
        matches no block.

        Returns
        -------
        xindex : int array of shape (N,)
            index locations for x-dimension of blocks
        yindex : int array of shape (N,)
            index locations for y-dimension of blocks
        """
        xindex = np.searchsorted(self.xblocks, x) - 1
        yindex = np.searchsorted(self.yblocks, y) - 1
        return xindex, yindex

    @lazy_property
    def _template_interp(self):
        """
        Interpolator class (or factory) selected by ``self.kind``.

        Raises
        ------
        ValueError
            If ``self.kind`` is not one of the supported kinds.
        """
        if self.kind == 'linear':
            return scipy.interpolate.LinearNDInterpolator
        if self.kind == 'cubic':
            return scipy.interpolate.CloughTocher2DInterpolator
        if self.kind == 'nearest':
            return scipy.interpolate.NearestNDInterpolator
        if self.kind == 'rbf':
            # Rbf_wrapper is not defined in this section; it must be
            # available at module level for this kind to work.
            return Rbf_wrapper
        raise ValueError(
            "Unknown interpolation kind %r; expected 'nearest', 'linear', "
            "'cubic' or 'rbf'" % (self.kind,))

    @lazy_property
    def _interpolators(self):
        """
        Construct interpolators for every block.

        - 0 dimension corresponds to x data.
        - 1 dimension corresponds to y data.

        A block whose interpolator cannot be built (for example because it
        contains no points, or too few to triangulate) is stored as None.
        """
        # Resolve the template outside the try block below so that an
        # invalid `kind` is reported instead of being silently swallowed.
        template = self._template_interp

        # Bounds of block interpolation points, extended by the overlap.
        xl_arr = self.xblocks[:-1] - self.xe
        xu_arr = self.xblocks[1:] + self.xe
        yl_arr = self.yblocks[:-1] - self.ye
        yu_arr = self.yblocks[1:] + self.ye

        # Loop through all block boundaries and construct interpolators.
        interpolators = []
        for xl, xu in zip(xl_arr, xu_arr):
            in_x = np.logical_and(xl <= self.x, self.x <= xu)
            column = []
            for yl, yu in zip(yl_arr, yu_arr):
                # Select the original data belonging to this block.
                in_y = np.logical_and(yl <= self.y, self.y <= yu)
                mask = np.logical_and(in_x, in_y)
                block_points = np.column_stack((self.x[mask], self.y[mask]))
                block_values = self.z[mask]
                try:
                    interp = template(block_points, block_values,
                                      **self.kwargs)
                except (ValueError, QhullError):
                    interp = None
                column.append(interp)
            interpolators.append(column)
        return interpolators

    def interpolate(self, x, y):
        """Interpolate points.

        Parameters
        ----------
        x : array of shape (m,)
            x-coordinates of desired points to interpolate
        y : array of shape (m,)
            y-coordinates of desired points to interpolate

        Returns
        -------
        values : array of shape (m, n)
            interpolated values of points. Entries are NaN for points that
            fall outside the block grid or inside a block for which no
            interpolator could be built.
        """
        x = np.atleast_1d(x)
        y = np.atleast_1d(y)

        # Properly shape the result: one row per query point, trailing
        # dimensions taken from the stored values.
        shape = list(self.z.shape)
        shape[0] = len(x)
        result = np.full(shape, np.nan)

        # Loop through all blocks and send points to the block's
        # corresponding interpolator.
        xindex, yindex = self._choose_block(x, y)
        interpolators = self._interpolators
        for ix in range(self.px):
            in_ix = xindex == ix
            for iy in range(self.py):
                index = np.logical_and(in_ix, yindex == iy)
                if not index.any():
                    continue
                interp = interpolators[ix][iy]
                if interp is None:
                    # No interpolator for this block; leave NaN.
                    continue
                points = np.column_stack((x[index], y[index]))
                result[index] = interp(points)
        return result

    def __call__(self, points):
        """
        Interpolate in the style of LinearNDInterpolator.

        Parameters
        ----------
        points : array of shape (m, 2)
            coordinates of x (column 0) and y (column 1).

        Returns
        -------
        values : array of shape (m, n)
            interpolated values of points.
        """
        points = np.atleast_2d(points)
        return self.interpolate(points[:, 0], points[:, 1])
Upvotes: 3