Reputation: 123
I was wondering if it's possible to remove outliers from Raster dataset Data
> library (raster)
> ras <- raster("08_sa.tif")
> boxplot(ras)
> summary(ras)
08_sa.tif
Min. -6.010734e+17
1st Qu. -4.292327e+15
Median 3.456345e+15
3rd Qu. 5.913508e+15
Max. 3.954778e+17
NA's 0.000000e+00
> dput(ras)
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), offset = 0, gain = 1, inmemory = TRUE, fromdisk = FALSE,
isfactor = FALSE, attributes = list(), haveminmax = TRUE,
min = 213381595136, max = 395477771117133824, band = 1L,
unit = "", names = "Data"), legend = new(".RasterLegend",
type = character(0), values = logical(0), color = logical(0),
names = logical(0), colortable = logical(0)), title = character(0),
extent = new("Extent", xmin = 60.514678955, xmax = 97.416931152,
ymin = -0.701358795, ymax = 38.49804306), rotated = FALSE,
rotation = new(".Rotation", geotrans = numeric(0), transfun = function ()
NULL), ncols = 369L, nrows = 392L, crs = new("CRS", projargs = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"),
history = list(), z = list())
> as.data.frame(ras,xy=TRUE) -> df.ras
> colnames(df.ras) <- c("x","y","value")
> df.ras$value[!df.ras$value %in% boxplot.stats(df.ras$value)$out] -> no.outliner
> boxplot(no.outliner)
> plot(no.outliner)
> summary(no.outliner)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
2.134e+11 3.315e+15 5.084e+15 4.936e+15 6.538e+15 1.145e+16 113153
After removing the outliers, 'no.outliner' has lost its geographical location. Plot of 'ras' before removing outliers:
Upvotes: 0
Views: 1805
Reputation: 111
In all humility I'm suggesting this small function in Python since there is no programming language specified in the question.
Based on a histogram with nb_bins bins, it identifies the bin_values (min and max outlier thresholds) where a run of consecutive_low_bins consecutive low-occurrence bins (bins whose count is <= low_occurence_value) is encountered. It can slice values symmetrically, only high values, or only low values (slicing_type = 'symetric', 'high' or 'low'). Then it writes a new sliced raster (sliced).
The advantage over previous answers is that it can detect noise and truly outlying values which may not be identified with interquartile range or boxplot.
def remove_outliers_from_raster(raster, band, nan_values, nb_bins, consecutive_low_bins,
                                low_occurence_value, slicing_type, path=None):
    """Slice histogram-detected outliers out of one raster band and write the result.

    A histogram with ``nb_bins`` bins is built from the valid cells of the band.
    A bin is "low" when its count is <= ``low_occurence_value``. Scanning the
    bins from low to high values, a running count of consecutive low bins is
    kept; a threshold is set where that count reaches ``consecutive_low_bins``:

    * low threshold: left edge of the last such bin in the lower half of the
      histogram; cells strictly greater than it are kept.
    * high threshold: left edge of the first bin of the first such run in the
      upper half of the histogram; cells strictly lower than it are kept.

    When no run is found on a side, that side is left unbounded.

    Parameters
    ----------
    raster : path of the raster to read (anything ``rasterio.open`` accepts).
    band : 1-based index of the band to process.
    nan_values : cell value to be treated as missing in the input.
    nb_bins : number of histogram bins.
    consecutive_low_bins : length of the run of low bins that marks a threshold.
    low_occurence_value : maximum count for a bin to be considered low.
    slicing_type : ``'low'`` removes only low outliers, ``'high'`` only high
        outliers; any other value removes both.
    path : output file. Defaults to the input name with ``_sliced`` inserted
        before the extension.

    Returns
    -------
    The path of the single-band float64 raster that was written. Removed and
    missing cells hold -32768, which is also declared as the file's nodata.
    """
    import os

    fill_value = -3.2768e+04

    if path is None:
        root, ext = os.path.splitext(os.fspath(raster))
        path = root + "_sliced" + ext

    # Read inside a context manager so the input dataset is always closed.
    with rasterio.open(raster) as dataset:
        # Cast up front: integer bands cannot hold NaN.
        data = dataset.read(band).astype(np.float64)
        profile = dataset.meta.copy()

    data[data == nan_values] = np.nan
    valid = data[~np.isnan(data)]

    counts, bin_values = np.histogram(valid, bins=nb_bins)

    # Running length of the current run of consecutive low-occurrence bins.
    run_lengths = np.zeros(len(counts), dtype=int)
    run = 0
    for i, count in enumerate(counts):
        run = run + 1 if count <= low_occurence_value else 0
        run_lengths[i] = run

    half = math.ceil(len(run_lengths) / 2)
    upper_start = len(run_lengths) - half  # absolute index where the upper half begins

    lower_hits = np.flatnonzero(run_lengths[:half] >= consecutive_low_bins)
    upper_hits = np.flatnonzero(run_lengths[upper_start:] >= consecutive_low_bins)

    if lower_hits.size:
        tresh_bin_min = int(lower_hits[-1])
        tresh_value_min = bin_values[tresh_bin_min]
    else:
        # No run found: leave the low side unbounded instead of using a finite
        # sentinel that real data can exceed.
        tresh_bin_min = None
        tresh_value_min = -np.inf

    if upper_hits.size:
        hit = upper_start + int(upper_hits[0])
        # Step back to the first bin of the run, using the absolute index so the
        # result does not depend on whether nb_bins is odd or even.
        tresh_bin_max = hit - int(run_lengths[hit]) + 1
        tresh_value_max = bin_values[tresh_bin_max]
    else:
        tresh_bin_max = None
        tresh_value_max = np.inf

    print(tresh_bin_min, tresh_value_min, tresh_bin_max, tresh_value_max)

    # NaN compares False on both sides, so missing cells also receive fill_value.
    if slicing_type == 'low':
        keep = data > tresh_value_min
    elif slicing_type == 'high':
        keep = data < tresh_value_max
    else:
        keep = (data < tresh_value_max) & (data > tresh_value_min)
    sliced = np.where(keep, data, fill_value)

    # Describe what is actually written: one float64 band with an explicit nodata.
    profile.update(dtype="float64", count=1, nodata=fill_value)
    with rasterio.open(path, 'w', **profile) as dst:
        dst.write(sliced, 1)

    return path
Upvotes: 0
Reputation: 11
Try autooptimizer module
pip install autooptimizer
from autooptimizer.process import outlier_removal
Upvotes: -1
Reputation: 123
I looked at the histogram and boxplot, noted the range of the outliers, and removed them using reclassify. Thanks again @val, your tip to use reclassify works well in R.
> x1 <- reclassify(ras, cbind(1.5e+16,Inf, NA), right=FALSE)
> spplot(x1)
> boxplot(x1)
> hist(x1)
Upvotes: 2
Reputation: 1898
Try this:
# load package raster
library(raster)

# load your data
ras <- raster("08_sa.tif")

# make a df: one row per cell, in cell order, with the cell coordinates
df.ras <- as.data.frame(ras, xy = TRUE)

# adjust col names
colnames(df.ras) <- c("x", "y", "value")

# flag the outliers reported by boxplot.stats() (values beyond the whiskers)
is.outlier <- df.ras$value %in% boxplot.stats(df.ras$value)$out

# remove outliner: plain vector of the remaining values (no coordinates)
no.outliner <- df.ras$value[!is.outlier]

# keep the geographic location: instead of dropping the outlier cells, set
# them to NA in a copy of the raster so extent, resolution and CRS are kept
ras.no.outliner <- setValues(ras, replace(df.ras$value, is.outlier, NA))

# draw boxplot without outliner
boxplot(no.outliner)

# ras.no.outliner can now be mapped or saved like any other raster,
# e.g. plot(ras.no.outliner) or writeRaster(ras.no.outliner, "08_sa_clean.tif")
Upvotes: -1