Marlene_
Marlene_

Reputation: 9

Outlier removing based on spectral signal in Google Earth Engine (GEE)

I want to classify a Sentinel-2 scene using Rf-Algorithm in GEE.

Therefore, I have some training data and want to remove outliers based on their reflectance in Band 4 and Band 8. I considered outliers based on 1.5 times the standard deviation. When trying to filter the dataset based on the thresholds, I get the same amount of polygons for band 4 (filtered_B4) and band 8 (filtered_B8), which doesnt make sense for me. The number of outliers seems realistic.

Therefore, I tried to remove the outliers based on the geometry. When adding them to the Map, the outliers are removed, but the dataset (urban_without_outliers) still has the same amount of polygons as the unfiltered dataset (urban) which does't make sense. Maybe someone had the same issue or any ideas what might be the problem.

This is my code:

``// Filter features for urban
var urban = sample.filter(ee.Filter.eq('class', 1));

// Get the number of polygons in urbanFiltered
var numPolygons_urban = urban.size();
// Print the number of polygons
print('Number of polygons in urban:', numPolygons_urban);

// use  Band 4 and Band 8 to identify outliers later 
// Select the bands
var band4 = medianImage.select('B4')
var band8 = medianImage.select('B8')
// combine two bands
var bands = band4.addBands(band8);

// Calculate overall median and standard deviation for bands 8 and 4
var overallStats = bands.reduceRegion({
  reducer: ee.Reducer.median().combine({
    reducer2: ee.Reducer.stdDev(),
    sharedInputs: true
  }),
  geometry: urban,
  scale: 10,
  bestEffort: true
});
print(overallStats, 'overallStats')

// Get the overall median and standard deviation
var overallMedianBand4 = overallStats.getNumber('B4_median');
var overallStdDevBand4 = overallStats.getNumber('B4_stdDev');
var overallMedianBand8 = overallStats.getNumber('B8_median');
var overallStdDevBand8 = overallStats.getNumber('B8_stdDev');


// Define the threshold for outliers (1.5 times the standard deviation)
// anything above or below will be removed
var thresholdB8 = ee.Number(overallStdDevBand8).multiply(1.5).add(overallMedianBand8);
var thresholdB8_minus = ee.Number(overallMedianBand8).subtract(overallStdDevBand8.multiply(1.5));
var thresholdB4 = ee.Number(overallStdDevBand4).multiply(1.5).add(overallMedianBand4);
var thresholdB4_minus = ee.Number(overallMedianBand4).subtract(overallStdDevBand4.multiply(1.5));

// Calculate mean and standard deviation for bands 8 and 4 for each training polygon in urban
var stats_poly = bands.reduceRegions({
  collection: urban,
  reducer: ee.Reducer.median().combine({
    reducer2: ee.Reducer.stdDev(),
    sharedInputs: true
  }),
  scale: 10// Include bands B8 and B4 as properties in the output
});
print(stats_poly.first());

//Apply the thresholds to identify outliers and filter urban 
var filtered_B8 = stats_poly.filter(
  ee.Filter.or(
    ee.Filter.lt('B8_median', thresholdB8),
    ee.Filter.gt('B8_median', thresholdB8_minus)
  ));
// Get the number of polygons in filtered_B8
var numPolygons_fil8 = filtered_B8.size();
print('Number of polygons in filtered_B8:', numPolygons_fil8);

// filter Band 4
var filtered_B4 = stats_poly.filter(
  ee.Filter.or(
    ee.Filter.lt('B4_median', thresholdB4),
    ee.Filter.gt('B4_median', thresholdB4_minus)
  ));

// Get the number of polygons in filtered_B4
var numPolygons_fil4 = filtered_B4.size();
// Print the number of polygons
print('Number of polygons in filtered_B4:', numPolygons_fil4);

//urban without outliers
var urbanFiltered = filtered_B8.merge(filtered_B4);

// Get the number of polygons in urbanFiltered
var numPolygons = urbanFiltered.size();
// Print the number of polygons
print('Number of polygons in urbanFiltered:', numPolygons);


// get outliers to check
var outliersB8 = stats_poly.filter(
  ee.Filter.or(
    ee.Filter.gte('B8_median', thresholdB8),
    ee.Filter.lte('B8_median', thresholdB8_minus)
  )
);
print(outliersB8, 'outliersB8')
// get number of outliers for Band 8
var numPolygons_outB8 = outliersB8.size();
// Print the number of polygons
print('Number of polygons in numPolygons_outB8:', numPolygons_outB8);

// outliers for Band 4
var outliersB4 = stats_poly.filter(
  ee.Filter.or(
    ee.Filter.gte('B4_median', thresholdB4),
    ee.Filter.lte('B4_median', thresholdB4_minus)
  )
);
// get number of outliers for Band 4
var numPolygons_outB4 = outliersB4.size();
// Print the number of polygons
print('Number of polygons in numPolygons_outB4:', numPolygons_outB4);

// add the outliers to the map just to check 
Map.addLayer(outliersB8, {color:'blue'}, 'outliersB8');
Map.addLayer(outliersB4, {color: 'green'}, 'outliersB4');


// different approach to get urban without outliers features//

// Convert outliers to a single multipolygon geometry
var outliersGeometry = outliersB8.geometry().union(outliersB4.geometry());
Map.addLayer(outliersGeometry, {}, 'outliersGeometry')

// Remove outliers from urban feature collection
var urban_without_outliers = urban.map(function(feature) {
  return feature.difference(outliersGeometry);
});
// add urban without outliers to map
Map.addLayer(urban_without_outliers, {}, 'urban_without_outliers')

// get number of features in urban without outliers
var numPolygons_urb_no_out = urban_without_outliers.size();
print('Number of polygons in numPolygons_urb_no_out:', numPolygons_urb_no_out);`

Number of polygons in urban:7827
Number of polygons in filtered_B8:7784
Number of polygons in filtered_B4:7784
Number of polygons in numPolygons_outB8:407
Number of polygons in numPolygons_outB4:126
Number of polygons in numPolygons_urb_no_out:7827

Upvotes: 0

Views: 113

Answers (0)

Related Questions