I am trying to mark field boundaries from a given lat/lon input using Meta's pretrained Segment Anything Model (SAM).
My approach is to capture a screenshot from Google Maps using pyppeteer (or the Google API), feed the image to SAM (ViT-H), find contours in the output mask, and finally convert the pixel coordinates of the contours to geo-coordinates using a formula suggested by ChatGPT.
To check accuracy, I create a KML file from the generated geo-coordinates and import it into Google My Maps.
The results are inaccurate, even though the mask that SAM produces looks correct, so the problem must be in the pixel-to-lat/lon conversion or in my overall approach. I may need to do proper georeferencing, but I don't know how to do that accurately.
Any help or suggestions? I am including the code for one of my approaches below.
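For reference, the conversion in my code assumes the standard Web Mercator relation: at zoom z, the full world width of 2 * 20037508.34 Mercator meters spans 256 * 2**z pixels. A quick scale check with the values from my setup (zoom 19, 1024 px wide viewport):

zoom = 19
meters_per_pixel = 2 * 20037508.34 / (256 * 2 ** zoom)  # ~0.2986 Mercator m/px
print(meters_per_pixel * 1024)  # the viewport should span ~306 m east-west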
import pandas as pd
from pyproj import CRS, Transformer
from PIL import Image, ImageDraw
import asyncio
from pyppeteer import launch
import torch
import cv2
import numpy as np
from segment_anything import sam_model_registry, SamPredictor
from google.colab.patches import cv2_imshow
import nest_asyncio
from shapely.geometry import Polygon, MultiPolygon
from shapely.wkt import loads as load_wkt
# Apply nest_asyncio to allow nested event loops (needed in Colab/Jupyter)
nest_asyncio.apply()

# Initialize projections: WGS84 lat/lon <-> Web Mercator meters
wgs84 = CRS.from_epsg(4326)
web_mercator = CRS.from_epsg(3857)
transformer_to_mercator = Transformer.from_crs(wgs84, web_mercator, always_xy=True)
transformer_to_wgs84 = Transformer.from_crs(web_mercator, wgs84, always_xy=True)

def latlon_to_meters(lat, lon):
    mx, my = transformer_to_mercator.transform(lon, lat)
    return mx, my

def meters_to_latlon(mx, my):
    lon, lat = transformer_to_wgs84.transform(mx, my)
    return lat, lon
def latlon_to_pixels(lat, lon, zoom, center_lat, center_lon, orig_width, orig_height):
    """
    Convert latitude and longitude to pixel coordinates relative to the center of the image.
    """
    mx_center, my_center = latlon_to_meters(center_lat, center_lon)
    mx, my = latlon_to_meters(lat, lon)
    # Pixels per Mercator meter at this zoom: 256 * 2**zoom pixels span the
    # full world width of 2 * 20037508.34 m
    res = 256 * (2 ** zoom) / (2 * 20037508.34)
    # Offset from the image center (y grows downward in image space)
    px = (mx - mx_center) * res + orig_width / 2
    py = orig_height / 2 - (my - my_center) * res
    return px, py
def pixels_to_latlon(px, py, zoom, center_lat, center_lon, orig_width, orig_height):
    """
    Convert pixel coordinates to latitude and longitude relative to the center of the image.
    """
    mx_center, my_center = latlon_to_meters(center_lat, center_lon)
    # Pixels per Mercator meter at this zoom (same relation as latlon_to_pixels)
    res = 256 * (2 ** zoom) / (2 * 20037508.34)
    # Convert pixel offsets from the image center back to Mercator meters
    mx = (px - orig_width / 2) / res + mx_center
    my = my_center - (py - orig_height / 2) / res
    # Convert back to lat/lon
    lat, lon = meters_to_latlon(mx, my)
    return lat, lon
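# Sanity check I added for the two converters above: mapping a point to
# pixels and back should reproduce the input almost exactly. If this fails,
# the error is in the formulas themselves; if it passes but the overlay is
# still off, the screenshot is not georeferenced the way the formulas assume.
# (Test values reuse the known coordinate from further below.)
def check_roundtrip(lat=22.804861, lon=81.955284,
                    center_lat=22.80, center_lon=81.95, zoom=19):
    px, py = latlon_to_pixels(lat, lon, zoom, center_lat, center_lon, 1024, 768)
    lat2, lon2 = pixels_to_latlon(px, py, zoom, center_lat, center_lon, 1024, 768)
    assert abs(lat - lat2) < 1e-9 and abs(lon - lon2) < 1e-9, (lat2, lon2)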
async def screenshot_google_map(center_lat, center_lon, zoom=19, file_path="map_screenshot.jpg"):
    """
    Take a screenshot of Google Maps centered at (center_lat, center_lon) with a specific zoom level.
    """
    browser = await launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu', '--disable-dev-shm-usage'])
    page = await browser.newPage()
    # Pin deviceScaleFactor to 1 so one CSS pixel equals one image pixel;
    # otherwise the conversion formulas assume the wrong scale
    await page.setViewport({'width': 1024, 'height': 768, 'deviceScaleFactor': 1})
    url = f"https://www.google.com/maps/@{center_lat},{center_lon},{zoom}z/data=!3m1!1e3"
    await page.goto(url)
    await asyncio.sleep(5)  # Adjust the delay as necessary
    # Capture only the 1024x768 viewport; fullPage=True can return a larger
    # image than the conversion functions assume
    await page.screenshot({'path': file_path, 'fullPage': False})
    await browser.close()
    return file_path
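# Alternative capture I am experimenting with (hypothetical sketch, not part
# of my current pipeline): the Google Static Maps API returns an image whose
# center, zoom, and pixel size are known exactly, with no UI chrome, which
# makes the pixel<->geo mapping deterministic. Assumes a valid API key in
# MAPS_API_KEY. Note the API caps size at 640x640; scale=2 doubles the pixel
# dimensions, so the converters' pixels-per-meter would also double.
import requests

def fetch_static_map(center_lat, center_lon, zoom=19, api_key="MAPS_API_KEY",
                     file_path="map_static.png"):
    url = (
        "https://maps.googleapis.com/maps/api/staticmap"
        f"?center={center_lat},{center_lon}&zoom={zoom}"
        "&size=640x640&scale=2&maptype=satellite"
        f"&key={api_key}"
    )
    with open(file_path, "wb") as f:
        f.write(requests.get(url).content)
    return file_path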
def run_sam_model(image_path, sam_checkpoint, model_type="vit_h", simplification_tolerance=1.0):
    # Load SAM model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
    sam.to(device)
    predictor = SamPredictor(sam)

    # Load image; OpenCV reads BGR, but SamPredictor expects RGB
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, _ = image.shape

    # Click point(s) at the image center (add more points if needed)
    base_x = width // 2
    base_y = height // 2
    input_points = np.array([
        [base_x, base_y],
    ])

    # Predict masks with SAM; multimask_output=True returns three candidate
    # masks together with quality scores
    predictor.set_image(image_rgb)
    input_labels = np.ones(input_points.shape[0])  # All points are foreground
    masks, scores, _ = predictor.predict(
        point_coords=input_points,
        point_labels=input_labels,
        multimask_output=True,
    )

    # Keep the highest-scoring mask instead of an arbitrary one
    mask = masks[np.argmax(scores)].astype(np.uint8)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Represent the largest contour as a polygon
    simplified_polygon_coords = []
    if contours:
        largest = max(contours, key=cv2.contourArea)
        points = largest.squeeze()
        if points.ndim == 2 and len(points) >= 3:  # a valid polygon needs 3+ points
            polygon_shapely = Polygon(points.tolist())
            # Simplify the polygon to reduce vertex count
            simplified_polygon = polygon_shapely.simplify(simplification_tolerance, preserve_topology=True)
            simplified_polygon_coords = list(simplified_polygon.exterior.coords)

    # (Optional) Visualize result
    if simplified_polygon_coords:
        boundary_image = image.copy()
        cv2.drawContours(boundary_image, [np.array(simplified_polygon_coords, dtype=np.int32)], -1, (0, 0, 255), 2)
        cv2_imshow(boundary_image)
    else:
        print("Warning: No contours found in the mask.")
    return simplified_polygon_coords
def calculate_iou(polygon1, multipolygon_wkt):
    # polygon1 comes from pixels_to_latlon as (lat, lon) pairs, while WKT uses
    # (lon, lat) order: swap before comparing, or the IoU is meaningless
    poly1 = Polygon([(lon, lat) for lat, lon in polygon1])
    if not poly1.is_valid:
        poly1 = poly1.buffer(0)  # repair self-intersections from the raster contour
    # Convert WKT multipolygon to a Shapely MultiPolygon
    multipolygon = load_wkt(multipolygon_wkt)
    # IoU = intersection area / union area
    intersection_area = poly1.intersection(multipolygon).area
    union_area = poly1.union(multipolygon).area
    iou = intersection_area / union_area if union_area != 0 else 0
    return iou
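# Worked example for calculate_iou (illustrative numbers): two unit squares
# offset by 0.5 overlap in a 0.5 x 1.0 strip, so the expected value is
# 0.5 / (1.0 + 1.0 - 0.5) = 1/3. The first argument is in (lat, lon) order.
print(calculate_iou([(0, 0), (0, 1), (1, 1), (1, 0)],
                    "MULTIPOLYGON(((0.5 0, 1.5 0, 1.5 1, 0.5 1, 0.5 0)))"))  # ~0.333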
def draw_polygon(image, polygon, color):
    draw = ImageDraw.Draw(image)
    if polygon:
        for i in range(len(polygon) - 1):
            draw.line([polygon[i], polygon[i + 1]], fill=color, width=3)
        # Close the ring
        draw.line([polygon[-1], polygon[0]], fill=color, width=3)
async def process_coordinates(xlsx_path, sam_checkpoint):
    df = pd.read_excel(xlsx_path)
    for index, row in df.iterrows():
        center_lat = row['Lat']
        center_lon = row['Long']
        multipolygon_wkt = row['wkt_geom']
        print(f"Processing coordinates: {center_lat}, {center_lon}")
        try:
            # Scrape image; the same zoom must be used for capture and conversion.
            # Note: a fractional zoom like 18.4 is accepted in the Maps URL, but
            # the rendered scale may not match what the formulas assume
            zoom_level = 18.4
            image_path = await screenshot_google_map(center_lat, center_lon, zoom=zoom_level)

            # Run SAM model with simplification
            simplification_tolerance = 1.0  # Adjust this value as needed
            polygon = run_sam_model(image_path, sam_checkpoint, simplification_tolerance=simplification_tolerance)
            if polygon:
                # Convert polygon pixel coordinates to geo-coordinates
                geo_polygon = [pixels_to_latlon(px, py, zoom_level, center_lat, center_lon, 1024, 768) for px, py in polygon]
                # Debug: print the geo-coordinates of the polygon
                print(f"Geo Polygon Coordinates: {geo_polygon}")

                # Accuracy check: validate known coordinates
                known_coords = [(22.804861, 81.955284)]  # Example known coordinate
                for lat, lon in known_coords:
                    px, py = latlon_to_pixels(lat, lon, zoom_level, center_lat, center_lon, 1024, 768)
                    print(f"Pixel coordinates for known lat/lon ({lat}, {lon}): ({px}, {py})")

                iou = calculate_iou(geo_polygon, multipolygon_wkt)
                print(f"IoU for coordinates {center_lat}, {center_lon}: {iou}")

                # Load the original image again to draw polygons
                image = Image.open(image_path).convert("RGB")
                # Convert geo_polygon back to pixel coordinates for drawing
                geo_polygon_px = [latlon_to_pixels(lat, lon, zoom_level, center_lat, center_lon, 1024, 768) for lat, lon in geo_polygon]
                draw_polygon(image, geo_polygon_px, 'blue')

                # Draw the ground-truth multipolygon (WKT stores (lon, lat))
                multipolygon = load_wkt(multipolygon_wkt)
                for poly in multipolygon.geoms:
                    multipolygon_px = [latlon_to_pixels(lat, lon, zoom_level, center_lat, center_lon, 1024, 768) for lon, lat in poly.exterior.coords]
                    draw_polygon(image, multipolygon_px, 'red')

                # Convert the PIL image to OpenCV format (RGB -> BGR) and display
                open_cv_image = np.array(image)[:, :, ::-1].copy()
                cv2_imshow(open_cv_image)
            else:
                print(f"No valid polygon found for the coordinates: {center_lat}, {center_lon}")
        except Exception as e:
            print(f"Error processing coordinates {center_lat}, {center_lon}: {e}")
# Example usage
xlsx_path = "/content/Single_test.xlsx"  # Replace with your xlsx file path
sam_checkpoint = "/content/drive/MyDrive/Colab Notebooks/sam_vit_h_4b8939.pth"  # Replace with your SAM checkpoint path

# Top-level await works here because nest_asyncio patches the running Colab loop
await process_coordinates(xlsx_path, sam_checkpoint)
The predicted shape does not overlap the actual field exactly when I view it on the map.
The georeferencing and the pixel-to-lat/lon conversion need to be accurate for this to work.
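One idea I am exploring for the georeferencing step (a minimal sketch, assuming the screenshot really is centered on the requested point at the stated Web Mercator zoom, and that rasterio is available; the helpers above are reused): write the captured image out as a GeoTIFF in EPSG:3857, so that the pixel-to-geo mapping becomes the raster's affine transform and can be checked against the KML in a GIS tool such as QGIS.

import rasterio
from rasterio.transform import from_origin

def georeference_screenshot(image_path, center_lat, center_lon, zoom,
                            out_path="map_georef.tif"):
    img = cv2.imread(image_path)
    height, width, _ = img.shape
    # Mercator meters per pixel at this zoom (inverse of the 'res' used above)
    mpp = 2 * 20037508.34 / (256 * 2 ** zoom)
    mx, my = latlon_to_meters(center_lat, center_lon)
    west = mx - (width / 2) * mpp    # x of the left edge in Mercator meters
    north = my + (height / 2) * mpp  # y of the top edge in Mercator meters
    transform = from_origin(west, north, mpp, mpp)
    with rasterio.open(out_path, "w", driver="GTiff", height=height,
                       width=width, count=3, dtype="uint8",
                       crs="EPSG:3857", transform=transform) as dst:
        for band, channel in enumerate([2, 1, 0], start=1):  # BGR -> RGB bands
            dst.write(img[:, :, channel], band)
    return out_path

Is something like this a reasonable way to make the conversion accurate, or is there a better standard approach?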