I am trying to mark field boundaries from a given lat/lon input using Meta's pretrained Segment Anything Model (SAM).
My approach is to capture a screenshot from Google Maps using pyppeteer (or the Google API), feed the image to SAM (ViT-H), find contours in the output mask, and finally convert the pixel coordinates of the contours to geo-coordinates using a formula suggested by ChatGPT.
To check accuracy, I create a KML file from the generated geo-coordinates and import it into Google My Maps.
The results are inaccurate, even though the mask that SAM produces looks correct, so the problem must be in the pixel-to-lat/lon conversion or in my overall approach. I may need to do proper georeferencing, but I don't know how to do that accurately.
Any help or suggestions? I am including the code for one of my approaches below.
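For reference, the conversion in my code assumes the standard Web Mercator relation: at zoom z, the full world width of 2 * 20037508.34 Mercator meters spans 256 * 2**z pixels. A quick scale check with the values from my setup (zoom 19, 1024 px wide viewport):

zoom = 19
meters_per_pixel = 2 * 20037508.34 / (256 * 2 ** zoom)  # ~0.2986 Mercator m/px
print(meters_per_pixel * 1024)  # the viewport should span ~306 m east-west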
import pandas as pd
from pyproj import CRS, Transformer
from PIL import Image, ImageDraw
import asyncio
from pyppeteer import launch
import torch
import cv2
import numpy as np
from segment_anything import sam_model_registry, SamPredictor
from google.colab.patches import cv2_imshow
import nest_asyncio
from shapely.geometry import Polygon, MultiPolygon
from shapely.wkt import loads as load_wkt
# Apply nest_asyncio to allow nested event loops (needed in Colab/Jupyter)
nest_asyncio.apply()

# Initialize projections: WGS84 lat/lon <-> Web Mercator meters
wgs84 = CRS.from_epsg(4326)
web_mercator = CRS.from_epsg(3857)
transformer_to_mercator = Transformer.from_crs(wgs84, web_mercator, always_xy=True)
transformer_to_wgs84 = Transformer.from_crs(web_mercator, wgs84, always_xy=True)

def latlon_to_meters(lat, lon):
    mx, my = transformer_to_mercator.transform(lon, lat)
    return mx, my

def meters_to_latlon(mx, my):
    lon, lat = transformer_to_wgs84.transform(mx, my)
    return lat, lon
def latlon_to_pixels(lat, lon, zoom, center_lat, center_lon, orig_width, orig_height):
    """
    Convert latitude and longitude to pixel coordinates relative to the center of the image.
    """
    mx_center, my_center = latlon_to_meters(center_lat, center_lon)
    mx, my = latlon_to_meters(lat, lon)
    # Pixels per Mercator meter at this zoom: 256 * 2**zoom pixels span the
    # full world width of 2 * 20037508.34 m
    res = 256 * (2 ** zoom) / (2 * 20037508.34)
    # Offset from the image center (y grows downward in image space)
    px = (mx - mx_center) * res + orig_width / 2
    py = orig_height / 2 - (my - my_center) * res
    return px, py
def pixels_to_latlon(px, py, zoom, center_lat, center_lon, orig_width, orig_height):
    """
    Convert pixel coordinates to latitude and longitude relative to the center of the image.
    """
    mx_center, my_center = latlon_to_meters(center_lat, center_lon)
    # Pixels per Mercator meter at this zoom (same relation as latlon_to_pixels)
    res = 256 * (2 ** zoom) / (2 * 20037508.34)
    # Convert pixel offsets from the image center back to Mercator meters
    mx = (px - orig_width / 2) / res + mx_center
    my = my_center - (py - orig_height / 2) / res
    # Convert back to lat/lon
    lat, lon = meters_to_latlon(mx, my)
    return lat, lon
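# Sanity check I added for the two converters above: mapping a point to
# pixels and back should reproduce the input almost exactly. If this fails,
# the error is in the formulas themselves; if it passes but the overlay is
# still off, the screenshot is not georeferenced the way the formulas assume.
# (Test values reuse the known coordinate from further below.)
def check_roundtrip(lat=22.804861, lon=81.955284,
                    center_lat=22.80, center_lon=81.95, zoom=19):
    px, py = latlon_to_pixels(lat, lon, zoom, center_lat, center_lon, 1024, 768)
    lat2, lon2 = pixels_to_latlon(px, py, zoom, center_lat, center_lon, 1024, 768)
    assert abs(lat - lat2) < 1e-9 and abs(lon - lon2) < 1e-9, (lat2, lon2)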
async def screenshot_google_map(center_lat, center_lon, zoom=19, file_path="map_screenshot.jpg"):
    """
    Take a screenshot of Google Maps centered at (center_lat, center_lon) with a specific zoom level.
    """
    browser = await launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu', '--disable-dev-shm-usage'])
    page = await browser.newPage()
    # Pin deviceScaleFactor to 1 so one CSS pixel equals one image pixel;
    # otherwise the conversion formulas assume the wrong scale
    await page.setViewport({'width': 1024, 'height': 768, 'deviceScaleFactor': 1})
    url = f"https://www.google.com/maps/@{center_lat},{center_lon},{zoom}z/data=!3m1!1e3"
    await page.goto(url)
    await asyncio.sleep(5)  # Adjust the delay as necessary
    # Capture only the 1024x768 viewport; fullPage=True can return a larger
    # image than the conversion functions assume
    await page.screenshot({'path': file_path, 'fullPage': False})
    await browser.close()
    return file_path
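# Alternative capture I am experimenting with (hypothetical sketch, not part
# of my current pipeline): the Google Static Maps API returns an image whose
# center, zoom, and pixel size are known exactly, with no UI chrome, which
# makes the pixel<->geo mapping deterministic. Assumes a valid API key in
# MAPS_API_KEY. Note the API caps size at 640x640; scale=2 doubles the pixel
# dimensions, so the converters' pixels-per-meter would also double.
import requests

def fetch_static_map(center_lat, center_lon, zoom=19, api_key="MAPS_API_KEY",
                     file_path="map_static.png"):
    url = (
        "https://maps.googleapis.com/maps/api/staticmap"
        f"?center={center_lat},{center_lon}&zoom={zoom}"
        "&size=640x640&scale=2&maptype=satellite"
        f"&key={api_key}"
    )
    with open(file_path, "wb") as f:
        f.write(requests.get(url).content)
    return file_path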
def run_sam_model(image_path, sam_checkpoint, model_type="vit_h", simplification_tolerance=1.0):
    # Load SAM model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
    sam.to(device)
    predictor = SamPredictor(sam)

    # Load image; OpenCV reads BGR, but SamPredictor expects RGB
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, _ = image.shape

    # Click point(s) at the image center (add more points if needed)
    base_x = width // 2
    base_y = height // 2
    input_points = np.array([
        [base_x, base_y],
    ])

    # Predict masks with SAM; multimask_output=True returns three candidate
    # masks together with quality scores
    predictor.set_image(image_rgb)
    input_labels = np.ones(input_points.shape[0])  # All points are foreground
    masks, scores, _ = predictor.predict(
        point_coords=input_points,
        point_labels=input_labels,
        multimask_output=True,
    )

    # Keep the highest-scoring mask instead of an arbitrary one
    mask = masks[np.argmax(scores)].astype(np.uint8)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Represent the largest contour as a polygon
    simplified_polygon_coords = []
    if contours:
        largest = max(contours, key=cv2.contourArea)
        points = largest.squeeze()
        if points.ndim == 2 and len(points) >= 3:  # a valid polygon needs 3+ points
            polygon_shapely = Polygon(points.tolist())
            # Simplify the polygon to reduce vertex count
            simplified_polygon = polygon_shapely.simplify(simplification_tolerance, preserve_topology=True)
            simplified_polygon_coords = list(simplified_polygon.exterior.coords)

    # (Optional) Visualize result
    if simplified_polygon_coords:
        boundary_image = image.copy()
        cv2.drawContours(boundary_image, [np.array(simplified_polygon_coords, dtype=np.int32)], -1, (0, 0, 255), 2)
        cv2_imshow(boundary_image)
    else:
        print("Warning: No contours found in the mask.")
    return simplified_polygon_coords
def calculate_iou(polygon1, multipolygon_wkt):
    # polygon1 comes from pixels_to_latlon as (lat, lon) pairs, while WKT uses
    # (lon, lat) order: swap before comparing, or the IoU is meaningless
    poly1 = Polygon([(lon, lat) for lat, lon in polygon1])
    if not poly1.is_valid:
        poly1 = poly1.buffer(0)  # repair self-intersections from the raster contour
    # Convert WKT multipolygon to a Shapely MultiPolygon
    multipolygon = load_wkt(multipolygon_wkt)
    # IoU = intersection area / union area
    intersection_area = poly1.intersection(multipolygon).area
    union_area = poly1.union(multipolygon).area
    iou = intersection_area / union_area if union_area != 0 else 0
    return iou
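# Worked example for calculate_iou (illustrative numbers): two unit squares
# offset by 0.5 overlap in a 0.5 x 1.0 strip, so the expected value is
# 0.5 / (1.0 + 1.0 - 0.5) = 1/3. The first argument is in (lat, lon) order.
print(calculate_iou([(0, 0), (0, 1), (1, 1), (1, 0)],
                    "MULTIPOLYGON(((0.5 0, 1.5 0, 1.5 1, 0.5 1, 0.5 0)))"))  # ~0.333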
def draw_polygon(image, polygon, color):
    draw = ImageDraw.Draw(image)
    if polygon:
        for i in range(len(polygon) - 1):
            draw.line([polygon[i], polygon[i + 1]], fill=color, width=3)
        # Close the ring
        draw.line([polygon[-1], polygon[0]], fill=color, width=3)
async def process_coordinates(xlsx_path, sam_checkpoint):
    df = pd.read_excel(xlsx_path)
    for index, row in df.iterrows():
        center_lat = row['Lat']
        center_lon = row['Long']
        multipolygon_wkt = row['wkt_geom']
        print(f"Processing coordinates: {center_lat}, {center_lon}")
        try:
            # Scrape image; the same zoom must be used for capture and conversion.
            # Note: a fractional zoom like 18.4 is accepted in the Maps URL, but
            # the rendered scale may not match what the formulas assume
            zoom_level = 18.4
            image_path = await screenshot_google_map(center_lat, center_lon, zoom=zoom_level)

            # Run SAM model with simplification
            simplification_tolerance = 1.0  # Adjust this value as needed
            polygon = run_sam_model(image_path, sam_checkpoint, simplification_tolerance=simplification_tolerance)
            if polygon:
                # Convert polygon pixel coordinates to geo-coordinates
                geo_polygon = [pixels_to_latlon(px, py, zoom_level, center_lat, center_lon, 1024, 768) for px, py in polygon]
                # Debug: print the geo-coordinates of the polygon
                print(f"Geo Polygon Coordinates: {geo_polygon}")

                # Accuracy check: validate known coordinates
                known_coords = [(22.804861, 81.955284)]  # Example known coordinate
                for lat, lon in known_coords:
                    px, py = latlon_to_pixels(lat, lon, zoom_level, center_lat, center_lon, 1024, 768)
                    print(f"Pixel coordinates for known lat/lon ({lat}, {lon}): ({px}, {py})")

                iou = calculate_iou(geo_polygon, multipolygon_wkt)
                print(f"IoU for coordinates {center_lat}, {center_lon}: {iou}")

                # Load the original image again to draw polygons
                image = Image.open(image_path).convert("RGB")
                # Convert geo_polygon back to pixel coordinates for drawing
                geo_polygon_px = [latlon_to_pixels(lat, lon, zoom_level, center_lat, center_lon, 1024, 768) for lat, lon in geo_polygon]
                draw_polygon(image, geo_polygon_px, 'blue')

                # Draw the ground-truth multipolygon (WKT stores (lon, lat))
                multipolygon = load_wkt(multipolygon_wkt)
                for poly in multipolygon.geoms:
                    multipolygon_px = [latlon_to_pixels(lat, lon, zoom_level, center_lat, center_lon, 1024, 768) for lon, lat in poly.exterior.coords]
                    draw_polygon(image, multipolygon_px, 'red')

                # Convert the PIL image to OpenCV format (RGB -> BGR) and display
                open_cv_image = np.array(image)[:, :, ::-1].copy()
                cv2_imshow(open_cv_image)
            else:
                print(f"No valid polygon found for the coordinates: {center_lat}, {center_lon}")
        except Exception as e:
            print(f"Error processing coordinates {center_lat}, {center_lon}: {e}")
# Example usage
xlsx_path = "/content/Single_test.xlsx"  # Replace with your xlsx file path
sam_checkpoint = "/content/drive/MyDrive/Colab Notebooks/sam_vit_h_4b8939.pth"  # Replace with your SAM checkpoint path

# Top-level await works here because nest_asyncio patches the running Colab loop
await process_coordinates(xlsx_path, sam_checkpoint)
The predicted shape does not overlap the actual field exactly when I view it on the map.
The georeferencing and the pixel-to-lat/lon conversion need to be accurate for this to work.
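One idea I am exploring for the georeferencing step (a minimal sketch, assuming the screenshot really is centered on the requested point at the stated Web Mercator zoom, and that rasterio is available; the helpers above are reused): write the captured image out as a GeoTIFF in EPSG:3857, so that the pixel-to-geo mapping becomes the raster's affine transform and can be checked against the KML in a GIS tool such as QGIS.

import rasterio
from rasterio.transform import from_origin

def georeference_screenshot(image_path, center_lat, center_lon, zoom,
                            out_path="map_georef.tif"):
    img = cv2.imread(image_path)
    height, width, _ = img.shape
    # Mercator meters per pixel at this zoom (inverse of the 'res' used above)
    mpp = 2 * 20037508.34 / (256 * 2 ** zoom)
    mx, my = latlon_to_meters(center_lat, center_lon)
    west = mx - (width / 2) * mpp    # x of the left edge in Mercator meters
    north = my + (height / 2) * mpp  # y of the top edge in Mercator meters
    transform = from_origin(west, north, mpp, mpp)
    with rasterio.open(out_path, "w", driver="GTiff", height=height,
                       width=width, count=3, dtype="uint8",
                       crs="EPSG:3857", transform=transform) as dst:
        for band, channel in enumerate([2, 1, 0], start=1):  # BGR -> RGB bands
            dst.write(img[:, :, channel], band)
    return out_path

Is something like this a reasonable way to make the conversion accurate, or is there a better standard approach?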