How can I perform image clustering effectively?

Question

I have images of line graphs showing trends, and I want to cluster graphs with similar trends together. However, the clustering algorithms I have tried do not work as well as I expected. I believe that effective feature extraction is crucial, but it seems challenging because the images contain only the graph line on a black background. In the clustering results, lines that trend upwards in a similar way do not end up in the same cluster; instead, only almost identical graphs are grouped together. I can provide an example of the images.

enter image description here

I would like to know techniques that can effectively cluster such images.

This is the code I used for feature extraction and clustering.

I extracted features from the above images using ResNet and clustered them using DBSCAN.

import os
import numpy as np
import torch
from torchvision import models, transforms
from sklearn.cluster import DBSCAN
from PIL import Image

def load_images_from_directory(directory):
    """Load every PNG/JPEG file found directly in ``directory`` as an RGB image.

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        A list of ``(image, filename)`` tuples ordered by filename. Entries
        whose name does not end in .png, .jpg or .jpeg (case-insensitive)
        are skipped.
    """
    images = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # image order (and therefore the downstream clustering) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        img_path = os.path.join(directory, filename)
        # Image.open() is lazy and holds the file open. convert() reads the
        # pixel data and returns a separate image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(img_path) as img:
            images.append((img.convert('RGB'), filename))
    return images

def extract_features(images):
    """Compute a ResNet-18 feature vector for each image.

    Args:
        images: Iterable of ``(PIL image, filename)`` tuples, as returned by
            ``load_images_from_directory``.

    Returns:
        A list of ``(feature_vector, filename)`` tuples where
        ``feature_vector`` is a flattened 1-D NumPy array holding the output
        of the network with its final classification layer removed.
    """
    # Nothing to do: avoid building the model (and downloading its weights)
    # when there are no images.
    if not images:
        return []

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Newer torchvision releases deprecate ``pretrained=True`` in favour of
    # the ``weights=`` argument; fall back to the old spelling when the
    # weights enum is not available.
    weights_enum = getattr(models, 'ResNet18_Weights', None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    # Drop the last child module (the classification layer) so the network
    # outputs pooled features instead of class scores.
    model = torch.nn.Sequential(*(list(model.children())[:-1]))
    model = model.to(device)
    model.eval()

    # Resize to 224x224 and normalise with the mean/std constants used by
    # the pretrained weights.
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    features = []

    with torch.no_grad():
        for img, filename in images:
            # unsqueeze(0) adds the batch dimension the model expects.
            img_tensor = preprocess(img).unsqueeze(0).to(device)
            feature = model(img_tensor).cpu().numpy()
            features.append((feature.flatten(), filename))

    return features

def cluster_images(features, eps=0.8, min_samples=3):
    """Cluster feature vectors with DBSCAN.

    Args:
        features: Sequence of ``(feature_vector, filename)`` tuples; only the
            feature vectors are used.
        eps: DBSCAN neighbourhood radius.
        min_samples: Minimum number of neighbours for a DBSCAN core point.

    Returns:
        A 1-D NumPy array with one integer label per input, in input order.
        The label ``-1`` marks points DBSCAN treats as noise. An empty input
        yields an empty array.
    """
    # An empty list would become a 1-D array, which DBSCAN rejects; return
    # an empty label array so an empty input folder does not crash the run.
    if len(features) == 0:
        return np.empty(0, dtype=np.intp)

    feature_vectors = np.array([vector for vector, _ in features])
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    return dbscan.fit_predict(feature_vectors)

def save_clustered_images(images, labels, output_dir):
    """Write each image into a sub-folder of ``output_dir`` named after its cluster.

    Images labelled ``-1`` go to ``<output_dir>/noise``; every other label
    ``k`` goes to ``<output_dir>/cluster_<k>``. The ``noise`` folder is
    always created, even when no image is labelled as noise.

    Args:
        images: Sequence of ``(image, filename)`` tuples; ``image`` must
            provide a ``save(path)`` method.
        labels: Cluster label for each image, indexed like ``images``.
        output_dir: Root folder for the output; created if missing.

    Raises:
        IndexError: If ``labels`` has fewer entries than ``images``.
    """
    os.makedirs(output_dir, exist_ok=True)
    noise_dir = os.path.join(output_dir, 'noise')
    os.makedirs(noise_dir, exist_ok=True)

    # Build the label -> folder map once, instead of rescanning every image
    # for every label.
    target_dirs = {-1: noise_dir}
    for label in set(labels):
        if label == -1:
            continue
        cluster_dir = os.path.join(output_dir, f'cluster_{label}')
        os.makedirs(cluster_dir, exist_ok=True)
        target_dirs[label] = cluster_dir

    # Single pass over the images; labels[index] is looked up by position so
    # a too-short label sequence still raises IndexError.
    for index, (image, filename) in enumerate(images):
        image.save(os.path.join(target_dirs[labels[index]], filename))

# Folder that is scanned for images, and root folder that receives the
# per-cluster sub-folders.
input_dir, output_dir = 'images', 'clusterd_images'

# Pipeline: load the images, embed them, cluster the embeddings, then copy
# every image into the folder of its cluster.
images = load_images_from_directory(input_dir)
features = extract_features(images)
labels = cluster_images(features, min_samples=3, eps=0.3)
save_clustered_images(images, labels, output_dir)

I'm new to image processing, so I would appreciate any advice from more experienced practitioners.

Since all the images have a black background with only blue lines, it is essential to extract features effectively, and I hope to cluster similar trends together during the clustering process.

How can I perform image clustering effectively?

Answers (1)

Related Questions