Stephanie Omwanda

Reputation: 25

Custom dataset with YOLOv5

I'm new to YOLOv5 and v8; I'm already having issues with YOLOv5, and I wanted to train with v8 just to compare. I had different datasets of toilets (4 classes, already annotated via Roboflow) that I merged and trained on. Initially, the model was training on the merged dataset with 1331 images, but in the end it only trained on 1 class.

I followed a tutorial on YouTube and have been racking my brain, yet I'm still stuck. I don't want to proceed with YOLOv8 until I've figured out the root cause.

# Import necessary libraries
from roboflow import Roboflow
import os
import shutil
# Initialize Roboflow with the API key
rf = Roboflow(api_key="*****") #redacted

# Define the list of projects with their respective workspaces and names
projects = [
    {"workspace": "coronaimageclassification-xfuzr", "project_name": "smart-fczkj"},
    {"workspace": "coronaimageclassification-xfuzr", "project_name": "ecoclean-hglrk"},
    {"workspace": "coronaimageclassification-xfuzr", "project_name": "montecarlo"},
    {"workspace": "coronaimageclassification-xfuzr", "project_name": "montecarloadv"}
]

# Dictionary to hold the paths to the datasets
datasets = {}

# Loop through the projects and download datasets for each
for proj in projects:
    # Access the project and version
    project = rf.workspace(proj["workspace"]).project(proj["project_name"])
    version = project.version(1)

    # Download the dataset for YOLOv5 format and capture the location
    dataset = version.download("yolov5")

    # Store the path to the dataset in the dictionary
    datasets[proj["project_name"]] = dataset.location

    print(f"Downloaded dataset for project: {proj['project_name']} at {dataset.location}")

# The dictionary contains the paths to all downloaded datasets
print(datasets)
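
Each Roboflow YOLOv5 export ships with its own data.yaml, so before merging it may be worth printing what classes each download declares. A minimal sketch, reusing the `datasets` dictionary built in the download loop above:

import os
import yaml

# Inspect each downloaded dataset's data.yaml to see its class count and names.
# Assumes the standard Roboflow YOLOv5 export layout (data.yaml at the dataset root).
for name, path in datasets.items():
    with open(os.path.join(path, "data.yaml")) as f:
        data_cfg = yaml.safe_load(f)
    print(f"{name}: nc={data_cfg.get('nc')}, names={data_cfg.get('names')}")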

import os
import shutil
# Define the paths to the individual datasets
datasets = {
    "ecoclean": '/content/yolov5/ecoclean-1',
    "smart": '/content/yolov5/smart-1',
    "montecarlo": '/content/yolov5/montecarlo-1',
    "montecarloadv": '/content/yolov5/montecarloadv-1'
}

# Path where the merged dataset will be stored in the yolov5 directory
merged_dataset_path = "/content/yolov5/yolov5merged"

# Define the folders to merge: train, valid, and test
folders = ["train", "valid", "test"]

# Define the subfolders within each folder: images and labels
subfolders = ["images", "labels"]

# Create folders for the merged dataset if they don't exist
for folder in folders:
    for subfolder in subfolders:
        os.makedirs(os.path.join(merged_dataset_path, folder, subfolder), exist_ok=True)

# Function to copy files from source to destination
def copy_files(src, dst):
    if os.path.exists(src):  # Ensure source folder exists before copying
        for filename in os.listdir(src):
            file_path = os.path.join(src, filename)
            if os.path.isfile(file_path):
                print(f"Copying {file_path} to {dst}")
                shutil.copy(file_path, dst)

# Loop through each dataset and copy files to the merged dataset
for dataset_name, dataset_path in datasets.items():
    for folder in folders:  # loop through train, val, and test
        for subfolder in subfolders:  # loop through images and labels
            src_folder = os.path.join(dataset_path, folder, subfolder)  # source path
            dst_folder = os.path.join(merged_dataset_path, folder, subfolder)  # destination path
            copy_files(src_folder, dst_folder)

print(f"Datasets merged successfully and stored at {merged_dataset_path}!")
import os

# Define the folders to check: train, valid, and test
folders = ["train", "valid", "test"]

# Function to count images in each dataset
def count_images_in_dataset(dataset_path):
    image_count = 0
    for folder in folders:  # train, val, test
        images_folder = os.path.join(dataset_path, folder, 'images')
        if os.path.exists(images_folder):
            image_count += len([f for f in os.listdir(images_folder) if os.path.isfile(os.path.join(images_folder, f))])
    return image_count

# Loop through each dataset and count images
# Dictionary to store image counts for each dataset
image_counts = {dataset_name: count_images_in_dataset(dataset_path) for dataset_name, dataset_path in datasets.items()}

# Print image count for each individual dataset
for dataset_name, count in image_counts.items():
    print(f"Number of images in {dataset_name}: {count}")

# Path to the merged dataset
merged_dataset_path = "/content/yolov5/yolov5merged"

# Count total images in merged dataset
total_images_merged = count_images_in_dataset(merged_dataset_path)
print(f"\nTotal number of images in the merged dataset: {total_images_merged}")
[![IMAGE COUNT IN ALL 4 CLASSES, & MERGED DATASET][1]][1]


  [1]: https://i.sstatic.net/A22qjbw8.png
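
The same kind of check on the label files would show how many distinct class indices actually ended up in the merged dataset, which is what train.py ultimately sees. A minimal sketch, reusing `folders` and `merged_dataset_path` from the counting code above:

import os
from collections import Counter

# Count how often each class index appears across all merged label files.
class_counts = Counter()
for folder in folders:  # train, valid, test
    labels_folder = os.path.join(merged_dataset_path, folder, "labels")
    if not os.path.exists(labels_folder):
        continue
    for fname in os.listdir(labels_folder):
        with open(os.path.join(labels_folder, fname)) as f:
            for line in f:
                if line.strip():
                    class_counts[int(line.split()[0])] += 1

print(f"Class index distribution in merged labels: {dict(class_counts)}")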

# Train YOLOv5 and time its performance

%%time
%cd /content/yolov5
!python train.py --img 640 --batch 16 --epochs 100 --data /content/yolov5/yolov5merged/combined_dataset.yaml --cfg /content/yolov5/models/custom_yolov5s.yaml --weights 'yolov5s.pt' --name yolov5s_results --cache

Upvotes: 1

Views: 40

Answers (0)
