Reputation: 11
I'm working on creating a dataset for Yolo model training. Currently the frames and annotation files are present for multiple videos in their separate folders.
Here's the code I wrote to split the images and annotation files from the different folders.
The images to be added in test/val folders are selected randomly and moved.
I've tried various copy methods (including copy2 and copyfileobj from shutil, etc.), but only a few images are copied properly; the rest are empty files with size 38kb.
# Assume other libraries are imported.
# Script fragment: for each folder in video_folders, enter its "obj_train_data"
# subfolder and copy every listed file, plus the same-named .txt file, into the
# test / validation / training destination folders using the shell `cp` command.
# The names video_folders, sample_moved, random_indices, test_counter,
# val_counter, max_test_samples_count, max_val_samples_count and the
# __*_FOLDER__ destinations are defined outside this fragment.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# branches below cannot be read off the text - confirm against the real file.
for video_folder in video_folders:
os.chdir(os.path.join(video_folder, "obj_train_data"))
print(f"Currently in {os.getcwd()}")
# os.listdir returns every entry in the directory, not only images, so the
# .txt label files are iterated as if they were image files too.
sample_files = os.listdir("./")
for image_file in sample_files:
# Replace the last three characters of the name with "txt"; os.path.join with
# a single argument returns that argument unchanged.
label_file = os.path.join(str(image_file[:-3] + "txt"))
print(f"Image file name = {image_file}")
print(f"label file name = {label_file}")
# Generate full paths.
image_file = os.path.join(os.getcwd(), image_file)
label_file = os.path.join(os.getcwd(), label_file)
assert(os.path.exists(image_file))
assert(os.path.exists(label_file))
# Rename the image and label files to avoid overwriting: the running
# sample_moved counter is used as the new frame number.
new_image_file_name = f"frame_{sample_moved}.PNG"
new_label_file_name = f"frame_{sample_moved}.txt"
# Copy the background file (an image with no label file) to the training images.
# NOTE(review): the assert above already requires label_file to exist, so this
# branch looks reachable only when asserts are disabled (python -O) - confirm.
if not (os.path.exists(label_file)):
# The paths are interpolated unquoted into a shell command and the exit
# status returned by os.system is not checked.
os.system(f"cp {image_file} {os.path.join(__IMAGE_TRAIN_FOLDER__,new_image_file_name)}")
continue
# random_indices[0] is read here, before the try below, so an empty list raises
# IndexError on this line rather than inside the guarded pop.
if(sample_moved == random_indices[0]):
try:
random_indices.pop(0)
except IndexError as e:
print("array is empty!!")
# Copy the file to the test folder while the test quota is not yet filled.
if(test_counter < max_test_samples_count):
print("moving to test folder")
os.system(f"cp {image_file} {os.path.join(__IMAGE_TEST_FOLDER__,new_image_file_name)}")
os.system(f"cp {label_file} {os.path.join(__LABEL_TEST_FOLDER__,new_label_file_name)}")
sleep(0.1)
test_counter+=1
# Otherwise copy the file to the validation folder while its quota is not yet filled.
elif(val_counter < max_val_samples_count):
print("moving to validation folder")
os.system(f"cp {image_file} {os.path.join(__IMAGE_VAL_FOLDER__,new_image_file_name)}")
os.system(f"cp {label_file} {os.path.join(__LABEL_VAL_FOLDER__,new_label_file_name)}")
sleep(0.1)
val_counter+=1
# NOTE(review): presumably this else pairs with the random_indices check above
# (samples not selected go to training) - the lost indentation makes this ambiguous.
else:
print("Moving to training folder")
os.system(f"cp {image_file} {os.path.join(__IMAGE_TRAIN_FOLDER__,new_image_file_name)}")
os.system(f"cp {label_file} {os.path.join(__LABEL_TRAIN_FOLDER__,new_label_file_name)}")
sleep(0.1)
sample_moved+=1
# Move back up two directory levels.
# NOTE(review): this returns to the starting directory only if video_folder is a
# single relative path component - confirm.
os.chdir("../../")
print(f"Total samples moved = {sample_moved}")
The directory structure is as follows:
-Images
-Labels
-video_1
--obj_train_data
-video_2
--obj_train_data
-video_3
--obj_train_data
I utilized the following methods:
os.system("cp src dest")
shutil.copy("src", "dest")
What I expect:
What is happening:
Upvotes: -1
Views: 53
Reputation: 11
print(f"Currently in {os.getcwd()}")
# os.listdir returns every entry in the directory, so .txt files are listed too.
sample_files = os.listdir("./")
This generates a list of all the files in the directory, which also includes the .txt files. Those .txt files are then copied as if they were image files later on in the code.
Instead, create a list of images only, using:
print(f"Currently in {os.getcwd()}")
# Keep only the entries whose name ends with ".PNG" (case-sensitive match),
# ordered with natsort.natsorted.
sample_files = natsort.natsorted(img for img in os.listdir("./") if img.endswith(".PNG"))
This will ensure that the loop only runs on images and not on .txt files.
Upvotes: 0