delViento
delViento

Reputation: 167

match subfolder name with file name and output folder python

I have the following folder-file structure:

I want to be able to load all the files from the same folder (segment_polygon), apply a function to them, and export to another set of folders (segment_multipoly) with the same structure.

and so on...

The names "mainfolder", "folder", "subfolder", "file" are there just to indicate to which level the names belong, but they are not part of the folder's labels.

# Input and output roots, both built under src (src is defined outside this snippet).
input_path = os.path.join(src, "segment_polygon\\")
output_path = os.path.join(src, "segment_multipoly\\")

root = Path(input_path)

# Outer walk: visits every directory below input_path and yields the file names it contains.
for maindir, subdirs, shpfiles in os.walk(input_path):
    for shp in shpfiles:
        # Split the file name at "_"; the two-name unpacking only succeeds when the
        # name contains exactly one "_".
        aoi_root, shp_ext = shp.split("_")
        # Inner glob: for every walked file, re-scan all .shp files located three
        # directory levels below root.
        for file in root.glob("*/*/*/*.shp"):
            part_path = Path(file).parts
            # NOTE(review): 9, 11 and 12 are positions in the full path, so they depend
            # on how many components src contributes -- confirm for your machine.
            folder_numSeg_name = part_path[9] #here I get the subfolder "poly5numSeg", "poly6numSeg", etc
            folder_aoi_name = part_path[11] #here I get the subfolder "aoi1", "aoi2", etc...
            aoiprep_seg = part_path[12] # here I get the name of the file "aoi1_seg0.shp", "aoi1_seg1.shp", etc
            if aoi_root == folder_aoi_name:
                '''apply a function to shp'''
                # NOTE(review): shp is a file-name string coming from os.walk, not a
                # loaded dataset, and the call below is missing its closing parenthesis.
                shp.to_file(os.path.join(output_path, folder_numSeg_name, "compactness40\\", folder_aoi_name, shp)

I am a bit lost. I am working on Windows 10 with Python 3. Thank you for any help.

UPDATE OF THE SCRIPT

# Input and output roots, both built under output (defined outside this snippet).
segment_polygon = os.path.join(output, "segment_polygon\\") # input path
segment_multipoly = os.path.join(output, "segment_multipoly\\") # output path

# 1. get aoi directories (every directory three levels below segment_polygon)
aoi_dir = [path for path in glob.glob(os.path.join(segment_polygon, "*/*/*"))
           if os.path.isdir(path)]

# list to store the shapefiles to be intersected
# NOTE(review): created once, before the loops, and never cleared, so paths from
# every aoi directory accumulate in the same list.
input_list = []

for path in aoi_dir:
    # 2. get the files
    shp_paths = glob.glob(path + os.sep + '*.shp')
    for shp_path in shp_paths:
        # 3. do things with shp_path
        full_path, seg_shp = os.path.split(shp_path)
        # Last five characters of the directory path.
        # NOTE(review): assumes five-character folder names such as "aoi01" -- confirm.
        aoi_folder = full_path[-5:] # aoi01, aoi02, aoi03....aoi25
        if seg_shp.startswith(aoi_folder):
            input_list.append(shp_path) # creates the new list with shapefiles that start with the same aoiX value
        # NOTE(review): the intersection below sits inside the per-file loop, so it is
        # recomputed over the whole accumulated input_list for every shapefile.
        auto_inter = gpd.GeoDataFrame.from_file(input_list[0]) #process shp
        for i in range(len(input_list)-1):
            mp = gpd.GeoDataFrame.from_file(input_list[i+1]) # process shp
            auto_inter = gpd.overlay(auto_inter, mp, how='intersection') #process shp
        print(f"shp included in the list:\n {input_list}")
        # 4. create your output file path
        print(full_path)
        # Two chained replacements: "poly" -> "multi", then "gon" -> "polygon".
        # NOTE(review): applied to "segment_polygon" this yields "segment_multipolygon",
        # not the "segment_multipoly" folder defined above -- confirm the intended name.
        output_path = full_path.replace("poly", "multi")
        N_output_path = output_path.replace("gon", "polygon")
        print(f"output_path:\n {N_output_path}")
        # make sure the directories exist
        # NOTE(review): the existence check and makedirs act on the PARENT of
        # N_output_path, while the file is written inside N_output_path itself; the
        # export is also nested under this check, so it only runs when that parent
        # directory did not exist yet.
        if not os.path.exists(os.path.dirname(N_output_path)):
            os.makedirs(os.path.dirname(N_output_path), exist_ok=True)
            # create output file name
            multipoly_name = aoi_folder + ".shp"
            # export
            auto_inter.to_file(os.path.join(N_output_path, multipoly_name)) #export shp

I incorporated the changes from ygorg. However, the script takes ALL the shapefiles for the intersection. I want only the aoi1 files to be intersected and the result saved in the aoi1 folder, then the aoi2 shapefiles intersected and saved in the aoi2 folder, and so on. This doesn't work yet.

Upvotes: 1

Views: 566

Answers (2)

delViento
delViento

Reputation: 167

I managed to solve the problem. Thank you ygorg for the input. It led me to the right path.

# Create a list of the aoi-level subfolders of segment_polygon (three levels
# down, e.g. poly5numSeg/compactness40/aoi1).
poly_dir = [path for path in glob.glob(os.path.join(segment_polygon, "*/*/*"))
            if os.path.isdir(path)]

for aoi_poly in poly_dir:

    # define input folder: path of this aoi folder relative to segment_polygon.
    # os.path.relpath works with any path separator, unlike splitting the string
    # on the literal "segment_polygon\\".
    input_subfolder = os.path.relpath(aoi_poly, segment_polygon)
    #print(f"input folder: {input_subfolder}")

    # define export folder: same relative structure, with "poly" renamed to "multi"
    export_subfolder = input_subfolder.replace("poly", "multi")
    export_folder = os.path.join(segment_multipoly, export_subfolder)
    # make sure the export folder exists before anything is written into it
    os.makedirs(export_folder, exist_ok=True)
    #print(f"output folder: {export_folder}")

    # define name output shapefile
    # Extract the integers from the RELATIVE path only, so that digits in parent
    # directories (drive, user name, project folder...) cannot break the
    # three-value unpacking.
    numseg, compactness, aoi = [int(s) for s in re.findall(r'\d+', input_subfolder)]
    name_output = f"aoi{aoi}_numSeg{numseg}_c{compactness}.shp"
    #print(f"shapefile label: {name_output}")

    full_outputpath = os.path.join(export_folder, name_output)
    #print(f"full output path: {full_outputpath}")

    # intersect and merge all single polygons
    # input_list holds bare file names; join them with aoi_poly to open the files.
    input_list = [name for name in os.listdir(aoi_poly) if name.endswith('.shp')]

    ###### apply my function here ######

    # export (filetoexport is the result produced by the function applied above)
    filetoexport.to_file(full_outputpath)

Upvotes: 0

ygorg
ygorg

Reputation: 770

Mixing os.walk and glob seems quite confusing. If you want to process each aoiX folder, try to first list all those directories, then list the .shp files in each directory, then apply the function, and finally create your output_path and write to it.

When working with files, it's always good to break down what you need to do into small steps so that you don't get overwhelmed.

# Step 1: collect every aoi directory (third level below segment_polygon)
aoi_dir = list(filter(os.path.isdir, glob.glob('segment_polygon/*/*/*')))
for aoi_path in aoi_dir:
    # Step 2: iterate over the shapefiles found directly inside this directory
    for shp_path in glob.glob(f"{aoi_path}{os.sep}*.shp"):
        # Step 3: process shp_path here
        # Step 4: derive the output file path by swapping the root folder name
        output_path = shp_path.replace('segment_polygon', 'segment_multipoly')
        # create the target directory (and any missing parents) if needed
        target_dir = os.path.dirname(output_path)
        os.makedirs(target_dir, exist_ok=True)
        # write the result to output_path here

And always do a dry run first, without processing or writing anything, just printing the paths, so you are sure of what goes where!

Upvotes: 1

Related Questions