Reputation: 167
I have the following folder-file structure:
I want to be able to load all the files from the same folder (segment_polygon), apply a function to them, and export to another set of folders (segment_multipoly) with the same structure.
The files from r".\segmentation_aoi\segment_polygon\poly5numSeg\compactness40\aoi1"
should be processed together and be exported to r".\segmentation_aoi\segment_multipoly\multi5numSeg\compactness40\aoi1"
The files from r".\segmentation_aoi\segment_polygon\poly6numSeg\compactness40\aoi2"
should be processed together and be exported to r".\segmentation_aoi\segment_multipoly\multi6numSeg\compactness40\aoi2"
and so on...
The names "mainfolder", "folder", "subfolder", "file" are there just to indicate to which level the names belong, but they are not part of the folder's labels.
# First attempt: read the shapefiles below segment_polygon and write results
# into the parallel segment_multipoly tree.
# NOTE(review): `src`, `os` and `Path` are not defined in this snippet; presumably
# they are set up earlier in the script.
# NOTE(review): the original indentation was lost, so the nesting of the loops and
# the `if` below cannot be confirmed from this text.
input_path = os.path.join(src, "segment_polygon\\")
output_path = os.path.join(src, "segment_multipoly\\")
root = Path(input_path)
# Traversal 1: os.walk visits every directory below input_path.
for maindir, subdirs, shpfiles in os.walk(input_path):
for shp in shpfiles:
# Unpacking into two names requires exactly one "_" in the file name.
aoi_root, shp_ext = shp.split("_")
# Traversal 2: a second scan of the same tree, matching .shp files three folders deep.
for file in root.glob("*/*/*/*.shp"):
part_path = Path(file).parts
# The fixed indices 9, 11 and 12 index into the parts of the full path, so they
# depend on how many components `src` itself has.
folder_numSeg_name = part_path[9] #here I get the subfolder "poly5numSeg", "poly6numSeg", etc
folder_aoi_name = part_path[11] #here I get the subfolder "aoi1", "aoi2", etc...
aoiprep_seg = part_path[12] # here I get the name of the file "aoi1_seg0.shp", aoi1_seg1.shp", etc
if aoi_root == folder_aoi_name:
'''apply a function to shp'''
# NOTE(review): `shp` is the file-name string produced by os.walk, so it has no
# `.to_file` method; the call is also missing its closing parenthesis, and
# folder_numSeg_name still holds the "poly..." name rather than "multi...".
shp.to_file(os.path.join(output_path, folder_numSeg_name, "compactness40\\", folder_aoi_name, shp)
I am a bit lost. I am working on Windows 10 with Python 3. Thank you for any help.
UPDATE OF THE SCRIPT
# Second attempt: for each aoi directory, collect its shapefiles, intersect them
# and export one result into the segment_multipoly tree.
# NOTE(review): `output`, `os`, `glob` and `gpd` are not defined in this snippet;
# presumably they are set up earlier in the script.
# NOTE(review): the original indentation was lost, so which statements sit inside
# which loop cannot be confirmed from this text.
segment_polygon = os.path.join(output, "segment_polygon\\") # input path
segment_multipoly = os.path.join(output, "segment_multipoly\\") # output path
# 1. get aoi directories
aoi_dir = [path for path in glob.glob(os.path.join(segment_polygon, "*/*/*"))
if os.path.isdir(path)]
# list to store the shapefiles to be intersected
# NOTE(review): this list is created once, before the directory loop, and is never
# cleared, so paths appended for one aoi directory are still present when the
# next directory is processed.
input_list = []
for path in aoi_dir:
# 2. get the files
shp_paths = glob.glob(path + os.sep + '*.shp')
for shp_path in shp_paths:
# 3. do things with shp_path
full_path, seg_shp = os.path.split(shp_path)
# Takes the last five characters of the directory path; for a four-character
# folder name such as "aoi1" the slice would also include the path separator.
aoi_folder = full_path[-5:] # aoi01, aoi02, aoi03....aoi25
if seg_shp.startswith(aoi_folder):
input_list.append(shp_path) # creates the new list with shapefiles that start with the same aoiX value
# Chain of intersections: start from the first file in input_list, then overlay
# each remaining file onto the running result.
auto_inter = gpd.GeoDataFrame.from_file(input_list[0]) #process shp
for i in range(len(input_list)-1):
mp = gpd.GeoDataFrame.from_file(input_list[i+1]) # process shp
auto_inter = gpd.overlay(auto_inter, mp, how='intersection') #process shp
print(f"shp included in the list:\n {input_list}")
# 4. create your output file path
print(full_path)
# NOTE(review): replacing "poly" also rewrites "segment_polygon" to
# "segment_multigon"; the following "gon" -> "polygon" replacement then yields
# "segment_multipolygon", which differs from the "segment_multipoly" folder
# named in segment_multipoly above.
output_path = full_path.replace("poly", "multi")
N_output_path = output_path.replace("gon", "polygon")
print(f"output_path:\n {N_output_path}")
# make sure the directories exist
# NOTE(review): os.path.dirname() returns the parent of N_output_path, so the
# final aoi folder itself is not created here; the exists() check is also
# redundant given exist_ok=True.
if not os.path.exists(os.path.dirname(N_output_path)):
os.makedirs(os.path.dirname(N_output_path), exist_ok=True)
# create output file name
multipoly_name = aoi_folder + ".shp"
# export
auto_inter.to_file(os.path.join(N_output_path, multipoly_name)) #export shp
I incorporated the changes suggested by ygorg. However, the script now takes ALL the shapefiles for the intersection. I want only the aoi1 files to be intersected and the result saved in the aoi1 folder, then the aoi2 shapefiles intersected and saved in the aoi2 folder, and so on. This doesn't work yet.
Upvotes: 1
Views: 566
Reputation: 167
I managed to solve the problem. Thank you ygorg for the input. It led me to the right path.
# Process every aoi folder found three levels below segment_polygon and export
# one shapefile per folder into the mirrored location under segment_multipoly.
# Relies on names defined outside this snippet: `segment_polygon`,
# `segment_multipoly`, the modules `os`, `glob`, `re`, and `filetoexport`
# (the result of the user's own processing step).

# Create a list of the subfolders of segment_polygon
poly_dir = [
    path
    for path in glob.glob(os.path.join(segment_polygon, "*/*/*"))
    if os.path.isdir(path)
]

for aoi_poly in poly_dir:
    # define input folder: the part of the path below segment_polygon,
    # e.g. poly5numSeg/compactness40/aoi1. relpath is used instead of splitting
    # on a literal "segment_polygon\\" so no path separator is hard-coded.
    input_subfolder = os.path.relpath(aoi_poly, segment_polygon)

    # define export folder: same relative layout, with "poly" renamed to "multi"
    export_subfolder = input_subfolder.replace("poly", "multi")
    export_folder = os.path.join(segment_multipoly, export_subfolder)
    # the mirrored folder may not exist yet; create it before writing into it
    os.makedirs(export_folder, exist_ok=True)

    # define name of the output shapefile
    # Extract the integers from the relative sub-path only: digits elsewhere in
    # the absolute path (e.g. in a parent folder name) would otherwise break
    # the three-way unpacking below.
    numseg, compactness, aoi = [
        int(s) for s in re.findall(r"\d+", input_subfolder)
    ]
    name_output = f"aoi{aoi}_numSeg{numseg}_c{compactness}.shp"
    full_outputpath = os.path.join(export_folder, name_output)

    # intersect and merge all single polygons
    # input_list holds bare file names as returned by os.listdir; join them
    # with aoi_poly before opening.
    input_list = [
        name for name in os.listdir(aoi_poly) if name.endswith(".shp")
    ]

    ###### apply my function here ######

    # export
    filetoexport.to_file(full_outputpath)
Upvotes: 0
Reputation: 770
Mixing os.walk and glob seems to be quite confusing. If you want to process each aoiX folder, try to first list all those directories, then list the .shp files in each directory, then apply the function, and finally create your output_path and write to it.
When working with files it's always good to decompose what you need to not get overwhelmed.
# Skeleton: walk each aoi directory under segment_polygon and prepare the
# matching output location under segment_multipoly for every shapefile.

# Step 1: collect the aoi directories (third level below segment_polygon)
aoi_dir = list(filter(os.path.isdir, glob.glob('segment_polygon/*/*/*')))

for path in aoi_dir:
    # Step 2: find the shapefiles stored directly in this directory
    shp_paths = glob.glob(os.path.join(path, '*.shp'))

    for shp_path in shp_paths:
        # Step 3: do things with shp_path

        # Step 4: build the output file path by swapping the top-level folder name
        output_path = shp_path.replace('segment_polygon', 'segment_multipoly')

        # make sure the target directory exists before anything is written
        target_dir = os.path.dirname(output_path)
        os.makedirs(target_dir, exist_ok=True)

        # write in output file
And always do a dry run first, without processing or writing anything, just printing the paths, so you are sure of what goes where!
Upvotes: 1