Reputation: 185
I am trying to create a function that takes in the name of a root directory, then traverses through it and returns a list like this:
[["folder1",[
["subfolder1",[
"file1",
"file2"
]],
["subfolder2",[
"file3",
"file4"
]]
]],"file5","file6"]
Below is my attempt at the function:
def traverse(rootdir):
    """Attempt to build a nested listing of everything beneath rootdir.

    Sub-directories are appended as [name, children] pairs and files as
    joined paths. This is the version that exhibits the duplicated entries
    and full paths; the comments below mark where each symptom comes from.
    """
    names = []
    # os.walk itself descends into every directory beneath rootdir.
    for cdirname, dirnames, filenames in os.walk(rootdir):
        # record path to all subdirectories first.
        for subdirname in dirnames:
            # Recursing here, while the enclosing walk will also reach this
            # sub-directory, records the same entries more than once.
            names.append([subdirname,traverse(os.path.join(cdirname, subdirname))])
        # record path to all filenames.
        for filename in filenames:
            # The joined path is stored, not the bare file name.
            names.append(os.path.join(cdirname, filename))
    return names
My problem is that I always end up with the same files/folders being recorded more than once, and that the paths are always shown relative to "rootdir" instead of just the names of the respective files/folders. How do I weed out the duplicates? Additionally, how can I make it record only the name rather than the full path?
Upvotes: 0
Views: 602
Reputation: 369274
`sorted` is used to make directories come first. If you don't mind the order, just return `names`.
def traverse(rootdir):
    """Return the contents of *rootdir* as a nested list of bare names.

    Each sub-directory becomes a ``[name, children]`` pair, where
    ``children`` is the result of calling this function on that
    sub-directory. Every other entry is represented by its name alone.
    Directories are listed first, then files, each group sorted by name.
    """
    dirs, files = [], []
    # Sorting the names once up front keeps both groups in name order, so no
    # custom sort key is needed on the mixed result.
    for filename in sorted(os.listdir(rootdir)):
        filepath = os.path.join(rootdir, filename)
        if os.path.isdir(filepath):
            dirs.append([filename, traverse(filepath)])
        else:
            files.append(filename)
    return dirs + files
Another version that uses `os.walk`:
def traverse(rootdir):
    """Build a nested name listing of *rootdir* with a single os.walk pass.

    Sub-directories appear as ``[name, children]`` pairs ahead of the files
    of the same directory; both groups are sorted by name.
    """
    tree = []
    # Maps each directory path to the list that collects its entries, so a
    # directory yielded later by the walk can find the list its parent made.
    listing_for = {rootdir: tree}
    for current, subdirs, files in os.walk(rootdir):
        listing = listing_for[current]
        for name in sorted(subdirs):
            children = []
            listing_for[os.path.join(current, name)] = children
            listing.append([name, children])
        listing.extend(sorted(files))
    return tree
Upvotes: 1
Reputation: 113
You could use `os.walk()` to get all subdirs and subfiles. It yields a "triple" of (current path, [subdirs], [subfiles]) for each directory it visits. But that didn't work for my needs, so I coded the following script. Hope this helps.
What it does is create an object for each folder, containing its files and dirs, and sort them alphabetically. I looked at os.walk and how it works, and this is a similar approach (with isdir()). The tab variable is just for nicer-looking output.
import os
class Folder(object):
    """Print an indented tree listing of a directory.

    Typical use: construct with the directory path, call ``sorter()`` to
    read the directory, then ``list_stuff()`` to print its entries and
    recurse into each sub-directory.
    """

    # Directory / file names that are skipped on every level.
    prohibited_dirs = set()
    prohibited_files = set()
    # Default indentation depth; every instance stores its own value.
    tab = 0

    def __init__(self, path, folder_name, depth=1):
        """Remember the location of the folder and its indentation depth.

        path should be /home/example, folder_name: example.
        depth is the number of tab characters printed before each entry of
        this folder; nested folders are created with depth + 1.
        """
        self.path = path
        self.folder_name = folder_name
        self.sub_dirs = []
        self.sub_files = []
        # Depth is kept per instance so that separate traversals cannot
        # shift each other's indentation.
        self.tab = depth

    def sorter(self):
        """Split the os.listdir output of this folder into dirs and files."""
        names = os.listdir(self.path)
        # Start from empty lists so a repeated call does not add duplicates.
        self.sub_dirs = []
        self.sub_files = []
        for name in names:
            if os.path.isdir(os.path.join(self.path, name)):
                self.sub_dirs.append(name)
            else:
                self.sub_files.append(name)

    def list_stuff(self):
        """Print sub-folders (recursively) and then files, sorted by name."""
        # Sort alphabetically, ignoring case
        self.sub_dirs.sort(key=str.lower)
        self.sub_files.sort(key=str.lower)
        indent = "\t" * self.tab

        # An empty list simply ends the recursion on this branch.
        for sub_dir in self.sub_dirs:
            if sub_dir in self.__class__.prohibited_dirs:
                continue
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(indent + sub_dir)
            # Go deeper, one indentation level further in
            deeper = Folder(os.path.join(self.path, sub_dir), sub_dir,
                            self.tab + 1)
            deeper.sorter()
            deeper.list_stuff()

        for sub_file in self.sub_files:
            if sub_file in self.__class__.prohibited_files:
                continue
            print(indent + sub_file)
# List the current working directory: both the path to walk and the name
# given to the root folder are ".".
STARTDIRECTORY = STARTFOLDER = "."

runner = Folder(path=STARTDIRECTORY, folder_name=STARTFOLDER)
# Read the directory first, then print it and recurse into sub-folders.
runner.sorter()
runner.list_stuff()
Upvotes: 0