Reputation: 514
I want to list Google Drive folders and files as a nested JSON tree using a Python class.
I want each data point in the structure to be an object, and I want to wrap the whole script in a class.
Tree should be like this
[
{
"name":<folder_name>,
"id":<folder_id>,
"type":'folder',
"children":[
{
"name":<folder_name>,
"id":<folder_id>,
"type":'folder',
"children":[
{
"name":<folder_name>,
"id":<folder_id>,
"type":'folder',
"children":[..........]
},
{
"name":<file_name>,
"id":<file_id>,
"type":'file',
}
]
},
{
"name":<file_name>,
"id":<file_id>,
"type":'file',
}
]
},
{
"name":<file_name>,
"id":<file_id>,
"type":'file',
},
......................
]
Thanks
Upvotes: 6
Views: 3415
Reputation: 22058
In general, Google Drive is not a typical tree structure (folders are labels, and a file can have multiple parents).
BUT, I would consider using a tree visualization library like treelib.
Below is a full solution for printing your Google drive file system recursively.
from treelib import Node, Tree
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Run PyDrive's LocalWebserverAuth() and wrap the resulting auth object in a
# GoogleDrive client. `drive` is read as a module-level global by get_children().
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
def get_children(root_folder_id):
    """Return the non-trashed Drive entries whose parent is *root_folder_id*.

    The listing is fetched through the module-level ``drive`` client.
    """
    query = "'" + root_folder_id + "'" + " in parents and trashed=false"
    return drive.ListFile({'q': query}).GetList()
def get_folder_id(root_folder_id, root_folder_title):
    """Return the id of the first child of *root_folder_id* with the given title.

    Returns ``None`` when no child is titled *root_folder_title*.
    """
    matching_ids = (
        entry['id']
        for entry in get_children(root_folder_id)
        if entry['title'] == root_folder_title
    )
    return next(matching_ids, None)
def add_children_to_tree(tree, file_list, parent_id):
    """Create one node in *tree* under *parent_id* for each entry of *file_list*.

    Each entry must provide ``'title'`` and ``'id'``; a progress line is
    printed for every node that is created.
    """
    for entry in file_list:
        title, node_id = entry['title'], entry['id']
        tree.create_node(title, node_id, parent=parent_id)
        print('parent: %s, title: %s, id: %s' % (parent_id, title, node_id))
def populate_tree_recursively(tree, parent_id):
    """Add everything below *parent_id* to *tree*, depth first.

    Args:
        tree: tree object that add_children_to_tree() adds nodes to.
        parent_id: Drive id of the folder whose contents are listed.
    """
    children = get_children(parent_id)
    add_children_to_tree(tree, children, parent_id)
    for child in children:
        # Only folders can contain other entries; descending into plain files
        # would cost one list request per file and always come back empty.
        if child['mimeType'] == 'application/vnd.google-apps.folder':
            populate_tree_recursively(tree, child['id'])
def main():
    """Build a tree for the configured root folder and print it.

    Raises:
        SystemExit: if no entry titled ``root_folder_title`` exists directly
            under the Drive root.
    """
    root_folder_title = "your-root-folder"
    root_folder_id = get_folder_id("root", root_folder_title)
    if root_folder_id is None:
        # get_folder_id() returns None when nothing matches; without this
        # check the failure surfaces later as a TypeError while building the
        # search query.
        raise SystemExit(
            "Folder %r was not found in the Drive root" % root_folder_title
        )
    tree = Tree()
    tree.create_node(root_folder_title, root_folder_id)
    populate_tree_recursively(tree, root_folder_id)
    tree.show()


if __name__ == "__main__":
    main()
Upvotes: 0
Reputation: 71
# Build the Drive v3 client; `service` is read as a module-level global by
# check_for_subfolders() and check_for_files().
# NOTE(review): `your creds` is a placeholder, not valid Python - presumably the
# path to the service-account JSON key file; replace it before running.
# NOTE(review): ServiceAccountCredentials and build are not imported in the
# visible snippet - confirm the imports exist elsewhere.
scope = ['https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(your creds, scope)
service = build('drive', 'v3', credentials=credentials)
def check_for_subfolders(folder_id):
    """Map the paths found directly below *folder_id* to their Drive ids.

    For every sub-folder of *folder_id* the result gets one entry for the
    sub-folder itself and one for each non-folder file inside it.  When
    *folder_id* has no sub-folders, its own files are recorded instead.
    Paths are built by appending ``'/' + name`` to the module-level
    ``folder_tree`` prefix, which must be set before this is called.

    Args:
        folder_id: Drive id of the folder to inspect.

    Returns:
        dict mapping path strings to Drive ids.
    """
    new_sub_patterns = {}
    # NOTE: only the first page (up to 400 folders) is read; nextPageToken is
    # requested but not followed here.
    folders = service.files().list(
        q="mimeType='application/vnd.google-apps.folder' and parents in '"
        + folder_id + "' and trashed = false",
        fields="nextPageToken, files(id, name)",
        pageSize=400,
    ).execute()
    all_folders = folders.get('files', [])
    # Snapshot of the global path prefix for the folder being inspected.
    old_folder_tree = folder_tree

    if all_folders:
        for folder in all_folders:
            subfolder_pattern = old_folder_tree + '/' + folder['name']
            new_sub_patterns[subfolder_pattern] = folder['id']
            print('New Pattern:', subfolder_pattern)
            all_files = check_for_files(folder['id'])
            if all_files:
                for file in all_files:
                    file_name = file['name']
                    new_sub_patterns[subfolder_pattern + "/" + file_name] = file['id']
                    print("Files added :", file_name)
            else:
                print('No Files Found')
    else:
        # No sub-folders: record the files of this folder itself.  The loop
        # variables `folder` and `subfolder_pattern` are unbound on this path
        # (the loop above never ran), so reading them here used to raise; use
        # `folder_id` and the saved prefix instead.
        for file in check_for_files(folder_id):
            file_name = file['name']
            new_sub_patterns[old_folder_tree + "/" + file_name] = file['id']
            print("Files added :", file_name)
    return new_sub_patterns
def check_for_files(folder_id):
    """Return every non-folder, non-trashed file directly inside *folder_id*.

    Follows ``nextPageToken`` so folders holding more than one page of
    results (400 entries per request) are returned completely.

    Args:
        folder_id: Drive id of the folder to list.

    Returns:
        list of dicts with the ``id`` and ``name`` of each file.
    """
    query = (
        "mimeType!='application/vnd.google-apps.folder' and parents in '"
        + folder_id + "' and trashed = false"
    )
    all_other_files = []
    page_token = None
    while True:
        other_files = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageSize=400,
            pageToken=page_token,
        ).execute()
        all_other_files.extend(other_files.get('files', []))
        # The token is absent on the last page.
        page_token = other_files.get('nextPageToken')
        if not page_token:
            return all_other_files
def get_folder_tree(folder_id):
    """Recursively collect path -> id entries below *folder_id*.

    Results accumulate in the module-level ``folder_ids`` dict, and the
    module-level ``folder_tree`` prefix is updated before each recursive call
    so that check_for_subfolders() builds paths relative to the entry being
    descended into.

    Args:
        folder_id: Drive id to start from.

    Returns:
        The shared ``folder_ids`` dict.
    """
    global folder_tree
    sub_folders = check_for_subfolders(folder_id)
    folder_ids.update(sub_folders)
    for path, sub_folder_id in sub_folders.items():
        folder_tree = path
        print('Current Folder Tree : ', folder_tree)
        print('****************************************Recursive Search Begins**********************************************')
        try:
            get_folder_tree(sub_folder_id)
        except Exception:
            # Best effort: a failing branch ends the descent for that entry
            # only.  Catching Exception (not a bare except) lets Ctrl-C and
            # SystemExit still stop the run.
            print('---------------------------------No furtherance----------------------------------------------')
    return folder_ids
Upvotes: 0
Reputation: 1375
Needs some refactoring, but it works. To build the dictionary, just add a value
for every element, for example x.name: x.make_dict().
import logging
import copy
import connect
logger = logging.getLogger('importer')
class DriveObj():
    """Common base for Drive entries: keeps the ``id`` and ``name`` fields."""

    def __init__(self, drive_obj):
        # Missing keys become None because dict-style .get() is used.
        self.id, self.name = drive_obj.get('id'), drive_obj.get('name')

    def make_dict(self):
        """Return a deep copy of the instance attributes without ``name``."""
        props = copy.deepcopy(vars(self))
        props.pop('name')
        return props

    @property
    def parents(self):
        """Parent ids fetched through the module-level ``service``.

        Every access issues a request; an answer without a ``'parents'``
        key yields an empty list.
        """
        try:
            response = service.files().get(
                fileId=self.id, fields='parents').execute()
            return response['parents']
        except KeyError:
            return []
class Directory(DriveObj):
    """A Drive folder together with the entries attached to it."""

    def __init__(self, drive_obj):
        """Wrap *drive_obj*, which must carry the folder mimeType.

        Raises:
            AssertionError: if *drive_obj* is not a folder.
        """
        super().__init__(drive_obj)
        self.mimeType = 'application/vnd.google-apps.folder'
        # Raised explicitly instead of via `assert`: assert statements are
        # removed under `python -O`, and get_content() relies on this
        # AssertionError to tell folders from documents.
        if drive_obj.get('mimeType') != self.mimeType:
            raise AssertionError('Not dir :(')
        self.children = []

    def add_content(self, drive_obj):
        """Append *drive_obj* to this folder's children."""
        self.children.append(drive_obj)
class Document(DriveObj):
    """A Drive entry that is not a folder."""

    def __init__(self, drive_obj):
        """Wrap *drive_obj* and remember its mimeType.

        Raises:
            AssertionError: if *drive_obj* carries a folder mimeType.
        """
        super().__init__(drive_obj)
        mime_type = drive_obj.get('mimeType')
        # Explicit raise so the check survives `python -O`; a missing
        # mimeType is accepted instead of failing with a TypeError on `in`.
        if mime_type is not None and 'vnd.google-apps.folder' in mime_type:
            raise AssertionError('Not a document: folder mimeType')
        self.mimeType = mime_type
def get_filelist(service):
    """Return the file listing from *service* with all result pages merged.

    A single ``files().list()`` call returns only one page; this follows
    ``nextPageToken`` until the listing is exhausted.

    Args:
        service: Drive client exposing ``files().list(...).execute()``.

    Returns:
        dict shaped like a list response whose ``'files'`` entry holds the
        files of every page (an empty list when there are none).
    """
    files = []
    page_token = None
    while True:
        response = service.files().list(pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    # Keep the remaining response keys, but drop the (absent or stale) token
    # and expose the merged list under the key callers already read.
    merged = dict(response)
    merged.pop('nextPageToken', None)
    merged['files'] = files
    return merged
def get_content(service):
    """Wrap every listed Drive entry in a Directory or Document object.

    Args:
        service: Drive client passed through to get_filelist().

    Returns:
        list of Directory/Document instances in listing order.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    # Dispatch on the mimeType directly rather than catching the
    # AssertionError from Directory(): assert statements vanish under
    # `python -O`, which would turn every entry into a Directory.
    return [
        Directory(drive_obj)
        if drive_obj.get('mimeType') == folder_mime
        else Document(drive_obj)
        for drive_obj in get_filelist(service)['files']
    ]
def find_id(content, id):
    """Depth-first search of *content* for the object whose ``id`` matches.

    Args:
        content: iterable of DriveObj instances; Directory children are
            searched recursively.
        id: Drive id to look for.

    Returns:
        The matching object, or ``None`` when it is not present.
    """
    for drive_obj in content:
        logger.debug('Got {}'.format(drive_obj.id))
        if drive_obj.id == id:
            logger.debug('Find id in {}'.format(drive_obj.id))
            return drive_obj
        # isinstance (not an exact type comparison) so that Directory
        # subclasses are descended into as well.
        if isinstance(drive_obj, Directory) and drive_obj.children:
            logger.debug('{} has children'.format(drive_obj.id))
            result = find_id(drive_obj.children, id)
            if result:
                return result
    return None
def create_corr_structure(content):
    """Nest the flat *content* list in place.

    Each object is appended to the children of every parent that can be
    found in *content*; afterwards *content* keeps only the objects that
    report no parents.

    Args:
        content: list of DriveObj instances; modified in place.
    """
    # `parents` is a property that issues a request on every access, so read
    # it once per object and reuse the answer for both linking and filtering.
    parent_ids = [obj.parents for obj in content]

    for obj, parents in zip(content, parent_ids):
        if not parents:
            logger.debug(
                'There is no parent directory for {}'.format(obj.name))
            continue
        for parent in parents:
            parent_obj = find_id(content, parent)
            if parent_obj:
                parent_obj.add_content(obj)

    content[:] = [
        obj for obj, parents in zip(content, parent_ids) if not parents
    ]
# Script entry point: connect, load the flat listing, then nest it in place.
if __name__ == "__main__":
    # NOTE(review): `structure` is never read in the visible code.
    structure = dict()
    # Binds the module-level `service` that DriveObj.parents reads.
    service = connect.connect_drive()
    content = get_content(service)
    # Rearranges `content` in place; nothing is printed or returned here.
    create_corr_structure(content)
Upvotes: 1
Reputation: 6791
You can do that but you have to implement it yourself.
1. After getting the files, check whether each one is a folder.
2. If it is, check whether it has files under it.
2.1 Insert that data into the children property.
3. Repeat the process until you've successfully listed all files.
References:
Hope this helps.
Upvotes: 2