Soni Pandey
Soni Pandey

Reputation: 514

representation of files and folder-tree of Google Drive folder using Python API

I want to list Google Drive folders and files as a nested JSON tree using a Python class.

I want each data point of the structure to be an object, and I want to wrap the whole script in a class.

Tree should be like this

[
{
"name":<folder_name>,
"id":<folder_id>,
"type":'folder',
 "children":[
        {
        "name":<folder_name>,
        "id":<folder_id>,
        "type":'folder',
         "children":[
         {
            "name":<folder_name>,
            "id":<folder_id>,
            "type":'folder',
             "children":[..........]
        },
        {
            "name":<file_name>,
            "id":<file_id>,
            "type":'file',
        }
        ]
        },
        {
        "name":<file_name>,
        "id":<file_id>,
        "type":'file',
        }
]
},
{
"name":<file_name>,
"id":<file_id>,
"type":'file',
},
......................
]

Thanks

Upvotes: 6

Views: 3415

Answers (4)

Rotem jackoby
Rotem jackoby

Reputation: 22058

In general, Google drive is not a typical tree structure (folders are labels and a file can have multiple parents).

BUT, I would consider using a tree visualization library like treelib.

Below is a full solution for printing your Google drive file system recursively.

from treelib import Node, Tree

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

# Authenticate and build the module-level PyDrive client that get_children()
# uses for its queries.
gauth = GoogleAuth()
# Starts PyDrive's local-webserver authentication flow.
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)


def get_children(root_folder_id):
    """Return the non-trashed Drive items whose parent is *root_folder_id*.

    Args:
        root_folder_id: ID string of the folder to list.

    Returns:
        The list produced by ``drive.ListFile(...).GetList()`` for the
        query ``'<id>' in parents and trashed=false``.
    """
    # Named ``query`` (not ``str``) so the builtin is not shadowed.
    query = "'" + root_folder_id + "' in parents and trashed=false"
    return drive.ListFile({'q': query}).GetList()

def get_folder_id(root_folder_id, root_folder_title):
    """Look up a direct child of *root_folder_id* by its title.

    Returns the ``id`` of the first child whose ``title`` equals
    *root_folder_title*, or ``None`` when no child matches.
    """
    matching_ids = (
        entry['id']
        for entry in get_children(root_folder_id)
        if entry['title'] == root_folder_title
    )
    return next(matching_ids, None)

def add_children_to_tree(tree, file_list, parent_id):
    """Attach every entry of *file_list* to *tree* beneath *parent_id*.

    Each entry is added through ``tree.create_node`` with its ``title`` as
    the label and its ``id`` as the node identifier; one progress line is
    printed per entry.
    """
    report = 'parent: %s, title: %s, id: %s'
    for entry in file_list:
        title, node_id = entry['title'], entry['id']
        tree.create_node(title, node_id, parent=parent_id)
        print(report % (parent_id, title, node_id))


def populate_tree_recursively(tree,parent_id):
    """Fill *tree* with everything reachable below *parent_id*.

    Items are visited depth-first in listing order: the children of a node
    are fetched with ``get_children`` and added to the tree, then each
    child is expanded in turn before moving on to its next sibling.
    """
    # Explicit stack; children are pushed in reverse so they pop in order.
    pending = [parent_id]
    while pending:
        current_id = pending.pop()
        children = get_children(current_id)
        add_children_to_tree(tree, children, current_id)
        pending.extend(child['id'] for child in reversed(children))



def main():
    """Print the tree of the Drive folder named by ``root_folder_title``.

    The folder is looked up among the direct children of ``"root"``; a
    ``Tree`` rooted at it is then populated and shown.

    Raises:
        SystemExit: if no direct child of ``"root"`` has that title.
    """
    root_folder_title = "your-root-folder"
    root_folder_id = get_folder_id("root", root_folder_title)
    # get_folder_id yields None when nothing matches; stop here with a clear
    # message instead of failing later while building the child query.
    if root_folder_id is None:
        raise SystemExit(
            "Folder '%s' was not found in the Drive root" % root_folder_title)

    tree = Tree()
    tree.create_node(root_folder_title, root_folder_id)
    populate_tree_recursively(tree, root_folder_id)
    tree.show()

if __name__ == "__main__":
    main()

Upvotes: 0

tomdxb0004
tomdxb0004

Reputation: 71

# Drive scope requested for the service-account credentials.
scope = ['https://www.googleapis.com/auth/drive']
# Placeholder: replace with the path to your service-account JSON key file.
credentials_file = 'your-creds.json'
credentials = ServiceAccountCredentials.from_json_keyfile_name(credentials_file, scope)
# Drive v3 client shared by the functions below.
service = build('drive', 'v3', credentials=credentials)
def check_for_subfolders(folder_id):
    """Map path strings to Drive IDs for the contents of *folder_id*.

    When the folder has subfolders, each subfolder and the files directly
    inside it are recorded.  When it has none, the files directly inside
    *folder_id* itself are recorded.  Paths are built by appending
    ``'/' + name`` to the module-level ``folder_tree`` prefix, which must
    be set before this function is called.

    Args:
        folder_id: Drive ID string of the folder to inspect.

    Returns:
        dict mapping each path string to the matching folder or file ID.
    """
    new_sub_patterns = {}
    folders = service.files().list(q="mimeType='application/vnd.google-apps.folder' and parents in '"+folder_id+"' and trashed = false",fields="nextPageToken, files(id, name)",pageSize=400).execute()
    all_folders = folders.get('files', [])
    # Path prefix of the folder being inspected (module-level state).
    old_folder_tree = folder_tree
    if all_folders:
        for folder in all_folders:
            subfolder_pattern = old_folder_tree + '/' + folder['name']
            new_sub_patterns[subfolder_pattern] = folder['id']
            print('New Pattern:', subfolder_pattern)
            all_files = check_for_files(folder['id'])
            if all_files:
                for file in all_files:
                    file_name = file['name']
                    new_sub_patterns[subfolder_pattern + "/" + file_name] = file['id']
                    print("Files added :", file_name)
            else:
                print('No Files Found')
    else:
        # No subfolders: record the files of folder_id itself.  This branch
        # only uses names that are defined when the subfolder list is empty.
        for file in check_for_files(folder_id):
            file_name = file['name']
            new_sub_patterns[old_folder_tree + "/" + file_name] = file['id']
            print("Files added :", file_name)
    return new_sub_patterns
def check_for_files(folder_id):
    """Return the non-folder, non-trashed items listed for *folder_id*.

    Sends a single ``files().list`` request (page size 400) and returns the
    ``files`` entry of the response, or an empty list when it is absent.
    """
    files_api = service.files()
    query = "mimeType!='application/vnd.google-apps.folder' and parents in '"+folder_id+"' and trashed = false"
    response = files_api.list(
        q=query,
        fields="nextPageToken, files(id, name)",
        pageSize=400,
    ).execute()
    return response.get('files', [])
def get_folder_tree(folder_id):
    """Recursively collect path -> ID entries beneath *folder_id*.

    Every entry returned by ``check_for_subfolders`` is merged into the
    module-level ``folder_ids`` dict and then explored in turn, with the
    module-level ``folder_tree`` set to that entry's path so nested paths
    are built from it.  Both globals must exist before the first call.

    Args:
        folder_id: Drive ID string of the folder to start from.

    Returns:
        The module-level ``folder_ids`` dict.
    """
    global folder_tree
    sub_folders = check_for_subfolders(folder_id)
    # sub_folders does not change during the loop, so one merge is enough.
    folder_ids.update(sub_folders)
    for path, sub_folder_id in sub_folders.items():
        folder_tree = path
        print('Current Folder Tree : ', folder_tree)
        print('****************************************Recursive Search Begins**********************************************')
        try:
            get_folder_tree(sub_folder_id)
        except Exception as exc:
            # Best-effort traversal: a failing branch is reported and skipped.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print('---------------------------------No furtherance----------------------------------------------')
            print('Reason:', repr(exc))
    return folder_ids

Upvotes: 0

Alex
Alex

Reputation: 1375

This needs some refactoring, but it works. To get a dictionary, just add the values yourself, for example `x.name: x.make_dict()` for every element.

import logging
import copy

import connect


# Module-wide logger, registered under the name 'importer'.
logger = logging.getLogger('importer')


class DriveObj:
    """Common base for Drive entries; keeps the entry's ``id`` and ``name``."""

    def __init__(self, drive_obj):
        """Read ``id`` and ``name`` from *drive_obj* (missing keys give ``None``)."""
        self.id = drive_obj.get('id')
        self.name = drive_obj.get('name')

    def make_dict(self):
        """Return a deep copy of the instance attributes without ``name``."""
        snapshot = copy.deepcopy(self.__dict__)
        snapshot.pop('name')
        return snapshot

    @property
    def parents(self):
        """Parent IDs requested through the module-level ``service``.

        Returns ``[]`` when the lookup raises ``KeyError`` (for example when
        the response carries no ``parents`` entry).
        """
        try:
            request = service.files().get(fileId=self.id, fields='parents')
            return request.execute()['parents']
        except KeyError:
            return []


class Directory(DriveObj):
    """A Drive folder together with the entries placed inside it."""

    def __init__(self, drive_obj):
        """Wrap *drive_obj*, which must carry the Drive folder MIME type.

        Raises:
            AssertionError: if ``drive_obj``'s ``mimeType`` is not
                ``application/vnd.google-apps.folder``.
        """
        super().__init__(drive_obj)
        self.mimeType = 'application/vnd.google-apps.folder'
        # Raised explicitly rather than via ``assert`` so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if drive_obj.get('mimeType') != self.mimeType:
            raise AssertionError('Not dir :(')
        self.children = []

    def add_content(self, drive_obj):
        """Append *drive_obj* to this folder's ``children`` list."""
        self.children.append(drive_obj)


class Document(DriveObj):
    """A Drive entry that is not a folder; remembers its MIME type."""

    def __init__(self, drive_obj):
        """Wrap *drive_obj*, which must not carry the folder MIME type.

        Raises:
            AssertionError: if ``drive_obj``'s ``mimeType`` contains
                ``vnd.google-apps.folder``.
        """
        super().__init__(drive_obj)
        # Raised explicitly rather than via ``assert`` so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if 'vnd.google-apps.folder' in drive_obj.get('mimeType'):
            raise AssertionError('Not a document')
        self.mimeType = drive_obj.get('mimeType')


def get_filelist(service):
    """Return the ``files().list`` response with the files of every page.

    A single ``list`` call returns only one page of results, so the
    ``nextPageToken`` of each response is followed until none is returned.

    Args:
        service: Drive client exposing ``files().list(...).execute()``.

    Returns:
        dict shaped like a single list response whose ``'files'`` entry
        holds the concatenated entries of all pages.
    """
    files = []
    page_token = None
    while True:
        response = service.files().list(pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    # The last response has no nextPageToken; give it the combined listing.
    response['files'] = files
    return response


def get_content(service):
    """Wrap every entry listed by ``get_filelist`` in a typed object.

    Args:
        service: Drive client passed through to ``get_filelist``.

    Returns:
        list with one ``Directory`` per entry whose ``mimeType`` is the
        Drive folder type and one ``Document`` per other entry, in listing
        order.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    content = []
    for drive_obj in get_filelist(service)['files']:
        # Choose the wrapper from the MIME type up front instead of relying
        # on a failed Directory construction to detect documents.
        if drive_obj.get('mimeType') == folder_mime:
            content.append(Directory(drive_obj))
        else:
            content.append(Document(drive_obj))
    return content


def find_id(content, id):
    """Search *content* depth-first for the object whose ``id`` equals *id*.

    Args:
        content: iterable of Drive objects; the ``children`` of every
            ``Directory`` encountered are searched as well.
        id: Drive ID to look for.  (The name shadows the builtin but is
            kept so keyword callers keep working.)

    Returns:
        The first matching object, or ``None`` when nothing matches.
    """
    for drive_obj in content:
        logger.debug('Got %s', drive_obj.id)
        if drive_obj.id == id:
            logger.debug('Find id in %s', drive_obj.id)
            return drive_obj
        # isinstance also accepts subclasses of Directory.
        if isinstance(drive_obj, Directory) and drive_obj.children:
            logger.debug('%s has children', drive_obj.id)
            result = find_id(drive_obj.children, id)
            if result:
                return result
    return None


def create_corr_structure(content):
    """Nest the flat *content* list in place according to parent links.

    Each object is appended to the ``children`` of every parent that can be
    found in *content*.  Afterwards *content* holds only the top-level
    objects: those without parents, and those whose parents are all absent
    from *content* (previously such objects were dropped entirely).

    Args:
        content: list of Drive objects; modified in place.
    """
    top_level = []
    for obj in content:
        # ``parents`` performs a Drive request on every access: read it once.
        parent_ids = obj.parents
        attached = False
        for parent in parent_ids:
            parent_obj = find_id(content, parent)
            if parent_obj:
                parent_obj.add_content(obj)
                attached = True
            else:
                logger.debug(
                    'There is no parent directory for {}'.format(obj.name))
        # Anything not attached to a listed parent stays at the top level.
        if not attached:
            top_level.append(obj)
    content[:] = top_level


if __name__ == "__main__":
    # NOTE(review): ``structure`` is created but not used in the lines shown.
    structure = dict()
    # Module-level Drive client; DriveObj.parents reads this global name.
    service = connect.connect_drive()
    content = get_content(service)
    # Rearranges ``content`` in place into the nested structure.
    create_corr_structure(content)

Upvotes: 1

Mr.Rebot
Mr.Rebot

Reputation: 6791

You can do that but you have to implement it yourself.

  1. First, get files with a parent of the rootFolder.
  2. After getting the files, check whether each one is a folder. If it is, check whether it has files under it.

    2.1 Insert data to children property

  3. Repeat process until you've successfully listed all files.

References:

Hope this helps.

Upvotes: 2

Related Questions