WHOATEMYNOODLES
WHOATEMYNOODLES

Reputation: 897

How to get all the files in a OneDrive account using the Graph SDK?

I'm trying to use a service account to pull all files from a OneDrive business account using the MS Graph Python SDK.

import asyncio
from msgraph import GraphServiceClient
from azure.identity import ClientSecretCredential

microsoft_tenant_id = '123abc'
client_id = '123abc'
client_secret = '123abc'

SCOPES = ['https://graph.microsoft.com/.default']

credential = ClientSecretCredential(microsoft_tenant_id, client_id, client_secret)
graph_client = GraphServiceClient(credential, SCOPES)

user_id = '[email protected]'


async def get_drive_count():

# What do I use after .drives?

    response = await graph_client.users.by_user_id(user_id).drives... # not sure what to use next
    

asyncio.run(get_drive_count())

I can't find any examples of how to use the Graph client to pull OneDrive files.

I've tried using .root.children.get() but the SDK doesn't have any of those methods.

Does anyone know how to pull all OneDrive files using the SDK?

Upvotes: 0

Views: 207

Answers (2)

Philippe Guarino
Philippe Guarino

Reputation: 161

Be aware that the above solution will not work if a folder contains more than 200 items. In that case it is safer to check the `odata_next_link` property and keep requesting the next page until no link is returned:

async def extractfile(item, drive):
    """Recursively walk *item* and download every file found beneath it.

    If *item* is a folder, all of its children are listed (following
    ``odata_next_link`` until the last page) and each child is processed
    recursively. Otherwise the item is passed to ``downloadFile``.

    Args:
        item: A drive item; ``item.folder`` is truthy for folders and
            ``item.id`` identifies it within the drive.
        drive: The drive that contains *item*; only ``drive.id`` is used.
    """
    if not item.folder:
        await downloadFile(item)
        return

    # Follow-up pages must be requested through the *children* builder so
    # the response is parsed as a page of drive items rather than as a
    # collection of drives.
    children_builder = (
        graph_client.drives.by_drive_id(drive.id)
        .items.by_drive_item_id(item.id)
        .children
    )

    # Collect every page first, then recurse, so the next links are consumed
    # before any (potentially slow) downloads start.
    children = []
    page = await children_builder.get()
    while page is not None:
        children.extend(page.value or [])
        if not page.odata_next_link:
            break
        page = await children_builder.with_url(page.odata_next_link).get()

    for child in children:
        await extractfile(child, drive)

Upvotes: 0

user2250152
user2250152

Reputation: 20788

You need to get the user's drive id and then recursively iterate through all the folders in that drive, counting the number of items.

I'm not familiar with Python, but something like the code below should work.

Be aware that if a folder has more than 200 items, you need to use paging to retrieve all items.

import asyncio
from typing import List
from msgraph import GraphServiceClient
from azure.identity import ClientSecretCredential

# Placeholder credentials -- substitute real values before running.
microsoft_tenant_id = '123abc'
client_id = '123abc'
client_secret = '123abc'

# Scope list handed to GraphServiceClient below.
SCOPES = ['https://graph.microsoft.com/.default']

# Client-secret credential used to build the shared Graph client.
credential = ClientSecretCredential(microsoft_tenant_id, client_id, client_secret)
graph_client = GraphServiceClient(credential, SCOPES)

# User whose drive is looked up by get_drive_count().
user_id = '[email protected]'

# Running total: incremented by get_child_items_count(), printed by get_drive_count().
items_count = 0

async def get_child_items_count(driveId: str, driveItemId: str) -> None:
    """Add the number of items beneath a drive item to the global ``items_count``.

    Lists the children of ``driveItemId`` page by page (following
    ``odata_next_link``), adds each page's length to ``items_count``, and then
    recurses into every child folder that may contain items.

    Args:
        driveId: Id of the drive that holds the item.
        driveItemId: Id of the folder item whose children are counted.
    """
    # Without this declaration the ``+=`` below would make items_count a
    # local name and raise UnboundLocalError on first use.
    global items_count

    children_builder = (
        graph_client.drives.by_drive_id(driveId)
        .items.by_drive_item_id(driveItemId)
        .children
    )
    subfolder_ids: List[str] = []

    page = await children_builder.get()
    while page is not None:
        items = page.value or []
        items_count += len(items)
        for item in items:
            if item.folder is None:
                continue
            # A missing child_count is treated as "unknown" and still
            # visited, so no items are silently skipped.
            if item.folder.child_count is None or item.folder.child_count > 0:
                subfolder_ids.append(item.id)
        if page.odata_next_link is None:
            break
        page = await children_builder.with_url(page.odata_next_link).get()

    # Count items in subfolders.
    for folder_id in subfolder_ids:
        await get_child_items_count(driveId, folder_id)

async def get_drive_count():
    """Count every item in the configured user's drive and print the total."""
    # Fetch the user's drive; its id is needed for all item requests.
    user_drive = await graph_client.users.by_user_id(user_id).drive.get()
    drive_id = user_drive.id

    # Begin the recursive count at the drive's root item.
    await get_child_items_count(drive_id, 'root')

    print(f"count: {items_count}")

asyncio.run(get_drive_count())

Upvotes: 1

Related Questions