Read excel data from Azure blob and convert into csv using Python azure function

I would like to deploy azure function with following functionality

  1. read excel data from Azure blob into stream object instead of downloading onto VM.
  2. read into data frame I require help to read the excel file to into data frame. How to update placed holder download_file_path to read excel data .
    import pandas as pd 
    import os 
    import io
    from azure.storage.blob import BlobClient,BlobServiceClient,ContentSettings
        
    connectionstring="XXXXXXXXXXXXXXXX" 
    excelcontainer = "excelcontainer"        
    excelblobname="Resource.xlsx" 
    sheet ="Resource" 
            
    blob_service_client =BlobServiceClient.from_connection_string(connectionstring)
    download_file_path =os.path.join(excelcontainer)
    blob_client = blob_service_client.get_blob_client(container=excelcontainer, blob=excelblobname)
    with open(download_file_path, "rb") as f:
       data_bytes = f.read()
    df =pd.read_excel(data_bytes, sheet_name=sheet, encoding = "utf-16")

Upvotes: 2

Views: 4273

Answers (1)

Jim Xu
Jim Xu

Reputation: 23111

If you want to read an excel file from Azure blob with panda, you have two choice

  1. Generate SAS token for the blob then use blob URL with SAS token to access it
from datetime import datetime, timedelta
import pandas as pd
from azure.storage.blob import BlobSasPermissions, generate_blob_sas
def main(req: func.HttpRequest) -> func.HttpResponse:
    account_name = 'andyprivate'
    account_key = 'h4pP1fe76*****A=='
    container_name = 'test'
    blob_name="sample.xlsx"
    sas=generate_blob_sas(
      account_name=account_name,
      container_name=container_name,
      blob_name=blob_name,
      account_key=account_key,
      permission=BlobSasPermissions(read=True),
      expiry=datetime.utcnow() + timedelta(hours=1)
    )

    blob_url = f'https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}?{sas}'
    df=pd.read_excel(blob_url)
    print(df)
    ......

enter image description here

  1. Download the blob
from azure.storage.blob import  BlobServiceClient
def main(req: func.HttpRequest) -> func.HttpResponse:
    account_name = 'andyprivate'
    account_key = 'h4pP1f****='

    blob_service_client = BlobServiceClient(account_url=f'https://{account_name }.blob.core.windows.net/', credential=account_key)
    blob_client = blob_service_client.get_blob_client(container='test', blob='sample.xlsx')
    downloader =blob_client.download_blob()
    df=pd.read_excel(downloader.readall())
    print(df)
    ....

enter image description here

Upvotes: 5

Related Questions