Vishnukk
Vishnukk

Reputation: 564

Pytest patch not returning expected value

I am using Pytest to mock a function whose return value is defined in another function.

This is my original function which reads all the files from Azure Data Lake that I would like to mock

class DataLakeEngine:
  def read_folder_as_pandas_dataframe(
        self,
        folder_path_remote: str,
        file_type: str = "parquet",
        options: Dict = dict(),
    ) -> pd.DataFrame:
        """
        Download all files from the data lake in the data area and concatenate them into a single pandas data frame.

        Args:
            folder_path_remote (str): full path to the folder in the data area
            file_type (str): file type, possible values are 'csv', 'xlsx', 'json', 'pkl' and 'parquet'
            options (Dict): additional parameters used for converting file content to data frame

        Returns:
            (pd.DataFrame): concatenated
        """

        df = pd.DataFrame()
        paths_in_folder = self.file_system_client.get_paths(folder_path_remote)

        for path_in_folder in tqdm(paths_in_folder):
            if path_in_folder.is_directory or "delta_log" in path_in_folder.name:
                continue
            current_file = path_in_folder.name
            temp_df = self.read_file_as_pandas_dataframe(
                current_file, file_type, options
            )
            df = pd.concat([df, temp_df]).reset_index(drop=True)
        return df

and my logic function which I want to test looks like this

class Preprocessor:
  def method1(self):
    data = DataLakeEngine.read_folder_as_pandas_dataframe(
        self.ticket_data_path, "parquet"
    )    
    return data

My test class looks like this where I read from my local file system instead of reading from Data Lake.

import unittest
import pytest
from preprocessing import PreprocessingPipeline
from unittest.mock import patch
import pandas as pd
import os
from tqdm.autonotebook import tqdm
from typing import Dict
from datalake import DatalakeEngine



class TestPreprocessing(unittest.TestCase):
    def mock_read_folder_as_pandas_dataframe(
        self, folder_path: str, file_type: str, options: Dict = dict()
    ) -> pd.DataFrame:
        """
        Read all files in a folder and return them as a pandas dataframe

        Args:
            folder_path (str): path to the folder
            file_type (str): type of file to read
            options (dict): options to pass to the read

        Returns:
            pd.DataFrame: dataframe with all the files
        """
        file_paths = []
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if file.endswith(file_type):
                    file_paths.append(os.path.join(root, file))

        df = pd.DataFrame()
        for file_path in tqdm(file_paths):
            if file_type == "parquet":
                temp_df = pd.read_parquet(file_path, **options)
            else:
                temp_df = pd.read_excel(file_path, **options)
            df = pd.concat([df, temp_df]).reset_index(drop=True)

        return df

    @pytest.fixture(autouse=True)
    def data1(self) -> pd.DataFrame:
        with patch.object(
            DatalakeEngine,
            "read_folder_as_pandas_dataframe",
            side_effect=self.mock_read_folder_as_pandas_dataframe(
               "folder1", "parquet"
            ),
        ) as mock_func:
            data1 = Preprocessor.method1()
            return data1


    def test_logic(
        self, data1: pd.DataFrame
    ):
        transformed_df = Preprocessor.preprocess_data(
            data1
        )
        assert transformed_df.shape[0] > 0

However, when I run the below test, the mock function returns a string (the name of the first column of my data) instead of pandas dataframe at the read_folder_as_pandas_dataframe call inside the method1() function.

How can I change my patch here to return a data frame here?

Upvotes: 0

Views: 35

Answers (0)

Related Questions