Reputation: 85
I would like to append the DataFrame returned by the createDF function to full_df (an initially empty DataFrame).
In the run_parallel function below, I am passing args=(weight, eduList,) to each process, but I cannot figure out how to retrieve the return value of createDF and append it to full_df.
import pandas as pd
import numpy as np
import random
# Dummy function to create a DataFrame.
def createDF(weight, eduList):
    """Build a five-row sample DataFrame of names, scores and schools.

    Each score is ``weight`` multiplied by a random integer drawn from a
    per-row inclusive range, so repeated calls give different scores
    unless the ``random`` module is seeded first.

    Args:
        weight: Multiplier applied to every randomly drawn integer.
        eduList: Indexable collection with at least five entries; entry
            ``i`` becomes the ``Education`` value of row ``i``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Name``, ``Score`` and
        ``Education`` and one row per name.

    Raises:
        IndexError: If ``eduList`` has fewer than five entries.
    """
    # (name, lowest draw, highest draw); both bounds are inclusive because
    # they are passed straight to random.randint.
    score_specs = [
        ('kappa', 5, 10),
        ('ombee', 5, 10),
        ('babad', 4, 10),
        ('matth', 4, 9),
        ('allis', 4, 9),
    ]

    # Fail up front with a readable message rather than part-way through
    # building the rows.
    if len(eduList) < len(score_specs):
        raise IndexError(
            f"eduList needs at least {len(score_specs)} entries, "
            f"got {len(eduList)}"
        )

    # Rows are built in table order so the sequence of random draws is the
    # same as writing the five rows out by hand.
    data = [
        [name, weight * random.randint(low, high), eduList[i]]
        for i, (name, low, high) in enumerate(score_specs)
    ]

    # Create the pandas DataFrame.
    return pd.DataFrame(data, columns=['Name', 'Score', 'Education'])
# Multiplier handed to createDF for every score.
weight = 9

# One parallel createDF call is made per entry in this list.
depts = [
    'FIN',
    'CONS',
    'CONS',
    'MANF',
    'MED',
]

# Education value for each of the five rows createDF builds.
eduList = [
    'RWTH',
    'EBS',
    'OSU',
    'OKSTATE',
    'OK-State',
]

# Empty DataFrame that should end up holding all the DataFrames returned
# by createDF.
full_df = pd.DataFrame()
# Function to create the DataFrames in parallel.
def run_parallel(weight, depts, eduList):
    """Run createDF once per department in worker processes and merge the results.

    A bare ``multiprocessing.Process`` throws away the return value of its
    target, so the DataFrames built in the workers could never be read
    back. A process pool sends each worker's return value back to the
    caller instead.

    On platforms that start workers by spawning a fresh interpreter, call
    this from under an ``if __name__ == "__main__":`` guard.

    Args:
        weight: Passed through to ``createDF``.
        depts: One ``createDF`` call is submitted per element; the element
            values themselves are not used.
        eduList: Passed through to ``createDF``.

    Returns:
        A single ``pandas.DataFrame`` made of every ``createDF`` result
        stacked in submission order with a continuous 0..n-1 index, or an
        empty DataFrame when ``depts`` is empty.

    Raises:
        Exception: Whatever ``createDF`` raised inside a worker is
            re-raised here when its result is collected.
    """
    from concurrent.futures import ProcessPoolExecutor

    # pd.concat refuses an empty list, so answer the no-work case directly.
    if not depts:
        return pd.DataFrame()

    with ProcessPoolExecutor() as executor:
        futures = [executor.submit(createDF, weight, eduList) for _ in depts]
        # result() blocks until that worker is done, so this also replaces
        # the explicit join() loop.
        frames = [future.result() for future in futures]

    # ignore_index renumbers the rows 0..n-1 instead of repeating 0..4.
    return pd.concat(frames, ignore_index=True)
# desired result:
Name Score Education
0 kappa 72 RWTH
1 ombee 72 EBS
2 babad 40 OSU
3 matth 28 OKSTATE
4 allis 56 OK-State
5 kappa 63 RWTH
6 ombee 83 EBS
7 babad 60 OSU
8 matth 56 OKSTATE
9 allis 40 OK-State
.. .. .. ..
.. .. .. ..
.. .. .. ..
Upvotes: 1
Views: 339
Reputation: 4569
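You can define an empty list, append each DataFrame to it, and afterwards concatenate them into a single DataFrame. Note that a plain list is only updated in the process that appends to it: with multiprocessing.Process each child works on its own copy, so the DataFrames must be sent back to the parent (e.g. through a Queue or a Pool) before they can be concatenated: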
# Collects every DataFrame that createDF builds in *this* process.
df_list = []


# Dummy function to create a DataFrame.
def createDF(weight, eduList):
    """Build a five-row sample DataFrame, record it in ``df_list`` and return it.

    ``df_list`` is an ordinary module-level list, so an append made while
    this function runs in a separate process (for example as the target of
    a ``multiprocessing.Process``) does not reach the parent's list.
    The DataFrame is therefore also returned, so a caller that collects
    worker results (a pool, a queue, ...) can pick it up.

    Args:
        weight: Multiplier applied to every randomly drawn integer.
        eduList: Indexable collection with at least five entries, used for
            the ``Education`` column in row order.

    Returns:
        The ``pandas.DataFrame`` that was just appended to ``df_list``,
        with the columns ``Name``, ``Score`` and ``Education``.

    Raises:
        IndexError: If ``eduList`` has fewer than five entries.
    """
    # Initialize list of lists; randint bounds are inclusive.
    data = [
        ['kappa', weight * random.randint(5, 10), eduList[0]],
        ['ombee', weight * random.randint(5, 10), eduList[1]],
        ['babad', weight * random.randint(4, 10), eduList[2]],
        ['matth', weight * random.randint(4, 9), eduList[3]],
        ['allis', weight * random.randint(4, 9), eduList[4]],
    ]

    # Create the pandas DataFrame.
    df = pd.DataFrame(data, columns=['Name', 'Score', 'Education'])
    df_list.append(df)
    return df
...
# And after all processes finish: stack the collected DataFrames.
# ignore_index renumbers the rows 0..n-1 (as in the desired result) instead
# of repeating each DataFrame's own 0..4 index. pd.concat raises ValueError
# on an empty list, so fall back to an empty DataFrame when nothing was
# collected.
full_df = pd.concat(df_list, ignore_index=True) if df_list else pd.DataFrame()
Upvotes: 1