Reputation: 5458
Im trying to get the number of simulteneaous telephone call. I have this dataframe and I want to get for each user how many simulteanous call they had. my desired output is [{'A': 4}, {'E': 3}]
user = ['A', 'A',
'A', 'E',
'F', 'E',
'E', 'A',
'G', 'A']
started_time = [
'2020-04-02 16:16:11',
'2020-04-02 16:06:25',
'2020-04-02 16:11:53',
'2020-04-02 16:29:29',
'2020-04-10 16:09:56',
'2020-04-02 16:30:18',
'2020-04-02 16:25:20',
'2020-04-02 16:00:47',
'2020-04-07 16:11:44',
'2020-04-05 16:55:25'
]
ended_time = [
'2020-04-02 16:22:05',
'2020-04-02 16:17:22',
'2020-04-02 16:21:50',
'2020-04-02 16:34:29',
'2020-04-10 16:44:15',
'2020-04-02 16:41:26',
'2020-04-02 16:53:02',
'2020-04-02 16:45:49',
'2020-04-07 16:57:37',
'2020-04-05 16:59:26',
]
df = pd.DataFrame({
'user':user,
'started_time':started_time,
'ended_time':ended_time
})
df['started_time'] = pd.to_datetime(df['started_time'])
df['ended_time'] = pd.to_datetime(df['ended_time'])
df['sim_calls'] = None
user started_time ended_time sim_calls
0 A 2020-04-02 16:16:11 2020-04-02 16:22:05 None
1 A 2020-04-02 16:06:25 2020-04-02 16:17:22 None
2 A 2020-04-02 16:11:53 2020-04-02 16:21:50 None
3 E 2020-04-02 16:29:29 2020-04-02 16:34:29 None
4 F 2020-04-10 16:09:56 2020-04-10 16:44:15 None
5 E 2020-04-02 16:30:18 2020-04-02 16:41:26 None
6 E 2020-04-02 16:25:20 2020-04-02 16:53:02 None
7 A 2020-04-02 16:00:47 2020-04-02 16:45:49 None
8 G 2020-04-07 16:11:44 2020-04-07 16:57:37 None
9 A 2020-04-05 16:55:25 2020-04-05 16:59:26 None
Removing all operators that have less than 3 calls that day
ab = df.groupby('user').count()
ab = ab.reset_index('user')
ab = ab[ab['started_time']>2]
operators = list(ab.user.unique())
result
user started_time ended_time sim_calls
0 A 5 5 0
1 E 3 3 0
computation
active_events_index= []
simulteaneous_call = []
for user in operators:
my_list_of_operators =[]
my_list_of_operators.append(user)
my_list_of_operators_count = 0
new_df = df[df['user']==user]
for i in new_df.index:
started_time = new_df.loc[i,"started_time"]
ended_time = new_df.loc[i,"ended_time"]
for row in new_df.index:
if (new_df.loc[row,"started_time"] <= started_time and new_df.loc[row,"ended_time"] >= started_time or new_df.loc[row,"ended_time"] <= ended_time ) :
print(new_df.loc[row])
my_list_of_operators_count += 1
simulteaneous_call.append({my_list_of_operators[0]:my_list_of_operators_count})
result
print(simulteaneous_call)
[{'A': 18}, {'E': 8}]
My desired output should have been
[{'A': 4}, {'E': 3}]
Upvotes: 1
Views: 239
Reputation: 93181
I assume any overlapping call of the same user to be "simultaneous". Explanation in code:
def count_simul(group):
n = 0
g = []
ranges = {}
# For each user, start the loop with a time range covering the distant
# past to distant future
started_time = pd.Timestamp('1900-01-01')
ended_time = pd.Timestamp('2099-12-31')
for index, row in group.iterrows():
if (row['started_time'] < ended_time) and (started_time < row['ended_time']):
# If the current row overlaps with the time range defined by
# `started_time` and `ended_time`, set `started_time` and
# `ended_time` to the intersection of the two. And keep the row
# in the current time group
started_time = max(started_time, row['started_time'])
ended_time = min(ended_time, row['ended_time'])
else:
# Otherwise, set `started_time` and `ended_time` to those of the
# current row and assign the current row to a new time group
started_time, ended_time = row[['started_time', 'ended_time']]
n += 1
# `ranges` is a dictionary mapping each group number to the time range
ranges[n] = (started_time, ended_time)
g.append(n)
# Group the rows by their time group number and get the size
freq = group.groupby(np.array(g)).size()
freq.index = freq.index.map(ranges)
return freq
df.sort_values(['user', 'started_time', 'ended_time']) \
.groupby('user') \
.apply(count_simul) \
.replace(1, np.nan).dropna() # we don't consider groups of 1 to be "simultaneous"
Result:
user
A 2020-04-02 16:16:11 2020-04-02 16:17:22 4.0
E 2020-04-02 16:30:18 2020-04-02 16:34:29 3.0
dtype: float64
Upvotes: 1