Papouche Guinslyzinho
Papouche Guinslyzinho

Reputation: 5458

How to get a list of event that occurred simultaneous

Im trying to get the number of simulteneaous telephone call. I have this dataframe and I want to get for each user how many simulteanous call they had. my desired output is [{'A': 4}, {'E': 3}]

user = ['A', 'A', 
        'A', 'E', 
        'F', 'E', 
        'E', 'A',
        'G', 'A']
started_time = [
    '2020-04-02 16:16:11',
    '2020-04-02 16:06:25',
    '2020-04-02 16:11:53',
    '2020-04-02 16:29:29',
    '2020-04-10 16:09:56',
    '2020-04-02 16:30:18',
    '2020-04-02 16:25:20',
    '2020-04-02 16:00:47',
    '2020-04-07 16:11:44',
    '2020-04-05 16:55:25'
]
ended_time = [
    '2020-04-02 16:22:05',
    '2020-04-02 16:17:22',
    '2020-04-02 16:21:50',
    '2020-04-02 16:34:29',
    '2020-04-10 16:44:15',
    '2020-04-02 16:41:26',
    '2020-04-02 16:53:02',
    '2020-04-02 16:45:49',
    '2020-04-07 16:57:37',
    '2020-04-05 16:59:26',
]
df = pd.DataFrame({
    'user':user,
    'started_time':started_time,
    'ended_time':ended_time
    })
df['started_time'] = pd.to_datetime(df['started_time'])
df['ended_time'] = pd.to_datetime(df['ended_time'])
df['sim_calls'] = None

print

  user        started_time          ended_time sim_calls
0    A 2020-04-02 16:16:11 2020-04-02 16:22:05      None
1    A 2020-04-02 16:06:25 2020-04-02 16:17:22      None
2    A 2020-04-02 16:11:53 2020-04-02 16:21:50      None
3    E 2020-04-02 16:29:29 2020-04-02 16:34:29      None
4    F 2020-04-10 16:09:56 2020-04-10 16:44:15      None
5    E 2020-04-02 16:30:18 2020-04-02 16:41:26      None
6    E 2020-04-02 16:25:20 2020-04-02 16:53:02      None
7    A 2020-04-02 16:00:47 2020-04-02 16:45:49      None
8    G 2020-04-07 16:11:44 2020-04-07 16:57:37      None
9    A 2020-04-05 16:55:25 2020-04-05 16:59:26      None

Removing all operators that have less than 3 calls that day

ab = df.groupby('user').count()
ab = ab.reset_index('user')
ab = ab[ab['started_time']>2]
operators = list(ab.user.unique())

result

  user  started_time  ended_time  sim_calls
0    A             5           5          0
1    E             3           3          0

computation

active_events_index= []
simulteaneous_call = []
for user in operators:
    my_list_of_operators =[]
    my_list_of_operators.append(user)
    my_list_of_operators_count = 0 

    new_df = df[df['user']==user]

    for i in new_df.index:
        started_time =  new_df.loc[i,"started_time"]
        ended_time =  new_df.loc[i,"ended_time"]
        for row in new_df.index:
            if (new_df.loc[row,"started_time"] <= started_time and new_df.loc[row,"ended_time"] >= started_time or new_df.loc[row,"ended_time"] <= ended_time ) :
                print(new_df.loc[row])
                my_list_of_operators_count += 1 
    simulteaneous_call.append({my_list_of_operators[0]:my_list_of_operators_count})

result

print(simulteaneous_call)
[{'A': 18}, {'E': 8}]

My desired output should have been

[{'A': 4}, {'E': 3}]

Upvotes: 1

Views: 239

Answers (1)

Code Different
Code Different

Reputation: 93181

I assume any overlapping call of the same user to be "simultaneous". Explanation in code:

def count_simul(group):
    n = 0
    g = []
    ranges = {}

    # For each user, start the loop with a time range covering the distant
    # past to distant future
    started_time = pd.Timestamp('1900-01-01')
    ended_time = pd.Timestamp('2099-12-31')

    for index, row in group.iterrows():
        if (row['started_time'] < ended_time) and (started_time < row['ended_time']):
            # If the current row overlaps with the time range defined by
            # `started_time` and `ended_time`, set `started_time` and
            # `ended_time` to the intersection of the two. And keep the row
            # in the current time group
            started_time = max(started_time, row['started_time'])
            ended_time = min(ended_time, row['ended_time'])
        else:
            # Otherwise, set `started_time` and `ended_time` to those of the
            # current row and assign the current row to a new time group
            started_time, ended_time = row[['started_time', 'ended_time']]
            n += 1

        # `ranges` is a dictionary mapping each group number to the time range
        ranges[n] = (started_time, ended_time)
        g.append(n)

    # Group the rows by their time group number and get the size
    freq = group.groupby(np.array(g)).size()
    freq.index = freq.index.map(ranges)
    return freq

df.sort_values(['user', 'started_time', 'ended_time']) \
    .groupby('user') \
    .apply(count_simul) \
    .replace(1, np.nan).dropna() # we don't consider groups of 1 to be "simultaneous"

Result:

user                                          
A     2020-04-02 16:16:11  2020-04-02 16:17:22    4.0
E     2020-04-02 16:30:18  2020-04-02 16:34:29    3.0
dtype: float64

Upvotes: 1

Related Questions