Reputation: 69
I wrote the following code that goes over a csv file and checks if a user did two actions in less than 10 seconds:
import csv
import datetime
import dateutil.parser
import sys
# Scan the CSV and print a warning when two events by the same user are
# less than 10 seconds apart.
csvFileName = 'edited.csv'
with open(csvFileName, 'r') as csvFile:
    csvReader = csv.DictReader(csvFile)
    for index, row in enumerate(csvReader):
        userIdentity = row['useridentity']
        # Parse the timestamp string into a datetime so it can be subtracted.
        eventTime = dateutil.parser.parse(row['eventtime'])
        # NOTE(review): the inner loop iterates the *same* csvReader object as
        # the outer loop. It therefore continues from the row after `row` and
        # consumes the rest of the file instead of restarting from the top.
        # subIndex also restarts at 0 for this enumeration, so it is not a
        # file row number and `subIndex - index` mixes two different counters.
        for subIndex, subRow in enumerate(csvReader):
            subUserIdentity = subRow['useridentity']
            subEventTime = dateutil.parser.parse(subRow['eventtime'])
            # Intended check: the "next" row belongs to the same user ...
            if subIndex - index == 1 and userIdentity == subUserIdentity:
                # ... and happened less than 10 seconds after the outer row.
                if subEventTime - eventTime < datetime.timedelta(seconds=10):
                    print('heads up!')
                    print(eventTime)
                    print(subEventTime)
'eventtime' contains a time in ISO 8601 format, which is converted to a datetime object. There are 2 problems:
Nesting the loops means it iterates over the rows of the file n^2 times
With this approach, if I want to change it to compare 3 actions instead of 2, it will need another nested for loop
I would like to understand what the better and correct way to write something like this is.
Update: this is my updated attempt, but it is duplicating results:
import csv
import datetime
import dateutil.parser
import sys
# Path of the CSV file; opened both by seqCounter and by the driver loop below.
csvFileName = 'edited.csv'
def seqCounter(index, currentTime):
    """Report later rows whose event follows ``currentTime`` within 2 seconds.

    Re-opens ``csvFileName``, skips the rows at positions 0 through ``index``
    and, for every remaining row, prints a warning together with the row's
    user identity and both timestamps when the row's event time is strictly
    later than ``currentTime`` by less than two seconds.

    Args:
        index: Zero-based position of the reference row; rows up to and
            including this position are ignored.
        currentTime: Event time (datetime) of the reference row.
    """
    noGap = datetime.timedelta(seconds=0)
    maxGap = datetime.timedelta(seconds=2)
    with open(csvFileName, 'r') as tmpFile:
        for position, record in enumerate(csv.DictReader(tmpFile)):
            # Only rows after the reference row are candidates.
            if position <= index:
                continue
            eventTime = dateutil.parser.parse(record['eventtime'])
            elapsed = eventTime - currentTime
            if noGap < elapsed < maxGap:
                print('heads up')
                print(record['useridentity'])
                print(eventTime)
                print(currentTime)
# Driver: for every row of the CSV, ask seqCounter to look for later events
# that closely follow this row's event time.
with open(csvFileName, 'r') as csvFile:
    for rowNumber, record in enumerate(csv.DictReader(csvFile)):
        seqCounter(rowNumber, dateutil.parser.parse(record['eventtime']))
Upvotes: 0
Views: 60
Reputation: 1850
This will work if you are only checking the previous event by the same user. Instead of looping through the whole file again for every entry, this will store the last event by the same user in a dict. This lets you loop through the whole file exactly once.
# Single pass over the CSV: remember each user's most recent event time and
# warn when that user's next event is less than 10 seconds away from it.
with open(csvFileName, 'r') as csvFile:
    csvReader = csv.DictReader(csvFile)
    # Built once instead of on every row.
    threshold = datetime.timedelta(seconds=10)
    # Maps useridentity -> datetime of the latest event seen for that user.
    user_events = {}
    for row in csvReader:
        userIdentity = row['useridentity']
        eventTime = dateutil.parser.parse(row['eventtime'])
        # Get the last event for this user (None the first time we see them)
        lastEventTime = user_events.get(userIdentity)
        if lastEventTime is not None:
            # Compare the absolute gap. Subtracting the later time from the
            # earlier one gives a negative timedelta, which is always below
            # the threshold and would flag every repeat event. abs() also
            # keeps the check correct whichever way the rows are sorted.
            if abs(eventTime - lastEventTime) < threshold:
                print('heads up!')
                print(eventTime)
                print(lastEventTime)
        # Set the one we just looked at as the last event
        user_events[userIdentity] = eventTime
Upvotes: 2