Reputation: 23
I have a file with about 2000 lines of data on sunspots. I need to take each month, find its average, and write it to a new file. How do I group the months so I can get an average? I've read a few threads suggesting pandas, but since we haven't gotten there yet in class, I'd rather not use it without having a full grasp of what it does.
So far, my code separates the years, months, and days. How do I group the months together to find the average number of sunspots?
Here is my code so far:
def OpenFile(File):
    """Reformat the daily records in *File* into "Monthlytemp.txt".

    Each input line is split on whitespace. The first field is sliced as
    YYYYMMDD (year, month, day) and the third field is parsed as the integer
    spot count. A count of 999 is written as an empty value. Every record is
    written as "year month day spots \n" to "Monthlytemp.txt" in the current
    working directory (the file is truncated on every call).

    Lines with fewer than three fields (for example blank lines) are skipped.
    If *File* cannot be opened the input is treated as empty, so an empty
    output file is still produced.

    Returns the last line written, or "" when nothing was written.
    Raises ValueError if the third field of a record is not an integer.
    """
    try:
        # Context manager so the input handle is closed even on a read error.
        with open(File) as infile:
            Lines = infile.readlines()
    except IOError:
        # Best effort, as in the original: a missing file means "no records".
        Lines = []

    # Initialised up front so the final return is defined for empty input.
    Data = ''
    with open("Monthlytemp.txt", "w") as outfile:
        for line in Lines:
            Dates = line.split()
            if len(Dates) < 3:
                continue
            Year = Dates[0][0:4]
            Month = Dates[0][4:6]
            Date = Dates[0][6:8]
            Spots = int(Dates[2])
            # 999 marks a missing observation; emit an empty value for it.
            Spots = '' if Spots == 999 else str(Spots)
            Data = ' '.join([Year, Month, Date, Spots, '\n'])
            outfile.write(Data)
    return Data
Upvotes: 0
Views: 1348
Reputation: 12563
One possible solution (with minimal changes from your approach):
def WriteAvg(outfile, year, month, avg):
    """Write one "year month avg \n" record to *outfile*.

    *outfile* is any object with a ``write`` method. *year*, *month* and
    *avg* are converted with ``str``, so numeric values are accepted as well
    as strings.

    Returns the exact text that was written.
    """
    Data = ' '.join([str(year), str(month), str(avg), '\n'])
    outfile.write(Data)
    return Data


def OpenFile(File):
    """Write the average spot count of each month in *File* to "Monthlytemp.txt".

    Each input line is split on whitespace. The first field is sliced as
    YYYYMMDD and the third field is parsed as the integer spot count. Records
    are expected to be grouped by month: a new output record is written every
    time the (year, month) pair changes. Counts equal to 999 are treated as
    missing and excluded from both the sum and the day count.

    A month with no valid counts is skipped (there is nothing to average).
    Lines with fewer than three fields are ignored. If *File* cannot be
    opened the input is treated as empty and an empty output file is written.

    Returns the last record written, or "" when nothing was written.
    Raises ValueError if the third field of a record is not an integer.
    """
    try:
        with open(File) as infile:
            Lines = infile.readlines()
    except IOError:
        # Best effort, as in the original: a missing file means "no records".
        Lines = []

    LastWritten = ''
    PrevKey = None  # (year, month) of the records currently being summed
    SpotSum = 0
    Days = 0

    with open("Monthlytemp.txt", "w") as outfile:
        for line in Lines:
            Dates = line.split()
            if len(Dates) < 3:
                continue
            # Compare year and month together so that the same month number
            # in two different years is never merged into one average.
            Key = (Dates[0][0:4], Dates[0][4:6])
            Spots = int(Dates[2])

            if PrevKey is not None and Key != PrevKey:
                if Days:  # guard against dividing by zero
                    LastWritten = WriteAvg(
                        outfile, PrevKey[0], PrevKey[1], SpotSum * 1. / Days)
                Days = 0
                SpotSum = 0

            if Spots != 999:
                Days += 1
                SpotSum += Spots
            PrevKey = Key

        # Flush the final month, which has no following record to trigger it.
        if PrevKey is not None and Days:
            LastWritten = WriteAvg(
                outfile, PrevKey[0], PrevKey[1], SpotSum * 1. / Days)

    return LastWritten
Upvotes: 0
Reputation: 3134
You can use a dictionary.
def OpenFile(File):
    """Reformat the daily records in *File* and print per-month averages.

    Each input line is split on whitespace. The first field is sliced as
    YYYYMMDD and the third field is parsed as the integer spot count. Every
    record is written as "year month day spots \n" to "Monthlytemp.txt",
    with a count of 999 written as an empty value.

    Valid counts are grouped in a dictionary keyed by (year, month). The
    averages are printed twice: first as a dictionary, then as a list of
    ((year, month), average) tuples sorted by key. Counts of 999 are left out
    of the averages, so a month with no valid counts does not appear.

    Lines with fewer than three fields are ignored. If *File* cannot be
    opened the input is treated as empty.

    Returns the last line written, or "" when nothing was written.
    Raises ValueError if the third field of a record is not an integer.
    """
    try:
        with open(File) as infile:
            Lines = infile.readlines()
    except IOError:
        # Best effort, as in the original: a missing file means "no records".
        Lines = []

    # Maps (year, month) -> list of valid daily spot counts.
    spots_by_month = {}
    # Initialised up front so the final return is defined for empty input.
    Data = ''

    with open("Monthlytemp.txt", "w") as outfile:
        for line in Lines:
            Dates = line.split()
            if len(Dates) < 3:
                continue
            Year = Dates[0][0:4]
            Month = Dates[0][4:6]
            Date = Dates[0][6:8]
            Spots = int(Dates[2])
            if Spots == 999:
                # Missing observation: blank in the output, not averaged.
                Spots = ''
            else:
                # setdefault stores the new list in the dict; get(key, [])
                # would append to a throwaway list and lose the value.
                spots_by_month.setdefault((Year, Month), []).append(Spots)
            Data = ' '.join([Year, Month, Date, str(Spots), '\n'])
            outfile.write(Data)

    # Averages as a dictionary; float() keeps the division exact everywhere.
    averages = {
        date: sum(spots_list) / float(len(spots_list))
        for date, spots_list in spots_by_month.items()
    }
    print(averages)

    # The same averages as a list sorted by (year, month).
    averages = sorted(averages.items())
    print(averages)

    return Data
Upvotes: 0