Reputation: 333
I'm parsing several years worth of daily solar generation CSV files that look like this (with YY-MM-DD.CSV filename format)
sep=;
Version CSV|Tool SunnyBeam11|Linebreaks CR/LF|Delimiter semicolon|Decimalpoint point|Precision 3|Language en-UK|TZO=0|DST|11/03/2012
;SN: xyz
;SB blah
;xyz
Time;Power
HH:mm;kW
00:10;0.000
00:20;0.000
00:30;0.000
00:40;0.000
00:50;0.000
01:00;0.000
01:10;0.000
01:20;0.000
01:30;0.000
01:40;0.000
01:50;0.000
02:00;0.000
02:10;0.000
02:20;0.000
02:30;0.000
02:40;0.000
02:50;0.000
03:00;0.000
03:10;0.000
03:20;0.000
03:30;0.000
03:40;0.000
03:50;0.000
04:00;0.000
04:10;0.000
04:20;0.000
04:30;0.000
04:40;0.000
04:50;0.000
05:00;0.000
05:10;0.000
05:20;0.000
05:30;0.000
05:40;0.000
05:50;0.000
06:00;0.000
06:10;0.000
06:20;0.000
06:30;0.000
06:40;0.000
06:50;0.000
07:00;0.000
07:10;0.012
07:20;0.048
07:30;0.072
07:40;0.078
07:50;0.114
08:00;0.150
08:10;0.156
08:20;0.168
08:30;0.204
08:40;0.180
08:50;0.198
09:00;0.210
09:10;0.222
09:20;0.294
09:30;0.330
09:40;0.342
09:50;0.402
10:00;0.492
10:10;0.396
10:20;0.474
10:30;1.224
10:40;1.374
10:50;1.416
11:00;1.470
11:10;1.500
11:20;1.530
11:30;1.530
11:40;1.542
11:50;1.536
12:00;1.554
12:10;1.584
12:20;1.584
12:30;1.572
12:40;1.560
12:50;1.548
13:00;1.518
13:10;1.494
13:20;1.470
13:30;1.440
13:40;1.404
13:50;1.356
14:00;1.332
14:10;1.284
14:20;1.236
14:30;1.194
14:40;1.110
14:50;1.050
15:00;0.972
15:10;0.888
15:20;0.816
15:30;0.744
15:40;0.648
15:50;0.594
16:00;0.528
16:10;0.438
16:20;0.342
16:30;0.264
16:40;0.228
16:50;0.180
17:00;0.120
17:10;0.084
17:20;0.048
17:30;0.024
17:40;0.006
17:50;0.000
18:00;0.000
18:10;0.000
18:20;0.000
18:30;0.000
18:40;0.000
18:50;0.000
19:00;0.000
19:10;0.000
19:20;0.000
19:30;0.000
19:40;0.000
19:50;0.000
20:00;0.000
20:10;0.000
20:20;0.000
20:30;0.000
20:40;0.000
20:50;0.000
21:00;0.000
21:10;0.000
21:20;0.000
21:30;0.000
21:40;0.000
21:50;0.000
22:00;0.000
22:10;0.000
22:20;0.000
22:30;0.000
22:40;0.000
22:50;0.000
23:00;0.000
23:10;0.000
23:20;0.000
23:30;0.000
23:40;0.000
23:50;0.000
00:00;0.000
E-Today kWh;8.313
E-Total kWh;67.471
My Python is able to parse out the date in the last field of line 2 and then iteratively combine it with the time on each row. Using the combo of date and time as a timestamp (type incompatibility is probably the problem?) I try to insert it into a mySQL database into a column of type timestamp (and power into a column of type DECIMAL(4,3))
I've played around with various type combinations but clearly not in the right way. I'd be really grateful for a Python expert to clarify how to successfully get the synthesised date into a form compatible with MySQL timestamp.
The Python is:
#!/usr/bin/python
from os import listdir
from datetime import datetime
import MySQLdb
#from sys import argv
def is_dated_csv(filename):
"""
Return True if filename matches format YY-MM-DD.csv, otherwise False.
"""
date_format = '%y-%m-%d.csv'
try:
date = datetime.strptime(filename, date_format)
return True
except ValueError:
# filename did not match pattern
print filename + ' did NOT match'
pass
#'return' terminates a function
return False
def parse_for_date(filename):
"""
Read file for the date - from line 2 field 10
"""
currentFile = open(filename,'r')
l1 = currentFile.readline() #ignore first line read
date_line = currentFile.readline() #read second line
dateLineArray = date_line.split("|")
day_in_question = dateLineArray[-1]#save the last element (date)
currentFile.close()
return day_in_question
def normalise_date_to_UTF(day_in_question):
"""
Rather wierdly, some days use YYYY.MM.DD format & others use DD/MM/YYYY
This function normalises either to UTC with a blank time (midnight)
"""
if '.' in day_in_question: #it's YYYY.MM.DD
dateArray = day_in_question.split(".")
dt = (dateArray[0] +dateArray[1] + dateArray[2].rstrip() + '000000')
elif '/' in day_in_question: #it's DD/MM/YYYY
dateArray = day_in_question.split("/")
dt = (dateArray[2].rstrip() + dateArray[1] + dateArray[0] + '000000')
theDate = datetime.strptime(dt,'%Y%m%d%H%M%S')
return theDate #A datetime object
def parse_power_values(filename, theDate):
currentFile = open(filename,'r')
for i, line in enumerate(currentFile):
if i <= 7:
doingSomething = True
#print 'header' + str(i) + '/ ' + line.rstrip()
elif ((i > 7) and (i <= 151)):
lineParts = line.split(';')
theTime = lineParts[0].split(':')
theHour = theTime[0]
theMin = theTime[1]
timestamp = theDate.replace(hour=int(theHour),minute=int(theMin))
power = lineParts[1].rstrip()
if (float(power) > 0):
#print str(i) + '/ ' + str(timestamp) + ' power = ' + power + 'kWh'
append_to_database(timestamp,power)
#else:
# print str(i) + '/ '
elif i > 151:
print str(timestamp) + ' DONE!'
print '----------------------'
break
currentFile.close()
def append_to_database(timestampval,powerval):
a = datetime.strptime(timestampval,"%b %d %Y %H:%M")
host="localhost", # your host, usually localhost
user="root", # your username
passwd="******"
database_name = 'SunnyData'
table_name = 'DTP'
timestamp_column = 'DT'
power_column = 'PWR'
sql = ("""INSERT INTO %s(%s,%s) VALUES(%s,'%s')""", ('table_name', 'timestamp_column', 'power_column', a.strftime('%Y-%m-%d %H:%M:%S'), powerval) )
print sql
#db = MySQLdb.connect(host,user,passwd,database_name)
cur = SD.cursor()
try:
cur.execute(sql)
print 'SQL: ' + sql
SD.commit()
except:
print 'DB append failed!'
SD.rollback()
# Main start of program
path = '.'
for filename in listdir(path):
if is_dated_csv(filename):
SD = MySQLdb.connect(host="localhost", user="root",passwd="**********", db = 'SunnyData')
print filename + ' matched'
# Do something with the desired .csv file
day_in_question = parse_for_date(filename)
print 'the date is ' + day_in_question
theDate = normalise_date_to_UTF(day_in_question)
parse_power_values(filename, theDate)
SD.close()
pass
and the DB create SQL looks like this
CREATE TABLE `DTP` (
`idDTP` int(11) NOT NULL,
`DT` timestamp NULL DEFAULT NULL,
`PWR` decimal(4,3) DEFAULT NULL,
PRIMARY KEY (`idDTP`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Many thanks,
Greg (Python novice)
Upvotes: 2
Views: 2595
Reputation: 168
I took your code, and it seems like the main issues are in the append_to_database
function.
It looks like the return value of normalise_date_to_UTF
returns a datetime object, which is being passed to append_to_database
(as the timestampval
argument), and that is being run through strptime, which is unnecessary since it's not a string and already a datetime.
The SQL statement also needed some adjusting for how you replace variables in strings. In python, the %
operator when following a string, allows you to insert values into that string via format codes. Also, table_name
, timestamp_column
, and power_column
had single quotes around them, which meant that the literal string would be inserted, rather than the variable's value.
Basically your append_to_database
function should look like this:
def append_to_database(timestampval,powerval):
host="localhost", # your host, usually localhost
user="root", # your username
passwd="******"
database_name = 'SunnyData'
table_name = 'DTP'
timestamp_column = 'DT'
power_column = 'PWR'
sql = ("INSERT INTO %s (%s,%s) VALUES('%s','%s')" % (table_name, timestamp_column, power_column, timestampval.strftime('%Y-%m-%d %H:%M:%S'), powerval) )
print sql
#db = MySQLdb.connect(host,user,passwd,database_name)
cur = SD.cursor()
try:
cur.execute(sql)
print 'SQL: ' + sql
SD.commit()
except:
print 'DB append failed!'
SD.rollback()
As for the MySQL DB, the SQL inserts are not specifying a value for the idDTP
column, and the column isn't auto incrementing, so after python inserts the first value, all subsequent inserts will fail, because the value defaults to 0 for each insert. This can be resolved in one of two ways, altering the python code to specify a value for idDTP
or setting idDTP to be auto incrementing.
If you want to alter your table to have auto incrementing ids, the SQL syntax (run on the MySQL server) is:
ALTER TABLE DTP MODIFY COLUMN idDTP INT auto_increment
or you can recreate the table like this:
CREATE TABLE `DTP` (
`idDTP` int(11) NOT NULL AUTO_INCREMENT,
`DT` timestamp NULL DEFAULT NULL,
`PWR` decimal(4,3) DEFAULT NULL,
PRIMARY KEY (`idDTP`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
I ran your code, making the changes above, and with the first part of the data provided, when querying MySQL for the DTP table, it looks like this:
select * from DTP;
+-------+---------------------+-------+
| idDTP | DT | PWR |
+-------+---------------------+-------+
| 1 | 2012-03-11 07:10:00 | 0.012 |
| 2 | 2012-03-11 07:20:00 | 0.048 |
| 3 | 2012-03-11 07:30:00 | 0.072 |
| 4 | 2012-03-11 07:40:00 | 0.078 |
| 5 | 2012-03-11 07:50:00 | 0.114 |
| 6 | 2012-03-11 08:00:00 | 0.150 |
| 7 | 2012-03-11 08:10:00 | 0.156 |
| 8 | 2012-03-11 08:20:00 | 0.168 |
| 9 | 2012-03-11 08:30:00 | 0.204 |
| 10 | 2012-03-11 08:40:00 | 0.180 |
+-------+---------------------+-------+
Hope that helps you with this project!
Upvotes: 2