Reputation: 3954
I have dfs with the following column headers:
Year JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC WIN SPR SUM AUT ANN ATTRIBUTE LOCATION
Each column contains random floating point value, except for ATTRIBUTE, which contains a value from this list:
[Max_temp, Min_temp, Mean_temp, Sunshine, Rainfall]
and LOCATION, which contains a value from this list:
[UK, England, Scotland, Wales]
I need to put it into the sqlite3 database that comes with django using the following models:
from django.db import models
from django.core.validators import MaxValueValidator, MinValueValidator
class Location(models.Model):
LOCATIONS = (
('EN', 'England'),
('SC', 'Scotland'),
('WA', 'Wales'),
('UK', 'United Kingdom'),
)
location = models.CharField(max_length=2, choices=LOCATIONS)
class Meta:
verbose_name_plural = "Location"
def __str__(self):
return self.location
class Max_temp(models.Model):
location = models.ForeignKey(Location, on_delete=models.CASCADE)
year = models.IntegerField(
default=0,
validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
MONTH_OR_SEASON = (
("JAN", "January"),
("FEB", "February"),
("MAR", "March"),
("APR", "April"),
("MAY", "May"),
("JUN", "June"),
("JUL", "July"),
("AUG", "August"),
("SEP", "September"),
("OCT", "October"),
("NOV", "November"),
("DEC", "December"),
("WIN", "Winter"),
("SPR", "Spring"),
("SUM", "Summer"),
("AUT", "Autumn"),
("ANN", "Annual"),
)
month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)
class Meta:
verbose_name_plural = "Maximum Temperature"
def __str__(self):
return self.year
class Min_temp(models.Model):
location = models.ForeignKey(Location, on_delete=models.CASCADE)
year = models.IntegerField(
default=0,
validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
MONTH_OR_SEASON = (
("JAN", "January"),
("FEB", "February"),
("MAR", "March"),
("APR", "April"),
("MAY", "May"),
("JUN", "June"),
("JUL", "July"),
("AUG", "August"),
("SEP", "September"),
("OCT", "October"),
("NOV", "November"),
("DEC", "December"),
("WIN", "Winter"),
("SPR", "Spring"),
("SUM", "Summer"),
("AUT", "Autumn"),
("ANN", "Annual"),
)
month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)
class Meta:
verbose_name_plural = "Minimum Temperature"
def __str__(self):
return self.year
class Mean_temp(models.Model):
location = models.ForeignKey(Location, on_delete=models.CASCADE)
year = models.IntegerField(
default=0,
validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
MONTH_OR_SEASON = (
("JAN", "January"),
("FEB", "February"),
("MAR", "March"),
("APR", "April"),
("MAY", "May"),
("JUN", "June"),
("JUL", "July"),
("AUG", "August"),
("SEP", "September"),
("OCT", "October"),
("NOV", "November"),
("DEC", "December"),
("WIN", "Winter"),
("SPR", "Spring"),
("SUM", "Summer"),
("AUT", "Autumn"),
("ANN", "Annual"),
)
month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)
class Meta:
verbose_name_plural = "Mean Temperature"
def __str__(self):
return self.year
class Sunshine(models.Model):
location = models.ForeignKey(Location, on_delete=models.CASCADE)
year = models.IntegerField(
default=0,
validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
MONTH_OR_SEASON = (
("JAN", "January"),
("FEB", "February"),
("MAR", "March"),
("APR", "April"),
("MAY", "May"),
("JUN", "June"),
("JUL", "July"),
("AUG", "August"),
("SEP", "September"),
("OCT", "October"),
("NOV", "November"),
("DEC", "December"),
("WIN", "Winter"),
("SPR", "Spring"),
("SUM", "Summer"),
("AUT", "Autumn"),
("ANN", "Annual"),
)
month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)
class Meta:
verbose_name_plural = "Sunshine"
def __str__(self):
return self.year
class Rainfall(models.Model):
location = models.ForeignKey(Location, on_delete=models.CASCADE)
year = models.IntegerField(
default=0,
validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
MONTH_OR_SEASON = (
("JAN", "January"),
("FEB", "February"),
("MAR", "March"),
("APR", "April"),
("MAY", "May"),
("JUN", "June"),
("JUL", "July"),
("AUG", "August"),
("SEP", "September"),
("OCT", "October"),
("NOV", "November"),
("DEC", "December"),
("WIN", "Winter"),
("SPR", "Spring"),
("SUM", "Summer"),
("AUT", "Autumn"),
("ANN", "Annual"),
)
month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)
class Meta:
verbose_name_plural = "Rainfall"
def __str__(self):
return self.year
I'm a bit stumped on how to do this - I'm not sure if my model structure is right? And is there a way to structure the models so I don't have to repeat code? I need a database structured like so:
Location - Max_temp
- Min_temp
- Mean_temp
- Sunshine
- Rainfall
So each location has a Max_temp table, a Min_temp table etc...
The main thing tho, is how do I tell Django to put the pandas df into the database and where in the Django project do I put that code?
Any help appreciated!
Upvotes: 1
Views: 889
Reputation: 3384
You were right to question your model structure :) Typically when you have that much code duplication there is an easier way. Key here is the fact that your measurements all have the same schema. Each model you define represents a database table, and we really don't need to create a new database table for each measurement type- we can just store the type as a column in a "Measurement" table.
I also question whether you need the separate table for Location
as all it contains is the verbose name for the country code- you may as well use a ChoiceField
(if your usecase is more advanced than this then feel free to keep the location model separate).
from django.db import models
from django.core.validators import MaxValueValidator, MinValueValidator
class Measurement(models.Model):
MEASUREMENT_TYPES = (
("max_temp", "Maximum Temperature"),
("min_temp", "Minimum Temperature"),
("mean_temp", "Mean Temperature"),
("sunshine", "Sunshine"),
("rainfall", "Rainfall"),
)
LOCATIONS = (
('EN', 'England'),
('SC', 'Scotland'),
('WA', 'Wales'),
('UK', 'United Kingdom'),
)
MONTH_OR_SEASON = (
("JAN", "January"),
("FEB", "February"),
("MAR", "March"),
("APR", "April"),
("MAY", "May"),
("JUN", "June"),
("JUL", "July"),
("AUG", "August"),
("SEP", "September"),
("OCT", "October"),
("NOV", "November"),
("DEC", "December"),
("WIN", "Winter"),
("SPR", "Spring"),
("SUM", "Summer"),
("AUT", "Autumn"),
("ANN", "Annual"),
)
measurement_type = models.CharField(
max_length=255, choices=MEASUREMENT_TYPES, default="max_temp",
)
location = models.CharField(
max_length=2, choices=LOCATIONS, default="EN",
)
year = models.IntegerField(
default=0,
validators=[MaxValueValidator(9999), MinValueValidator(0)],
)
month_or_season = models.CharField(
max_length=3, choices=MONTH_OR_SEASON, default="ANN",
)
value = models.FloatField(
default=0,
validators=[MinValueValidator(0)],
)
def __str__(self):
return "{0} in {1} - {2}, {3}".format(
self.measurement_type, self.location,
self.month_or_season, self.year,
)
Now when you loop over each row in your dataframe, you get the ATTRIBUTE
value (you can cast it to .lower()
to be in-line with the MEASUREMENT_TYPES
choices defined in your models.py
) and the LOCATION
, and for each value in JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC WIN SPR SUM AUT ANN
you create a new Measurement
with that attribute, location and month/season and the value from that column.
To create a new Measurement
object you can just do:
for row in my_data_frame:
attribute = row['ATTRIBUTE'] # (or whatever)
location = row['LOCATION']
...
for month_or_season in MONTH_AND_SEASONS:
value = row[month_or_season]
....
Measurement(
measurement_type=attribute,
location=location,
value=value,
...
).save() # Saves model instance to database
Upvotes: 2