Davtho1983
Davtho1983

Reputation: 3954

Transfer pandas df to db.sqlite3 using django models

I have dfs with the following column headers:

Year JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC WIN SPR SUM AUT ANN ATTRIBUTE LOCATION

Each column contains random floating point value, except for ATTRIBUTE, which contains a value from this list:

[Max_temp, Min_temp, Mean_temp, Sunshine, Rainfall]

I need to put it into the sqlite3 database that comes with django using the following models:

from django.db import models
from django.core.validators import MaxValueValidator, MinValueValidator


class Location(models.Model):
    LOCATIONS = (
        ('EN', 'England'),
        ('SC', 'Scotland'),
        ('WA', 'Wales'),
        ('UK', 'United Kingdom'),
    )
    location = models.CharField(max_length=2, choices=LOCATIONS)

    class Meta:
        verbose_name_plural = "Location"

    def __str__(self):
        return self.location

class Max_temp(models.Model):
    location = models.ForeignKey(Location, on_delete=models.CASCADE)
    year = models.IntegerField(
    default=0,
    validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
    MONTH_OR_SEASON = (
        ("JAN", "January"),
        ("FEB", "February"),
        ("MAR", "March"),
        ("APR", "April"),
        ("MAY", "May"),
        ("JUN", "June"),
        ("JUL", "July"),
        ("AUG", "August"),
        ("SEP", "September"),
        ("OCT", "October"),
        ("NOV", "November"),
        ("DEC", "December"),
        ("WIN", "Winter"),
        ("SPR", "Spring"),
        ("SUM", "Summer"),
        ("AUT", "Autumn"),
        ("ANN", "Annual"),
)
    month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)

    class Meta:
        verbose_name_plural = "Maximum Temperature"

    def __str__(self):
        return self.year

class Min_temp(models.Model):
    location = models.ForeignKey(Location, on_delete=models.CASCADE)
    year = models.IntegerField(
    default=0,
    validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
    MONTH_OR_SEASON = (
        ("JAN", "January"),
        ("FEB", "February"),
        ("MAR", "March"),
        ("APR", "April"),
        ("MAY", "May"),
        ("JUN", "June"),
        ("JUL", "July"),
        ("AUG", "August"),
        ("SEP", "September"),
        ("OCT", "October"),
        ("NOV", "November"),
        ("DEC", "December"),
        ("WIN", "Winter"),
        ("SPR", "Spring"),
        ("SUM", "Summer"),
        ("AUT", "Autumn"),
        ("ANN", "Annual"),
)
    month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)

    class Meta:
        verbose_name_plural = "Minimum Temperature"

    def __str__(self):
        return self.year

class Mean_temp(models.Model):
    location = models.ForeignKey(Location, on_delete=models.CASCADE)
    year = models.IntegerField(
    default=0,
    validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
    MONTH_OR_SEASON = (
        ("JAN", "January"),
        ("FEB", "February"),
        ("MAR", "March"),
        ("APR", "April"),
        ("MAY", "May"),
        ("JUN", "June"),
        ("JUL", "July"),
        ("AUG", "August"),
        ("SEP", "September"),
        ("OCT", "October"),
        ("NOV", "November"),
        ("DEC", "December"),
        ("WIN", "Winter"),
        ("SPR", "Spring"),
        ("SUM", "Summer"),
        ("AUT", "Autumn"),
        ("ANN", "Annual"),
)
    month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)

    class Meta:
        verbose_name_plural = "Mean Temperature"

    def __str__(self):
        return self.year

class Sunshine(models.Model):
    location = models.ForeignKey(Location, on_delete=models.CASCADE)
    year = models.IntegerField(
    default=0,
    validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
    MONTH_OR_SEASON = (
        ("JAN", "January"),
        ("FEB", "February"),
        ("MAR", "March"),
        ("APR", "April"),
        ("MAY", "May"),
        ("JUN", "June"),
        ("JUL", "July"),
        ("AUG", "August"),
        ("SEP", "September"),
        ("OCT", "October"),
        ("NOV", "November"),
        ("DEC", "December"),
        ("WIN", "Winter"),
        ("SPR", "Spring"),
        ("SUM", "Summer"),
        ("AUT", "Autumn"),
        ("ANN", "Annual"),
)
    month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)

    class Meta:
        verbose_name_plural = "Sunshine"

    def __str__(self):
        return self.year

class Rainfall(models.Model):
    location = models.ForeignKey(Location, on_delete=models.CASCADE)
    year = models.IntegerField(
    default=0,
    validators=[MaxValueValidator(9999), MinValueValidator(0)]
)
    MONTH_OR_SEASON = (
        ("JAN", "January"),
        ("FEB", "February"),
        ("MAR", "March"),
        ("APR", "April"),
        ("MAY", "May"),
        ("JUN", "June"),
        ("JUL", "July"),
        ("AUG", "August"),
        ("SEP", "September"),
        ("OCT", "October"),
        ("NOV", "November"),
        ("DEC", "December"),
        ("WIN", "Winter"),
        ("SPR", "Spring"),
        ("SUM", "Summer"),
        ("AUT", "Autumn"),
        ("ANN", "Annual"),
)
    month_or_season = models.CharField(max_length=3, choices=MONTH_OR_SEASON)

    class Meta:
        verbose_name_plural = "Rainfall"

    def __str__(self):
        return self.year

I'm a bit stumped on how to do this - I'm not sure if my model structure is right? And is there a way to structure the models so I don't have to repeat code? I need a database structured like so:

Location - Max_temp
- Min_temp
- Mean_temp
- Sunshine
- Rainfall

So each location has a Max_temp table, a Min_temp table etc...

The main thing tho, is how do I tell Django to put the pandas df into the database and where in the Django project do I put that code?

Any help appreciated!

Upvotes: 1

Views: 889

Answers (1)

ptr
ptr

Reputation: 3384

You were right to question your model structure :) Typically when you have that much code duplication there is an easier way. Key here is the fact that your measurements all have the same schema. Each model you define represents a database table, and we really don't need to create a new database table for each measurement type- we can just store the type as a column in a "Measurement" table.

I also question whether you need the separate table for Location as all it contains is the verbose name for the country code- you may as well use a ChoiceField (if your usecase is more advanced than this then feel free to keep the location model separate).

from django.db import models
from django.core.validators import MaxValueValidator, MinValueValidator


class Measurement(models.Model):
    MEASUREMENT_TYPES = (
        ("max_temp", "Maximum Temperature"),
        ("min_temp", "Minimum Temperature"),
        ("mean_temp", "Mean Temperature"),
        ("sunshine", "Sunshine"),
        ("rainfall", "Rainfall"),
    )

    LOCATIONS = (
        ('EN', 'England'),
        ('SC', 'Scotland'),
        ('WA', 'Wales'),
        ('UK', 'United Kingdom'),
    )

    MONTH_OR_SEASON = (
        ("JAN", "January"),
        ("FEB", "February"),
        ("MAR", "March"),
        ("APR", "April"),
        ("MAY", "May"),
        ("JUN", "June"),
        ("JUL", "July"),
        ("AUG", "August"),
        ("SEP", "September"),
        ("OCT", "October"),
        ("NOV", "November"),
        ("DEC", "December"),
        ("WIN", "Winter"),
        ("SPR", "Spring"),
        ("SUM", "Summer"),
        ("AUT", "Autumn"),
        ("ANN", "Annual"),
    )

    measurement_type = models.CharField(
        max_length=255, choices=MEASUREMENT_TYPES, default="max_temp",
    )
    location = models.CharField(
        max_length=2, choices=LOCATIONS, default="EN",
    )
    year = models.IntegerField(
        default=0,
        validators=[MaxValueValidator(9999), MinValueValidator(0)],
    )
    month_or_season = models.CharField(
        max_length=3, choices=MONTH_OR_SEASON, default="ANN",
    )
    value = models.FloatField(
        default=0,
        validators=[MinValueValidator(0)],
    )

    def __str__(self):
        return "{0} in {1} - {2}, {3}".format(
            self.measurement_type, self.location,
            self.month_or_season, self.year, 
        )

Now when you loop over each row in your dataframe, you get the ATTRIBUTE value (you can cast it to .lower() to be in-line with the MEASUREMENT_TYPES choices defined in your models.py) and the LOCATION, and for each value in JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC WIN SPR SUM AUT ANN you create a new Measurement with that attribute, location and month/season and the value from that column.

To create a new Measurement object you can just do:

for row in my_data_frame:
    attribute = row['ATTRIBUTE']  # (or whatever)
    location = row['LOCATION']
    ...
    for month_or_season in MONTH_AND_SEASONS:
        value = row[month_or_season]
        ....
        Measurement(
            measurement_type=attribute,
            location=location,
            value=value,
            ...
        ).save()  # Saves model instance to database

Upvotes: 2

Related Questions