mahrab_lib
mahrab_lib

Reputation: 1

Reading a csv file in Django (Python)

I'm trying to figure out how to read from a csv file in a django app. One row of the csv file has data that needs to be parcelled out into different tables/models. But I also need to perform checks on that data to see if it's already in the database.

The app is a continuation of the local library tutorial on the Mozilla website. I'm trying to update the library database by uploading data from a csv file, where each line is a record from a book.

It kinda works, but not without errors. The main struggle is understanding how to iterate through the file correctly. Do I need to be using an input stream, if so am I doing that correctly, or can I just open the file some other way? I've tried that, but had issues with decoding.

The view function I've tried to write (cobbled together from other tutorials, videos and stackexchange posts):

import csv, io

from django.contrib.auth.decorators import permission_required

@permission_required('admin.can_add_log_entry')
def book_upload(request):
    template = "catalog/book_upload.html"

    prompt = {
        'order': 'Order of the CSV should be "authors", "last name", yada yada'
    }

    if request.method == "GET":
        return render(request, template, prompt)

    csv_file = request.FILES['file']
    data_set = csv_file.read().decode('UTF-8')
    io_string = io.StringIO(data_set)

    # skip the header
    next(io_string)

    for row in csv.reader(io_string, delimiter=','):

        # convert the row into a dictionary
        row_dict = row_to_dict(row)

        # check to see if book is in the database
        book = book_is_in_db(row_dict['title'])
        if book:
            add_book_instance(book)
        else:
            # get the author from the db, or create them
            try:
                author = Author.objects.get(first_name=row_dict['first_name'], last_name=row_dict['last_name'])
            except Author.DoesNotExist:
                author = Author.objects.create(first_name=row_dict['first_name'], last_name=row_dict['last_name'])

            # create the book object and save it to the db
            _, created = Book.objects.create(
                title=row_dict['title'],
                author=author,
                summary=row_dict['summary'],
                #genre=genre,
                #language=language,
                isbn=row_dict['isbn'],
                )

            add_book_instance(created)

    context = {}
    return render(request, template, context)


def row_to_dict(row):
    """Returns the given row in a dict format"""
    # Here's how the row list looks like:
    # ['full name', 'first name', 'last name' 'title',
    #  'summary', 'genre', 'language', 'isbn']
    return {'first_name': row[1], 'last_name': row[2], 
        'title': row[3], 'summary': row[4], 'genre': row[5],
        'language': row[6], 'isbn': row[7],
        }

def book_is_in_db(title):
    """Check the book is in db. 
    If yes, return the book, else return None
    """
    try:
        return Book.objects.get(title=title)
    except Book.DoesNotExist:
        return None

def add_book_instance(book):
    """Add a new instance of the book"""
    BookInstance.objects.create(book=book, status='a')

The models:

class Genre(models.Model):
    """Model representing a book genre."""
    name = models.CharField(max_length=200, help_text='Enter a book genre (e.g. Science Fiction)')

    def __str__(self):
        """String for representing the Model object."""
        return self.name


class Book(models.Model):
    """Model representing a book (but not a specific copy of a book)."""
    title = models.CharField(max_length=200)
    author = models.ForeignKey('Author', on_delete=models.SET_NULL, null=True)
    summary = models.TextField(max_length=1000, help_text='Enter a brief description of the book')
    isbn = models.CharField('ISBN', max_length=13, help_text='13 Character <a href="https://www.isbn-international.org/content/what-isbn">ISBN number</a>')
    genre = models.ManyToManyField(Genre, help_text='Select a genre for this book', blank=True)
    language = models.ForeignKey('Language', on_delete=models.SET_NULL, null=True, blank=True)

    def __str__(self):
        """String for representing the Model object."""
        return self.title

    def get_absolute_url(self):
        """Returns the url to access a detail record for this book."""
        return reverse('book-detail', args=[str(self.id)])

    def display_genre(self):
        """Figure this out later"""
        return ', '.join(genre.name for genre in self.genre.all()[:])

    display_genre.short_descriptions = 'Genre'


class BookInstance(models.Model):
    """Model representing a specific copy of a book (i.e. that can be borrowed from the library)."""
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, help_text='Unique ID for this particular book across whole library')
    book = models.ForeignKey('Book', on_delete=models.SET_NULL, null=True) 
    imprint = models.CharField(max_length=200, blank=True)
    due_back = models.DateField(null=True, blank=True)

    LOAN_STATUS = (
        ('m', 'Maintenance'),
        ('o', 'On loan'),
        ('a', 'Available'),
        ('r', 'Reserved'),
    )

    status = models.CharField(
        max_length=1,
        choices=LOAN_STATUS,
        blank=True,
        default='m',
        help_text='Book availability',
    )

    #borrowing/user stuff
    borrower = models.ForeignKey(User, on_delete=models.SET_NULL, null=True, blank=True)

    # a property definition for overdue stuff
    @property
    def is_overdue(self):
        if self.due_back and date.today() > self.due_back:
            return True
        return False

    class Meta:
        ordering = ['due_back']
        permissions = (("can_mark_returned", "Set book as returned"),)

    def __str__(self):
        """String for representing the Model object."""
        return f'{self.id} ({self.book.title})'


class Author(models.Model):
    """Model representing an author."""
    first_name = models.CharField(max_length=100)
    last_name = models.CharField(max_length=100)
    date_of_birth = models.DateField(null=True, blank=True)
    date_of_death = models.DateField('died', null=True, blank=True)

    class Meta:
        ordering = ['last_name', 'first_name']

    def get_absolute_url(self):
        """Returns the url to access a particular author instance."""
        return reverse('author-detail', args=[str(self.id)])

    def __str__(self):
        """String for representing the Model object."""
        return f'{self.last_name}, {self.first_name}'


class Language(models.Model):
    """Model representing a Language."""
    name = models.CharField(max_length=200,
        help_text="Enter the book's natural language (e.g. English, French, Japanese etc.)")

    def __str__(self):
        """String for representing the Model object."""
        return self.name

The csv file:

fullname,first_name,last_name,title,summary,genre,language,isbn
Alex Garland,Alex,Garland,The Beach,A book about the beach,Coming-of-age,English,1234567890123
Simon Singh,Simon,Singh,Fermat's Last Theorem,Maths-y book,Maths,English,3210987654321

Latest error: TypeError at /catalog/upload/ cannot unpack non-iterable Book object, with info:

205. _, created = Book.objects.create(

▼ Local vars
Variable    Value
author  

<Author: Singh, Simon>

book    

None

csv_file    

<InMemoryUploadedFile: test.csv (application/vnd.ms-excel)>

data_set    

('fullname,first_name,last_name,title,summary,genre,language,isbn\r\n'
 'Alex Garland,Alex,Garland,The Beach,A book about the '
 'beach,Coming-of-age,English,1234567890123\r\n'
 "Simon Singh,Simon,Singh,Fermat's Last Theorem,Maths-y "
 'book,Maths,English,3210987654321')

io_string   

<_io.StringIO object at 0x044F2028>

prompt  

{'order': 'Order of the CSV should be "authors", last_name, yada yada'}

request     

<WSGIRequest: POST '/catalog/upload/'>

row     

['Simon Singh',
 'Simon',
 'Singh',
 "Fermat's Last Theorem",
 'Maths-y book',
 'Maths',
 'English',
 '3210987654321']

row_dict    

{'first_name': 'Simon',
 'genre': 'Maths',
 'isbn': '3210987654321',
 'language': 'English',
 'last_name': 'Singh',
 'summary': 'Maths-y book',
 'title': "Fermat's Last Theorem"}

template    

'catalog/book_upload.html'

Also, I'm not sure what the -, created = part is all about. Is it a tuple and takes care of saving it to the database somehow?

Upvotes: 0

Views: 4772

Answers (1)

hoopscootch234
hoopscootch234

Reputation: 11

Same user here (forgot my original login details). Figured out how to do this, so thought I'd leave an answer in the unlikely event that anyone stumbles across this question again.

To import the file I used a "management command" instead of how I originally tried to do it. By management command, I mean at the terminal you put in something like:

python manage.py import_books_from_csv data.csv

as opposed to creating a page where you select the file and click a submit button. I guess it could still be done that way, but I'm happy with the management command way.

The file structure that needs to be created is something like: file structure

So, you create a management folder, and within that a commands folder. Inside both folders you have empty/blank __init__.py files (to make them "python packages"). Then inside the commands folder you create a py file giving it the name of the command, i.e. import_books_from_csv.py.

Inside this file you put in something like the following code (I've changed some of the info in the csv file, so for example I've got pages, publish_date and omitted stuff like genre, language):

from django.core.management.base import BaseCommand
import csv
from collections import Counter
from books.models import Book, BookInstance2
from authors.models import Author
from datetime import datetime

class Command(BaseCommand):
    help = "Import books"
    def add_arguments(self, parser):
        parser.add_argument("csvfile", type=open)
    def handle(self, *args, **options):
        self.stdout.write("Importing books")
        c = Counter()
        reader = csv.DictReader(options.pop("csvfile"))
        for row in reader:
            author, created_author = Author.objects.get_or_create(
                first_name=row["first_name"], last_name=row["last_name"]
            )
            c["authors"] += 1
            if created_author:
                c["authors_created"] += 1
                author.save()

            book, created_book = Book.objects.get_or_create(
                title=row["title"], author=author
                )
            c["books"] += 1
            if created_book:
                book.summary = row["description"]
                book.publish_date = datetime.strptime(row["publish_date"], "%Y-%m-%d").date()
                book.isbn = row["isbn"]
                c["books_created"] += 1
                book.save()

            new_instance = BookInstance2.objects.create(
                book=book, publisher=row["publisher"], pages=row["pages"]
            )

            c["instances_created"] += 1
            new_instance.save()
        self.stdout.write(
            "Books processed=%d (created=%d)"
            % (c["books"], c["books_created"])
        )
        self.stdout.write(
            "Authors processed=%d (created=%d)"
            % (c["authors"], c["authors_created"])
        )
        self.stdout.write("Instances created=%d" % c["instances_created"])

The add_arguments function takes in the file path (which at the terminal would be the last part of the command, e.g. data.csv, if the file is in the root directory).

The self.stdout.write() stuff is just for printing out useful info, which has been retrieved by using Counter.

The main improvement is using DictReader. So if in the csv file, the first row has all the column headings, then you can use those headings to assign the corresponding info where you want for each row.

The other improvement is using get_or_create(), which either gets or creates a new object, but also returns a boolean (e.g. "created_book", "created_author", etc). Then you can use this to add the info to the object if it's a new one, otherwise leave it alone (alternatively you can use this to update an existing object).

I'm not gonna pretend I understand all of that, I don't know what the options thing is for example, and maybe I've misunderstood some of how it works. But I'm pretty happy I got it working in the end. I got all this from a Django textbook by the way, which is probably where I should have looked in the first place.

Upvotes: 1

Related Questions