Reputation: 1
I'm trying to figure out how to read from a csv file in a django app. One row of the csv file has data that needs to be parcelled out into different tables/models. But I also need to perform checks on that data to see if it's already in the database.
The app is a continuation of the local library tutorial on the Mozilla website. I'm trying to update the library database by uploading data from a csv file, where each line is a record from a book.
It kinda works, but not without errors. The main struggle is understanding how to iterate through the file correctly. Do I need to be using an input stream, if so am I doing that correctly, or can I just open the file some other way? I've tried that, but had issues with decoding.
The view function I've tried to write (cobbled together from other tutorials, videos and stackexchange posts):
import csv, io
from django.contrib.auth.decorators import permission_required
@permission_required('admin.can_add_log_entry')
def book_upload(request):
template = "catalog/book_upload.html"
prompt = {
'order': 'Order of the CSV should be "authors", "last name", yada yada'
}
if request.method == "GET":
return render(request, template, prompt)
csv_file = request.FILES['file']
data_set = csv_file.read().decode('UTF-8')
io_string = io.StringIO(data_set)
# skip the header
next(io_string)
for row in csv.reader(io_string, delimiter=','):
# convert the row into a dictionary
row_dict = row_to_dict(row)
# check to see if book is in the database
book = book_is_in_db(row_dict['title'])
if book:
add_book_instance(book)
else:
# get the author from the db, or create them
try:
author = Author.objects.get(first_name=row_dict['first_name'], last_name=row_dict['last_name'])
except Author.DoesNotExist:
author = Author.objects.create(first_name=row_dict['first_name'], last_name=row_dict['last_name'])
# create the book object and save it to the db
_, created = Book.objects.create(
title=row_dict['title'],
author=author,
summary=row_dict['summary'],
#genre=genre,
#language=language,
isbn=row_dict['isbn'],
)
add_book_instance(created)
context = {}
return render(request, template, context)
def row_to_dict(row):
"""Returns the given row in a dict format"""
# Here's how the row list looks like:
# ['full name', 'first name', 'last name' 'title',
# 'summary', 'genre', 'language', 'isbn']
return {'first_name': row[1], 'last_name': row[2],
'title': row[3], 'summary': row[4], 'genre': row[5],
'language': row[6], 'isbn': row[7],
}
def book_is_in_db(title):
"""Check the book is in db.
If yes, return the book, else return None
"""
try:
return Book.objects.get(title=title)
except Book.DoesNotExist:
return None
def add_book_instance(book):
"""Add a new instance of the book"""
BookInstance.objects.create(book=book, status='a')
The models:
class Genre(models.Model):
"""Model representing a book genre."""
name = models.CharField(max_length=200, help_text='Enter a book genre (e.g. Science Fiction)')
def __str__(self):
"""String for representing the Model object."""
return self.name
class Book(models.Model):
"""Model representing a book (but not a specific copy of a book)."""
title = models.CharField(max_length=200)
author = models.ForeignKey('Author', on_delete=models.SET_NULL, null=True)
summary = models.TextField(max_length=1000, help_text='Enter a brief description of the book')
isbn = models.CharField('ISBN', max_length=13, help_text='13 Character <a href="https://www.isbn-international.org/content/what-isbn">ISBN number</a>')
genre = models.ManyToManyField(Genre, help_text='Select a genre for this book', blank=True)
language = models.ForeignKey('Language', on_delete=models.SET_NULL, null=True, blank=True)
def __str__(self):
"""String for representing the Model object."""
return self.title
def get_absolute_url(self):
"""Returns the url to access a detail record for this book."""
return reverse('book-detail', args=[str(self.id)])
def display_genre(self):
"""Figure this out later"""
return ', '.join(genre.name for genre in self.genre.all()[:])
display_genre.short_descriptions = 'Genre'
class BookInstance(models.Model):
"""Model representing a specific copy of a book (i.e. that can be borrowed from the library)."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, help_text='Unique ID for this particular book across whole library')
book = models.ForeignKey('Book', on_delete=models.SET_NULL, null=True)
imprint = models.CharField(max_length=200, blank=True)
due_back = models.DateField(null=True, blank=True)
LOAN_STATUS = (
('m', 'Maintenance'),
('o', 'On loan'),
('a', 'Available'),
('r', 'Reserved'),
)
status = models.CharField(
max_length=1,
choices=LOAN_STATUS,
blank=True,
default='m',
help_text='Book availability',
)
#borrowing/user stuff
borrower = models.ForeignKey(User, on_delete=models.SET_NULL, null=True, blank=True)
# a property definition for overdue stuff
@property
def is_overdue(self):
if self.due_back and date.today() > self.due_back:
return True
return False
class Meta:
ordering = ['due_back']
permissions = (("can_mark_returned", "Set book as returned"),)
def __str__(self):
"""String for representing the Model object."""
return f'{self.id} ({self.book.title})'
class Author(models.Model):
"""Model representing an author."""
first_name = models.CharField(max_length=100)
last_name = models.CharField(max_length=100)
date_of_birth = models.DateField(null=True, blank=True)
date_of_death = models.DateField('died', null=True, blank=True)
class Meta:
ordering = ['last_name', 'first_name']
def get_absolute_url(self):
"""Returns the url to access a particular author instance."""
return reverse('author-detail', args=[str(self.id)])
def __str__(self):
"""String for representing the Model object."""
return f'{self.last_name}, {self.first_name}'
class Language(models.Model):
"""Model representing a Language."""
name = models.CharField(max_length=200,
help_text="Enter the book's natural language (e.g. English, French, Japanese etc.)")
def __str__(self):
"""String for representing the Model object."""
return self.name
The csv file:
fullname,first_name,last_name,title,summary,genre,language,isbn
Alex Garland,Alex,Garland,The Beach,A book about the beach,Coming-of-age,English,1234567890123
Simon Singh,Simon,Singh,Fermat's Last Theorem,Maths-y book,Maths,English,3210987654321
Latest error: TypeError at /catalog/upload/ cannot unpack non-iterable Book object, with info:
205. _, created = Book.objects.create(
▼ Local vars
Variable Value
author
<Author: Singh, Simon>
book
None
csv_file
<InMemoryUploadedFile: test.csv (application/vnd.ms-excel)>
data_set
('fullname,first_name,last_name,title,summary,genre,language,isbn\r\n'
'Alex Garland,Alex,Garland,The Beach,A book about the '
'beach,Coming-of-age,English,1234567890123\r\n'
"Simon Singh,Simon,Singh,Fermat's Last Theorem,Maths-y "
'book,Maths,English,3210987654321')
io_string
<_io.StringIO object at 0x044F2028>
prompt
{'order': 'Order of the CSV should be "authors", last_name, yada yada'}
request
<WSGIRequest: POST '/catalog/upload/'>
row
['Simon Singh',
'Simon',
'Singh',
"Fermat's Last Theorem",
'Maths-y book',
'Maths',
'English',
'3210987654321']
row_dict
{'first_name': 'Simon',
'genre': 'Maths',
'isbn': '3210987654321',
'language': 'English',
'last_name': 'Singh',
'summary': 'Maths-y book',
'title': "Fermat's Last Theorem"}
template
'catalog/book_upload.html'
Also, I'm not sure what the -, created =
part is all about. Is it a tuple and takes care of saving it to the database somehow?
Upvotes: 0
Views: 4772
Reputation: 11
Same user here (forgot my original login details). Figured out how to do this, so thought I'd leave an answer in the unlikely event that anyone stumbles across this question again.
To import the file I used a "management command" instead of how I originally tried to do it. By management command, I mean at the terminal you put in something like:
python manage.py import_books_from_csv data.csv
as opposed to creating a page where you select the file and click a submit button. I guess it could still be done that way, but I'm happy with the management command way.
The file structure that needs to be created is something like: file structure
So, you create a management folder, and within that a commands folder. Inside both folders you have empty/blank __init__.py
files (to make them "python packages"). Then inside the commands folder you create a py file giving it the name of the command, i.e. import_books_from_csv.py
.
Inside this file you put in something like the following code (I've changed some of the info in the csv file, so for example I've got pages, publish_date and omitted stuff like genre, language):
from django.core.management.base import BaseCommand
import csv
from collections import Counter
from books.models import Book, BookInstance2
from authors.models import Author
from datetime import datetime
class Command(BaseCommand):
help = "Import books"
def add_arguments(self, parser):
parser.add_argument("csvfile", type=open)
def handle(self, *args, **options):
self.stdout.write("Importing books")
c = Counter()
reader = csv.DictReader(options.pop("csvfile"))
for row in reader:
author, created_author = Author.objects.get_or_create(
first_name=row["first_name"], last_name=row["last_name"]
)
c["authors"] += 1
if created_author:
c["authors_created"] += 1
author.save()
book, created_book = Book.objects.get_or_create(
title=row["title"], author=author
)
c["books"] += 1
if created_book:
book.summary = row["description"]
book.publish_date = datetime.strptime(row["publish_date"], "%Y-%m-%d").date()
book.isbn = row["isbn"]
c["books_created"] += 1
book.save()
new_instance = BookInstance2.objects.create(
book=book, publisher=row["publisher"], pages=row["pages"]
)
c["instances_created"] += 1
new_instance.save()
self.stdout.write(
"Books processed=%d (created=%d)"
% (c["books"], c["books_created"])
)
self.stdout.write(
"Authors processed=%d (created=%d)"
% (c["authors"], c["authors_created"])
)
self.stdout.write("Instances created=%d" % c["instances_created"])
The add_arguments
function takes in the file path (which at the terminal would be the last part of the command, e.g. data.csv, if the file is in the root directory).
The self.stdout.write()
stuff is just for printing out useful info, which has been retrieved by using Counter
.
The main improvement is using DictReader
. So if in the csv file, the first row has all the column headings, then you can use those headings to assign the corresponding info where you want for each row.
The other improvement is using get_or_create()
, which either gets or creates a new object, but also returns a boolean (e.g. "created_book", "created_author", etc). Then you can use this to add the info to the object if it's a new one, otherwise leave it alone (alternatively you can use this to update an existing object).
I'm not gonna pretend I understand all of that, I don't know what the options thing is for example, and maybe I've misunderstood some of how it works. But I'm pretty happy I got it working in the end. I got all this from a Django textbook by the way, which is probably where I should have looked in the first place.
Upvotes: 1