GrantU
GrantU

Reputation: 6555

Django slow at creating objects?

How long should this take to run?

 # Build (but do not save) one Message per subscribed contact owned by the batch user.
 query = Contact.objects.filter(contact_owner=batch.user, subscribed=True)
 objs = [
        Message(
            recipient_number=e.mobile,
            content=content,
            sender=e.contact_owner,
            billee=user,
            sender_name=sender,
        )
        for e in query
 ]

I'm running just this code and nothing else (I'm not saving to the database, only creating the objects as shown above). It takes 15 minutes for the 5000 Message objects to be created from the query. Is this right? Why is Django so slow?

This is the model it creates; again, I'm not saving here. I think there has to be an issue in the model when an object is created; either that, or Django is just too slow for my needs.

Model message

from django.db import models
from django.contrib.contenttypes import generic
from django.utils.translation import ugettext as _
from django.conf import settings
from django.db.models.signals import post_save, pre_save
from django.dispatch import receiver


import uuidfield.fields
import picklefield
import jsonfield

# Use the real converter when the ``timezones`` app is installed; otherwise
# fall back to a stub with the same three-argument call shape.
if 'timezones' not in settings.INSTALLED_APPS:
    def adjust_datetime_to_timezone(a, b, c):
        """Fallback: return the first argument unchanged, ignoring the other two."""
        return a
else:
    from timezones.utils import adjust_datetime_to_timezone

from gateway import Gateway



class MessageManager(models.Manager):
    """Manager with helpers for matching gateway callbacks to stored messages."""

    def get_matching_message(self, datadict):
        """
        Return the message referenced by ``datadict``, or ``None``.

        Every gateway is tried in turn: the value found in ``datadict`` under
        the gateway's ``status_msg_id`` key is looked up as the message's
        ``gateway_message_id`` for that gateway.
        """
        for gateway in Gateway.objects.all():
            try:
                return Message.objects.get(
                    gateway_message_id=datadict.get(gateway.status_msg_id),
                    gateway=gateway,
                )
            except Message.DoesNotExist:
                # Not this gateway's message; try the next one.
                pass
        return None

    def get_original_for_reply(self, datadict):
        """
        Return the message that ``datadict`` is a reply to.

        Every gateway is tried in turn: the value found in ``datadict`` under
        the gateway's ``uuid_keyword`` key is looked up as the message's
        ``uuid``.  When nothing matches, the result of
        ``custom_reply_matcher`` is returned instead.
        """
        for gateway in Gateway.objects.all():
            try:
                return Message.objects.get(
                    uuid=datadict.get(gateway.uuid_keyword),
                    gateway=gateway
                )
            except Message.DoesNotExist:
                pass
        # This may have been a message sent from another phone, but
        # there may be a reply-code that was added in.
        return self.custom_reply_matcher(datadict)

    def custom_reply_matcher(self, datadict):
        """Hook for subclasses; the default implementation matches nothing."""
        # Designed to be overridden.
        return None

    def get_last_rate_for(self, recipient_number):
        """
        Return the gateway charge per segment of the latest charged message.

        Only messages to ``recipient_number`` that have a ``gateway_charge``
        are considered, newest ``send_date`` first.

        Raises ``IndexError`` when no such message exists.
        """
        m = Message.objects.filter(recipient_number=recipient_number).exclude(
            gateway_charge=None).order_by('-send_date')[0]
        return m.gateway_charge / m.length

    def get_message(self, gateway_message_id):
        """Return the message with this ``gateway_message_id``, or ``None``."""
        try:
            return Message.objects.get(gateway_message_id=gateway_message_id,)
        except Message.DoesNotExist:
            return None


# NOTE: everything below lives at module level.  When it was indented inside
# MessageManager, ``objects = MessageManager()`` ran while the MessageManager
# class body was still executing and raised NameError.
MESSAGE_STATUSES = (
    ('Unsent', 'Unsent'),
    ('Sent', 'Sent'),
    ('Delivered', 'Delivered'),
    ('Failed', 'Failed'),
)


class Message(models.Model):
    """
    A Message.

    We have a uuid, which is our reference. We also have a gateway_message_id,
    which is their reference.  This is required by some systems so we can
    pass in a unique value that will allow us to match up replies to original
    messages.
    """

    content = models.TextField(help_text=_(u'The body of the message.'))
    recipient_number = models.CharField(max_length=32,
        help_text=_(u'The international number of the recipient'
                     ', without the leading +'))

    sender = models.ForeignKey('auth.User', related_name='sent_sms_messages')

    sender_name = models.CharField(max_length=11)

    send_date = models.DateTimeField(null=True, blank=True, editable=False)
    delivery_date = models.DateTimeField(null=True, blank=True, editable=False,
                                         help_text="The date the message was delivered.")
    uuid = uuidfield.fields.UUIDField(auto=True,
        help_text=_(u'Used for associating replies.'))

    status = models.CharField(max_length=16, choices=MESSAGE_STATUSES,
        default="Unsent",
    )
    status_message = models.CharField(max_length=128, null=True, blank=True)
    billed = models.BooleanField(default=False)

    # Generic relation to whoever is billed for the message.
    content_type = models.ForeignKey('contenttypes.ContentType')
    object_id = models.PositiveIntegerField()
    billee = generic.GenericForeignKey()

    gateway = models.ForeignKey('sms.Gateway',
        null=True, blank=True, editable=False)
    gateway_message_id = models.CharField(max_length=128,
        blank=True, null=True, editable=False)

    reply_callback = picklefield.PickledObjectField(null=True, blank=True)

    gateway_charge = models.DecimalField(max_digits=10, decimal_places=5,
        null=True, blank=True)

    charge = models.DecimalField(max_digits=10, decimal_places=5,
                                         null=True, blank=True)

    objects = MessageManager()

    class Meta:
        app_label = 'sms'
        permissions = (
            ('view_message', 'Can view message'),
        )
        ordering = ('send_date',)

    def send(self, gateway):
        """Hand this message to ``gateway`` for sending."""
        gateway.send(self)

    @property
    def length(self):
        """
        Number of 160-character segments counted for ``content``.

        Floor division keeps the result an integer on every Python version
        (``__unicode__`` formats it with ``%i``).
        """
        # NOTE(review): an exact multiple of 160 characters counts one extra
        # segment, and an earlier draft used 70-character segments for
        # content that is not plain ASCII.  Confirm the intended rule before
        # changing this: the pre_save handler uses it as the default charge.
        return len(self.content) // 160 + 1

    @property
    def local_send_time(self):
        """``send_date`` converted to the billee's timezone, if it has one."""
        # TODO: Get this from UserProfile?
        if getattr(self.billee, 'timezone', None):
            return adjust_datetime_to_timezone(
                self.send_date,
                settings.TIME_ZONE,
                self.billee.timezone
            )
        return self.send_date

    @property
    def local_send_date(self):
        """Date part of ``local_send_time``."""
        return self.local_send_time.date()

    def __unicode__(self):
        return "[%s] Sent to %s by %s at %s [%i]" % (
            self.status,
            self.recipient_number,
            self.sender,
            self.send_date,
            self.length
        )


@receiver(pre_save, sender=Message)
def my_handler(sender, **kwargs):
    """Default a message's ``charge`` to its segment count before saving."""
    instance = kwargs['instance']
    if not instance.charge:
        instance.charge = instance.length
        # No need to save, as we're slipping the value in
        # before we hit the database.

contact model

import os
import datetime
from uuid import uuid4
from datetime import date
from django.db import models
from django.core.urlresolvers import reverse
from django.contrib.auth.models import User
from django.utils.translation import ugettext as _
from django.utils import timezone
from django.db.models.signals import pre_delete
from django.dispatch.dispatcher import receiver

from adaptor.fields import *
from adaptor.model import CsvModel

def path_and_rename(path):
    """
    Build an upload-path callable that stores files under ``path``.

    The returned callable keeps the text after the last dot of the uploaded
    file name as the extension and replaces the rest with the instance's
    primary key, or with a random hex string when the instance has none.
    """
    def wrapper(instance, filename):
        extension = filename.rsplit('.', 1)[-1]
        # Unsaved instances have no usable pk yet, so use a random name.
        stem = instance.pk if instance.pk else uuid4().hex
        return os.path.join(path, '{}.{}'.format(stem, extension))

    return wrapper


class GroupManager(models.Manager):
    """Manager adding a per-user lookup for groups."""

    def for_user(self, user):
        """Return the groups whose ``user`` is the given user."""
        groups = self.get_query_set()
        return groups.filter(user=user)


class Group(models.Model):
    """
    A named group, owned by a single user.
    """
    name = models.CharField(max_length=60)
    modified = models.DateTimeField(
        null=True,
        auto_now=True,
        help_text="Shows when object was modified.",
    )
    created = models.DateTimeField(
        auto_now_add=True,
        help_text="Shows when object was created.",
    )

    # Owning user.  NOTE(review): the reverse accessor on User is named
    # "user" as well; confirm nothing depends on it before renaming.
    user = models.ForeignKey(User, related_name="user")

    objects = GroupManager()

    def __unicode__(self):
        return self.name

    def get_absolute_url(self):
        """URL of the view showing this group."""
        url_args = [str(self.id)]
        return reverse('contacts.views.group', args=url_args)

    def get_delete_url(self):
        """URL of the delete-confirmation view for this group."""
        url_args = [str(self.id)]
        return reverse('contacts.views.group_delete_confirm', args=url_args)


class ContactManager(models.Manager):
    """
    Custom manager with per-owner contact lookups.
    """

    def unsorted_contacts(self, user):
        """
        Return the user's contacts that are not assigned to any group.
        """
        contacts = self.get_query_set()
        return contacts.filter(contact_owner=user, group=None)

    def for_user_and_group(self, user, group):
        """
        Return the user's contacts that belong to ``group``.
        """
        contacts = self.get_query_set()
        return contacts.filter(contact_owner=user, group=group)

    def for_user(self, user):
        """
        Return every contact owned by the user.
        """
        contacts = self.get_query_set()
        return contacts.filter(contact_owner=user)




class Contact(models.Model):
    """
    A single contact, owned by a user and optionally placed in a group.
    """
    first_name = models.CharField(max_length=60, blank=True)
    last_name = models.CharField(max_length=60, blank=True)
    company = models.CharField(max_length=100, blank=True)
    mobile = models.CharField(max_length=15)
    email = models.EmailField(max_length=100, blank=True)
    subscribed = models.NullBooleanField(
        default=1,
        help_text="Shows if contact is unsubscribed to SMS/Email.",
    )
    modified = models.DateTimeField(
        null=True,
        auto_now=True,
        help_text="Shows when object was modified.",
    )
    created = models.DateTimeField(
        auto_now_add=True,
        help_text="Shows when object was created.",
    )

    objects = ContactManager()

    # Relations: optional group membership and the owning user.
    group = models.ForeignKey(Group, related_name='contacts', blank=True, null=True)
    contact_owner = models.ForeignKey(User)

    def __unicode__(self):
        return self.first_name

    def full_name(self):
        """First and last name joined by a single space."""
        name_parts = (self.first_name, self.last_name)
        return "%s %s" % name_parts

    def get_delete_url(self):
        """URL of the delete view for this contact within its group."""
        url_args = [str(self.id), str(self.group_id)]
        return reverse('contacts.views.contact_delete', args=url_args)

    def get_group_absolute_url(self):
        """URL of the view showing this contact's group."""
        url_args = [str(self.group_id)]
        return reverse('contacts.views.group', args=url_args)

    @property
    def user(self):
        """Alias for ``contact_owner``."""
        return self.contact_owner


@receiver(pre_delete, sender=Contact)
def contact_cleanup(sender, instance, **kwargs):
    """
    Delete the Unsubscribe rows that point at a Contact about to be deleted.

    They are matched by object id and the 'contact' content type, so the
    cascade delete does not remove them on its own.
    """
    # Imported here rather than at module level, as in the original.
    from unsubscribe.models import Unsubscribe
    Unsubscribe.objects.filter(
        object_id=instance.id,
        content_type__model='contact',
    ).delete()





class Upload(models.Model):
    """
    Stores jobs and status uploads of file uploads for CSV import.
    """
    filepath = models.FileField(upload_to=path_and_rename('uploadsCSV'),
                                help_text="It can take several minutes for contacts to appear.")
    # Upload audit information
    uploaded_by = models.ForeignKey(User)
    date_uploaded = models.DateTimeField(auto_now_add=True)

    # Processing audit information
    PENDING, PROCESSED, FAILED = 'Pending', 'Processed', 'Failed'
    STATUSES = (
        (PENDING, _(PENDING)),
        (PROCESSED, _(PROCESSED)),
        (FAILED, _(FAILED)),
    )

    status = models.CharField(max_length=64, choices=STATUSES, default=PENDING)
    processing_description = models.TextField(blank=True, null=True)
    num_records = models.PositiveIntegerField()
    num_columns = models.PositiveIntegerField()
    date_start_processing = models.DateTimeField(null=True)
    date_end_processing = models.DateTimeField(null=True)

    #FKs
    group = models.ForeignKey(Group)

    def get_configurator_url(self):
        """URL of the configurator view for this upload."""
        return reverse('contacts.views.upload_configurator', args=[str(self.id)])

    def process(self, cleaned_data):
        """
        Import the uploaded CSV and record the outcome on this row.

        ``cleaned_data`` is passed through to ``import_this`` unchanged.  Any
        exception raised by the import is caught and stored as the failure
        description, so this method does not raise for import errors.
        """
        self.date_start_processing = timezone.now()
        try:
            # NOTE(review): both extra fields use the same position; confirm
            # that this is what the importer expects.
            group_position = self.num_columns + 1
            upload_id_position = self.num_columns + 1

            # Try and import CSV
            import_this(data=self.filepath, extra_fields=[
                {'value': self.group_id, 'position': group_position},
                {'value': self.uploaded_by.id, 'position': upload_id_position}], cleaned_data=cleaned_data)

            self._mark_processed(self.num_records)

        except Exception as e:
            # Deliberately broad: every failure is recorded on the upload.
            self._mark_failed(unicode(e))

    def was_processing_successful(self):
        """Return True when the upload reached the PROCESSED status."""
        return self.status == self.PROCESSED

    def _mark_processed(self, num_records, description=None):
        """Save the upload as processed, stamping the end time."""
        self.status = self.PROCESSED
        # date_end_processing is a DateTimeField, so store a datetime taken
        # the same way as date_start_processing rather than a bare date.
        self.date_end_processing = timezone.now()
        self.num_records = num_records
        self.processing_description = description
        self.save()

    def _mark_failed(self, description):
        """Save the upload as failed with ``description`` as the reason."""
        self.status = self.FAILED
        self.processing_description = description
        self.save()


def import_this(cleaned_data, *args, **kw):
    """
    Import contacts through a CSV model built from ``cleaned_data``.

    Each ``cleaned_data`` item with a non-empty value adds a ``CharField``
    named after the value; its ``row_num`` is the key with its first three
    characters removed.  ``*args`` and ``**kw`` are forwarded to
    ``import_data``, whose result is returned.
    """
    # Collect the fields in a per-call namespace.  Setting them on CsvModel
    # itself would leave them on the shared base class, so columns from one
    # import would leak into every later one.
    attrs = {}
    for k, v in cleaned_data.items():
        if v != '':
            # presumably keys carry a three-character prefix before the
            # column number; verify against the form that builds them
            attrs[v] = CharField(row_num=k[3:])

    attrs['group'] = DjangoModelField(Group, row_num="5")
    attrs['contact_owner'] = DjangoModelField(User, row_num="6")

    class Meta:
        delimiter = ","
        dbModel = Contact
        update = {'keys': ["mobile", "group"]}

    attrs['Meta'] = Meta

    # Build the subclass with CsvModel's own metaclass so class creation
    # behaves exactly as a ``class`` statement would.
    ContactCSVModel = type(CsvModel)('ContactCSVModel', (CsvModel,), attrs)

    return ContactCSVModel.import_data(*args, **kw)

Upvotes: 0

Views: 2315

Answers (1)

Meitham
Meitham

Reputation: 9670

If you install django-debug-toolbar, you can actually see the queries being fired and the code responsible for firing them.

This slowness is mainly because Django fires a new DB query every time you access a related object such as e.contact_owner in the loop (plain fields such as e.mobile are loaded with the row and cost no extra query).

To prevent these queries, prefetch the data using select_related like below

# select_related only follows relations, so only the contact_owner foreign key is listed.
query = Contact.objects.select_related('contact_owner').filter(contact_owner=batch.user, subscribed=True)

If your relations are many to many, then use prefetch_related rather than select_related.

Upvotes: 1

Related Questions