AlexW
AlexW

Reputation: 2589

Django queryset not returning distinct values

I have a query that for some reason is not returning distinct values even though I have specified distinct, I thought it may be because of the only, so I removed that, but the list is still the same

circuit_providers =  CircuitInfoData.objects.only('provider').values('provider').distinct()

I just want a list of unqiue providers

model.py

from __future__ import unicode_literals
from django.db import models
import string
import random
import time
import os

# Create your models here.
from service.models import ServiceContacts

def site_photos_path(instance, filename):
    file ,extension = os.path.splitext(filename)
    # file will be uploaded to MEDIA_ROOT/user_<id>/<filename>
    chars=string.ascii_uppercase + string.digits
    random_string = ''.join(random.choice(chars) for _ in range(6))
    filename = '%s-%s%s' % (random_string,time.strftime("%d-%m-%H-%M-%S"),extension)
    return 'site_photos/{0}'.format(filename)

def service_upload_path(instance, filename):
    file ,extension = os.path.splitext(filename)
    # file will be uploaded to MEDIA_ROOT/user_<id>/<filename>
    chars=string.ascii_uppercase + string.digits
    random_string = ''.join(random.choice(chars) for _ in range(6))
    filename = '%s-%s%s' % (random_string,time.strftime("%d-%m-%H-%M-%S"),extension)
    return 'service_files/{0}'.format(filename)    

def site_files_path(instance, filename):
    file ,extension = os.path.splitext(filename)
    # file will be uploaded to MEDIA_ROOT/user_<id>/<filename>
    chars=string.ascii_uppercase + string.digits
    random_string = ''.join(random.choice(chars) for _ in range(6))
    filename = '%s-%s%s' % (random_string,time.strftime("%d-%m-%H-%M-%S"),extension)
    return 'site_files/{0}'.format(filename)   

provider_choices = (
        ('KCOM','KCOM'),
        ('BT','BT'),
        ('EE','EE'),
        ('THREE','THREE'),
    ) 
circuit_choices = (
        ('DSL','DSL'),
        ('VDSL','VDSL'),
        ('MPLS','MPLS'),
        ('4G','4G'),
        ('Internet Leased Line','Internet Leased Line'),
    )     

subnet_mask_choices = (
        ('/16','/16'),
        ('/24','/24'),
        ('/25','/25'),
        ('/26','/26'),
        ('/27','/27'),
        ('/28','/28'),
        ('/29','/29'),
        ('/30','/30'),
        ('/31','/31'),
    )       

class ShowroomConfigData(models.Model):
    location = models.CharField(max_length=50)
    subnet = models.GenericIPAddressField(protocol='IPv4')
    r1_loopback_ip = models.GenericIPAddressField(protocol='IPv4',verbose_name="R1 Loopback IP")
    r2_loopback_ip = models.GenericIPAddressField(protocol='IPv4',verbose_name="R2 Loopback IP")  
    opening_date = models.DateField(verbose_name="Showroom opening date")
    last_hw_refresh_date = models.DateField(verbose_name="Date of latest hardware refresh")
    is_showroom = models.BooleanField(default=True,verbose_name="Is this site a showroom?")

    class Meta:
            verbose_name = "Showroom Data"
            verbose_name_plural = "Showroom Data"  
            ordering = ('location',)

    def __unicode__(self):
        return self.location   


class MajorSiteInfoData(models.Model):
    location = models.CharField(max_length=200)
    major_subnet = models.GenericIPAddressField(protocol='IPv4',verbose_name="Major Site Subnet")
    routed_subnet = models.GenericIPAddressField(protocol='IPv4',verbose_name="Routed Link Subnet")
    bgp_as = models.CharField(max_length=6,verbose_name="BGP AS Number")

    class Meta:
            verbose_name = "Major Site Data"
            verbose_name_plural = "Major Site Data"      

    def __unicode__(self):
        return self.location

class CircuitInfoData(models.Model):    
    showroom_config_data = models.ForeignKey(ShowroomConfigData,verbose_name="Install Showroom")
    major_site_info = models.ForeignKey(MajorSiteInfoData,verbose_name="Install Site") 
    circuit_type = models.CharField(max_length=100,choices=circuit_choices)    
    circuit_speed = models.IntegerField(blank=True)
    circuit_bearer = models.IntegerField(blank=True)
    provider = models.CharField(max_length=200,choices=provider_choices)
    ref_no = models.CharField(max_length=200,verbose_name="Reference No")

    class Meta:
        verbose_name = "Circuit Data"
        verbose_name_plural = "Circuit Data"
        ordering = ('showroom_config_data__location','circuit_speed')

    def __unicode__(self):
        return '%s | %s | %s | %s | %s' % (self.showroom_config_data.location,self.major_site_info.location, self.provider, self.service_type, self.ref_no)

results from shell below

[root@network-tools infternal]# python manage.py shell
Python 2.7.5 (default, Nov 20 2015, 02:00:19)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-4)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
(InteractiveConsole)
>>> from networks.models import CircuitInfoData
>>> d = CircuitInfoData.objects.values('provider').distinct()
>>> for item in d:
...  print item
...
{'provider': u'BT'}
{'provider': u'BT'}
{'provider': u'KCOM'}
{'provider': u'BT'}
{'provider': u'BT'}
{'provider': u'KCOM'}
.....
>>> print d.query
SELECT DISTINCT "networks_circuitinfodata"."provider", "networks_showroomconfigdata"."location", "networks_circuitinfodata"."circuit_speed" FROM "networks_circuitinfodata" INNER JOIN "networks_showroomconfigdata" ON ("networks_circuitinfodata"."showroom_config_data_id" = "networks_showroomconfigdata"."id") ORDER BY "networks_showroomconfigdata"."location" ASC, "networks_circuitinfodata"."circuit_speed" ASC
>>>

one thing ive noticed is that when i print items in shell as above

#### with def __unicode__(self): #####

>>> from networks.models import CircuitInfoData
>>> d = CircuitInfoData.objects.only('provider').distinct()
>>> for i in d:
...  print i
...
Location1 | Showroom | BT | DSL | N/A
Location2 | Showroom | BT | MPLS | XXXX
Location2 | Showroom | KCOM | MPLS | XXXX
Location3 | Showroom | BT | MPLS | XXXX
Location3 | Showroom | BT | DSL | N/A
Location4 | Showroom | KCOM | MPLS | XXXXX
...

#### with out def __unicode__(self): #####

>>> from networks.models import CircuitInfoData
>>> d = CircuitInfoData.objects.only('provider').distinct()
>>> for i in d:
...  print i
...
CircuitInfoData_Deferred_circuit_bearer_circuit_cfb3d62ef325a6acfc8ddcb43c8ae1c6 object
CircuitInfoData_Deferred_circuit_bearer_circuit_cfb3d62ef325a6acfc8ddcb43c8ae1c6 object
CircuitInfoData_Deferred_circuit_bearer_circuit_cfb3d62ef325a6acfc8ddcb43c8ae1c6 object
CircuitInfoData_Deferred_circuit_bearer_circuit_cfb3d62ef325a6acfc8ddcb43c8ae1c6 object
CircuitInfoData_Deferred_circuit_bearer_circuit_cfb3d62ef325a6acfc8ddcb43c8ae1c6 object
CircuitInfoData_Deferred_circuit_bearer_circuit_cfb3d62ef325a6acfc8ddcb43c8ae1c6 object
...

#### with either ####

>>> for i in d:
...  print i.provider
...
BT
BT
KCOM
BT
BT
KCOM
...

Upvotes: 1

Views: 6817

Answers (2)

CodeRed
CodeRed

Reputation: 1410

For SQLite, the solution is to use set(). Check the example below.

queryset = set(CircuitInfoData.objects.values_list('provider', flat=True).filter(provider__icontains='keyword'))

You can also do:

queryset = set(CircuitInfoData.objects.values_list('provider', flat=True))

Upvotes: 0

e4c5
e4c5

Reputation: 53734

The documentation for distinct says

Returns a new QuerySet that uses SELECT DISTINCT in its SQL query. This eliminates duplicate rows from the query results.

By default, a QuerySet will not eliminate duplicate rows. In practice, this is rarely a problem, because simple queries such as Blog.objects.all() don’t introduce the possibility of duplicate result rows.

Distinct gives you distinct rows but you are looking at only one of the fields in the record and in that field items can be duplicated unless it has a unique constraint on it. And in this case you don't.

If you happen to be using postgresql you can do

CircuitInfoData.objects.distinct('provider')

to achieve your objective.

UPDATE: Since you mentioned in the comments that you use sqlite, use this solution.

CircuitInfoData.objects.values('provider').distinct()

This will work because now each row has only one column. the resulting query will be similar to

SELECT DISTINCT "someapp_circuitinfodata"."name" FROM "someapp_circuitinfodata"

UPDATE 2:

Notice that you have overridden the __unicode__ function.

def __unicode__(self):
     return '%s | %s | %s | %s | %s' % 
       (self.showroom_config_data.location,self.major_site_info.location, 
       self.provider, self.service_type, self.ref_no)

You are referring to the fields in the related model. This is going to be very costly (unless you use select_related). Also note that if you iterate through a queryset and use print for debug purposes it will give you misleading results (since what you are seeing is the output of __unicode__, a rather complex function)

Upvotes: 6

Related Questions