bitroost
bitroost

Reputation: 31

How to create a grouped QuerySet for use in ModelAdmin.get_queryset

I want to be able to display a list of banned ip addresses grouped by ip, sorted descending by count and the latest time of ban, as an admin changelist.

fail2ban generates a sqlite-db and debian-8/fail2ban-debian-0.9.6 generates this table:

"CREATE TABLE bans(" \
    "jail TEXT NOT NULL, " \
    "ip TEXT, " \
    "timeofban INTEGER NOT NULL, " \
    "data JSON, " \
    "FOREIGN KEY(jail) REFERENCES jails(name) " \
    ");" \
    "CREATE INDEX bans_jail_timeofban_ip ON bans(jail, timeofban);" \
    "CREATE INDEX bans_jail_ip ON bans(jail, ip);" \
    "CREATE INDEX bans_ip ON bans(ip);"

the SQL i would like django to produce should return the same results as this SQL:

SELECT
    ip,
    strftime('%Y-%m-%d %H:%M:%S', timeofban, 'unixepoch', 'localtime') as latest,
    COUNT(ip) as ipcount
FROM
    bans
GROUP BY
    ip
ORDER BY
    ipcount DESC,
    timeofban DESC

So i started to set up the additional db in settings:

DATABASES = {
    ...
    'fail2ban': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': '/var/lib/fail2ban/fail2ban.sqlite3',
    }
}

DATABASE_ROUTERS = [
    'fail2ban.dbrouter.Fail2BanRouter'
]

Created a model:

    from django.db import models

    class Ban(models.Model):
       jail = models.CharField('jail', max_length=250)
       ip = models.CharField('ip', max_length=100)
       timeofban = models.IntegerField('timeofban', primary_key=True)
       data = models.TextField('data', blank=True, null=True)

   class Meta:
       db_table = 'bans'
       ordering = ["-timeofban"]
       managed = False

   def save(self, **kwargs):
       raise NotImplementedError()

Setup the admin:

import datetime
from django.contrib import admin
from django.db.models import Count, Max, OuterRef, Subquery

from .models import Ban

@admin.register(Ban)
class BanAdmin(admin.ModelAdmin):
    list_display = ['ip', 'jail', 'get_timeofban']

    def has_add_permission(self, request):
        return False

    def has_delete_permission(self, request, obj=None):
        return False

    def has_change_permission(self, request, obj=None):
        return False

    def has_view_permission(self, request, obj=None):
        return True

    def get_timeofban(self, instance):
        return datetime.datetime.fromtimestamp(instance.timeofban).strftime('%Y-%m-%d %H:%M:%S')
    get_timeofban.short_description = 'timeofban'
    get_timeofban.admin_order_field = 'timeofban'


    def get_queryset(self, request):

        qs = super().get_queryset(request)

        duplicates = qs.values('ip') \
                       .annotate(ipcount=Count('ip'), latest=Max('timeofban')) \
                       .order_by('-ipcount') \
                       .filter(ipcount__gt=10) \
                       .values_list('latest', flat=True)

        qs = qs.filter(timeofban__in=duplicates)

        subquery = Ban.objects.values('ip') \
                     .annotate(ipcount=Count('ip')) \
                     .filter(ip=OuterRef('ip')) \
                     .order_by('-ipcount') \
                     .values('ipcount')

        return qs.annotate(
                   ipcount=Subquery(subquery)
               ).order_by('-ipcount')

That's the only way i could come up with a QuerySet, which i need for the admin. The db has roughly 80000 rows so without filtering first there is no way this can be achieved via subquery. And i cannot believe that in django this sql is the only way to get to my desired results:

SELECT 
"bans"."jail", "bans"."ip", "bans"."timeofban", "bans"."data",
(
    SELECT COUNT(U0."ip") AS "ipcount" FROM "bans" U0 
    WHERE U0."ip" = ("bans"."ip") 
    GROUP BY U0."ip" ORDER BY "ipcount" DESC
) AS "ipcount" 
FROM "bans" 
WHERE "bans"."timeofban" IN (SELECT MAX(U0."timeofban") AS "latest" 
FROM "bans" U0 
GROUP BY U0."ip" HAVING COUNT(U0."ip") > 10) 
ORDER BY "ipcount" DESC
LIMIT 21

I know i can get a ValuesQuerySet by creating a query which almost looks like what i need:

from django import db
from .models import Ban
Ban.objects.order_by('-ipcount').values('ip').annotate(ipcount=Count('ip'))
db.connections['fail2ban'].queries[-1]['sql']
# 'SELECT "bans"."ip", COUNT("bans"."ip") AS "ipcount" FROM "bans" GROUP BY "bans"."ip" ORDER BY "ipcount" DESC  LIMIT 21'

but even here i am puzzled as to how i would get the additional field 'timeofban' out of that.

So my question is if there is something that i missed in achieving the SQL i would like django to produce as a QuerySet (not a ValueQuerySet) and a better solution than the Subquery-Overkill i came up with.

Upvotes: 1

Views: 242

Answers (1)

bitroost
bitroost

Reputation: 31

So i ended up adding a primary key-autoincrement id field to the fail2ban-sqlite table which shouldn't interfere with fail2ban, and using a proxy of the bans-model with a custom manager:

class Ban(models.Model):
    jail = models.CharField('jail', max_length=255)
    ip = models.GenericIPAddressField('ip', protocol='IPv4')
    timeofban = models.IntegerField('timeofban')
    data = models.TextField('data')
    id = models.IntegerField('id', primary_key=True)

    objects = BanManager()

    class Meta:
        db_table = 'bans'
        verbose_name = "Ban"
        verbose_name_plural = "Bans"
        managed = False

    def save(self, **kwargs):
        raise NotImplementedError()


class BanAggregatedManager(models.Manager):
    def get_queryset(self):
        qs = super().get_queryset()
        qs = qs.defer('data')

        latest = qs.values('ip') \
                   .annotate(Count('ip')) \
                   .order_by() \
                   .annotate(uid=Max('id')) \
                   .values('uid')

        ipcount = qs.filter(ip=OuterRef('ip')) \
                    .values('ip') \
                    .annotate(c=Count('ip')) \
                    .order_by() \
                    .values('c')

        annotated = qs.filter(id__in=Subquery(latest)) \
                      .annotate(ipcount=Subquery(ipcount)) \
                      .order_by('-ipcount')

        return annotated


class BanAggregated(Ban):
    objects = BanAggregatedManager()

    class Meta:
        proxy = True
        verbose_name = 'Ban Aggregated'
        verbose_name_plural = 'Bans Aggregated'
        managed = False

    def save(self, **kwargs):
        raise NotImplementedError()

The admin looks like this now:

@admin.register(BanAggregated)
class BanAggregatedAdmin(BanAdmin):
    list_display = ['ip', 'get_timeofban', 'ipcount']
    list_per_page = 20

    def ipcount(self, instance):
        return instance.ipcount
    ipcount.short_description = 'Count'
    ipcount.admin_order_field = 'ipcount'

Works well for me so far.

Upvotes: 2

Related Questions