unknown
unknown

Reputation: 321

N+1 queries in SerializerMethodField

I have this view

    def get_queryset(self) -> QuerySet[Good]:
        ....
        qs = (
            Good.objects.values('brand_id', 'brand__name')
            .annotate(
                ....
            )
            .prefetch_related(Prefetch('history', StocksHistory.objects.filter(Q(**subquery_filter_args))))
            .order_by('-total_sales')
        )
        return qs

and serializer

class ExtendedBrandSerializer(serializers.ModelSerializer):
    ...
    history = serializers.SerializerMethodField()

    class Meta:
        model = Good
        fields = (
            ...
            'history',
        )

    def get_history(self, good: dict) -> dict:
      ....

      return StocksHistorySerializer(
        StocksHistory.objects.extra(select={'day': 'date( snap_at )'})
        .values('day')
        .filter(history_filter_query)
        .annotate(
            ....
        ),
        many=True,
      ).data

Relation: StocksHistory (*) -> (1) Good.

I have N+1 queries in SerializerMethodField. How can I fix it?

Perhaps there is a way to move annotate from serializer to view?

The bottom line is that I also need the history key in the response, which will contain a list of these child objects.

Full queries - https://gist.github.com/dima-dmytruk23/0183014d76ad25f4e2a0ca87ec225a10

As far as I understand, it is unrealistic to implement this using Django ORM. There is an idea to write this in RAW SQL, but I also don’t understand how to make such a query (

UPDATE:

At the moment, I have done this with two queries and manually combine the querysets into the response I need, but I do not like this solution.

view

    def get_queryset(self) -> QuerySet[Good]:
        subquery_filter_args = {}
        history_filter_args = {}
        if all([key in self.request.query_params for key in ['history_from_date', 'history_to_date']]):
            from_date, to_date = get_history_date_range(request=self.request)
            subquery_filter_args.update({'snap_at__range': (from_date, to_date)})
            history_filter_args.update({'history__snap_at__range': (from_date, to_date)})
        history_filter_query = Q(**history_filter_args)

        qs = (
            Good.objects.values('brand_id', 'brand__name')
            .annotate(
                total_sales=Sum('history__sales', filter=history_filter_query),
                avg_sales_per_day=Avg('history__sales', filter=history_filter_query),
                total_revenue=Sum('history__revenue', filter=history_filter_query),
                avg_revenue_per_day=Avg('history__revenue', filter=history_filter_query),
                rating=Avg('history__rating', filter=history_filter_query),
                feedbacks=Sum('history__feedbacks', filter=history_filter_query),
                base_price=Avg('history__base_price', filter=history_filter_query),
                price=Avg('history__price', filter=history_filter_query),
                price_with_discount=Avg('history__price_with_discount', filter=history_filter_query),
                discount=Avg('history__discount', filter=history_filter_query),
                max_price=Max('history__price', filter=history_filter_query),
                min_price=Min('history__price', filter=history_filter_query),
                avg_price=Avg('history__price', filter=history_filter_query),
            )
            .prefetch_related(Prefetch('history', StocksHistory.objects.filter(Q(**subquery_filter_args))))
            .order_by('-total_sales')
        )
        return qs

    def list(self, request, *args, **kwargs):
        queryset = self.filter_queryset(self.get_queryset())
        page = self.paginate_queryset(queryset)
        if page is not None:
            serializer = self.get_serializer(page, many=True)
            data = serializer.data
            add_histories_to_top_response(request=self.request, data=data)
            response = self.get_paginated_response(data)
            return response

        serializer = self.get_serializer(queryset, many=True)
        data = serializer.data
        add_histories_to_top_response(request=self.request, data=data)
        return Response(data)

helper

def add_histories_to_top_response(request: Request, data: OrderedDict):
    history_filter_args = {'good__brand__in': tuple(item['brand_id'] for item in data)}
    if all([key in request.query_params for key in ['history_from_date', 'history_to_date']]):
        from_date, to_date = get_history_date_range()
        history_filter_args.update({'history__snap_at__range': (from_date, to_date)})
    history_filter_query = Q(**history_filter_args)

    histories = StocksHistorySerializer(
        StocksHistory.objects.extra(select={'day': 'date( snap_at )'})
        .values('day')
        .filter(history_filter_query)
        .annotate(
            sales=Sum('sales', filter=history_filter_query),
            feedbacks=Sum('feedbacks', filter=history_filter_query),
            rating=Avg('rating', filter=history_filter_query),
            base_price=Avg('base_price', filter=history_filter_query),
            price=Avg('price', filter=history_filter_query),
            price_with_discount=Avg('price_with_discount', filter=history_filter_query),
            revenue=Avg('revenue', filter=history_filter_query),
            stock_balances=Avg('stock_balances', filter=history_filter_query),
            snap_at=TruncDay('snap_at', filter=history_filter_query),
        )
        .values(
            'snap_at',
            'feedbacks',
            'rating',
            'price',
            'base_price',
            'price_with_discount',
            'sales',
            'revenue',
            'stock_balances',
            'good__brand_id',
        ),
        many=True,
    ).data

    set_histories(data, histories)


def set_histories(data: OrderedDict, histories: OrderedDict):
    for item in data:
        item['history'] = []
        for key, group in itertools.groupby(histories, lambda x: x['brand_id']):
            if item['brand_id'] == key:
                item['history'] = list(group)


def get_history_date_range(request: Request) -> tuple[datetime, datetime]:
    _date_format = '%Y-%m-%d'

    from_date = datetime.strptime(request.query_params['history_from_date'], _date_format).replace(tzinfo=utc)
    to_date = datetime.strptime(request.query_params['history_to_date'], _date_format).replace(tzinfo=utc)
    return from_date, to_date

Upvotes: 3

Views: 827

Answers (1)

Brian Destura
Brian Destura

Reputation: 12068

You can move all your filters and annotations on the get_queryset method:

    def get_queryset(self) -> QuerySet[Good]:
        # ...
        qs = (
            Good.objects.values(
                'brand_id', 'brand__name'
            ).annotate(
                # ...
            ).prefetch_related(
                Prefetch(
                    'history', 
                    StocksHistory.objects.filter(
                        Q(**subquery_filter_args)
                    ).extra(
                        select={'day': 'date( snap_at )'}
                    ).annotate(
                        # ...
                    )
                )
            ).order_by('-total_sales')
        )
        return qs

Then just use good.history.all() in your serializer method, to avoid the N+1 like this:

class ExtendedBrandSerializer(serializers.ModelSerializer):
    # ...
    history = serializers.SerializerMethodField()

    class Meta:
        model = Good
        fields = (
            # ...
            'history',
        )

    def get_history(self, good: dict) -> dict:
        # ...
        return StocksHistorySerializer(
            good.history.all(),
            many=True,
        ).data

Upvotes: 2

Related Questions