Avian
Avian

Reputation: 126

Converting a SQLAlchemy @hybrid_property into a SQL .expression

I am attempting to use an @hybrid_property to order my parent table, and I have learnt that in order to do that, the @hybrid_property must be attached to a valid expression.

read - SQLAlchemy order by function result

    @hybrid_property
    def traffic(self):
        # this getter is used when accessing the property of an instance
        if self.traffic_summary and self.traffic_summary != []:
            traffic = statistics.mean(
                [st.page_views_per_million for st in self.traffic_summary]
            )
            if traffic == 0:
                if self.traffic_history and self.traffic_history != []:
                    traffic = statistics.mean(
                    [st.page_views_per_million for st in self.traffic_history[:30]]
                    )
        else:
            if self.traffic_history and self.traffic_history != []:
                traffic = statistics.mean(
                [st.page_views_per_million for st in self.traffic_history[:30]]
                )

            else:
                traffic = 0

        return int(traffic)

    @traffic.expression
    def traffic(cls):
        # this expression is used when querying the model
        return case(
            [(cls.traffic_summary != None), cls.traffic_history)],
            else_=cls.traffic_summary
        )

The @traffic.expression is the specific code, I want to modify, the issue is, I have absolutely no idea how to replicate statistics.mean([st.page_views_per_million for st in self.traffic_summary]) or the complicated Python logic in SQL.

My questions are twofold.

  1. how do I proceed to convert the above into SQL?
  2. is it even possible to convert such complex logic into SQL?

Updated with relationship models and the way the relationships are set up on the parent_table:

    traffic_summary = db.relationship(
        "traffic_summary", backref="traffic", passive_deletes=True, lazy="subquery"
    )
    traffic_by_country = db.relationship(
        "traffic_by_country",
        backref="store",
        passive_deletes=True,
        lazy="select",
        order_by="desc(traffic_by_country.contribution_of_users)",
    )
    traffic_history = db.relationship(
        "traffic_datapoint",
        backref="traffic",
        passive_deletes=True,
        lazy="select",
        order_by="desc(traffic_datapoint.date)",
    )
class traffic_datapoint(ResourceMixin, db.Model):
    id = db.Column(db.BigInteger, primary_key=True)
    date = db.Column(db.DateTime)
    page_views_per_million = db.Column(db.BigInteger)
    page_views_per_user = db.Column(db.Float)
    alexa_rank = db.Column(db.BigInteger)
    reach_per_million = db.Column(db.BigInteger)
    store_id = db.Column(
        db.BigInteger,
        db.ForeignKey(top_store.id, onupdate="CASCADE", ondelete="CASCADE"),
        index=True,
        nullable=True,
    )


class traffic_by_country(ResourceMixin, db.Model):
    id = db.Column(db.BigInteger, primary_key=True)
    country_code = db.Column(db.String(30))
    country_name = db.Column(db.String(100))
    contribution_of_pageviews = db.Column(db.Float)
    contribution_of_users = db.Column(db.Float)
    store_id = db.Column(
        db.BigInteger,
        db.ForeignKey(top_store.id, onupdate="CASCADE", ondelete="CASCADE"),
        index=True,
        nullable=True,
    )


class traffic_summary(ResourceMixin, db.Model):
    id = db.Column(db.BigInteger, primary_key=True)
    summary_type = db.Column(db.String(100))
    alexa_rank = db.Column(db.BigInteger)
    alexa_rank_delta = db.Column(db.BigInteger)
    page_views_per_million = db.Column(db.BigInteger)
    page_views_per_user = db.Column(db.Float)
    reach_per_million = db.Column(db.BigInteger)
    store_id = db.Column(
        db.BigInteger,
        db.ForeignKey(top_store.id, onupdate="CASCADE", ondelete="CASCADE"),
        index=True,
        nullable=True,
    )

Upvotes: 1

Views: 673

Answers (1)

Ilja Everilä
Ilja Everilä

Reputation: 53017

Converting list-comprehensions to SQL is somewhat easy. After all they resemble SQL queries; you have your relation variable or table to select from, you can select tuples matching predicates, and you can restrict the result to a subset of attributes. Summarizing (aggregates) is a bit different, but not too much.

# Assume S is a set of named tuples
[(T.X, T.Y) for T in S if T.Z == 'foo']
# is like
# SELECT T.X, T.Y FROM S AS T WHERE T.Z = 'foo'

From the above we get

@traffic.expression
def traffic(cls):
    traffic_history = select([traffic_datapoint.page_views_per_million]).\
        where(traffic_datapoint.store_id == cls.id).\
        order_by(traffic_datapoint.date.desc()).\
        limit(30).\
        correlate_except(traffic_datapoint).\
        alias()

    return func.coalesce(
        func.nullif(
            select([func.avg(traffic_summary.page_views_per_million)]).
                where(traffic_summary.store_id == cls.id).
                correlate_except(traffic_summary).
                as_scalar(),
            0),
        select([func.avg(traffic_history.c.page_views_per_million)]).
            as_scalar(),
        0).cast(Integer)

The coalesce(nullif(..., 0), ..., 0) replicates the if-else logic in Python. Coalesce returns the first non-null value. If there are no related traffic summaries, then the first subquery results in null. If the result is 0, then nullif converts it to null. In that case the result of the second subquery is returned, if it is not null i.e. there are related traffic datapoints. Finally 0 is returned by default.

Upvotes: 2

Related Questions