Bill
Bill

Reputation: 924

SQLAlchemy eagerly/joined loading self referential one-to-one relationship

I have a self-referential 1-to-1 relationship (using uselist=False) for which I am trying to eagerly load the next_node at query time. If I remove uselist=False and access next_node as an instrumented list, the list is properly loaded in the original query. Is this a known issue, or am I doing something wrong when attempting to eagerly load the next_node backref relationship?

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import (
    backref, joinedload, relationship, scoped_session, sessionmaker)
from sqlalchemy import create_engine, Column, Integer, ForeignKey, MetaData

class Node(declarative_base()):
    __tablename__ = 'nodes'

    id = Column(Integer, primary_key=True)
    value = Column(Integer)
    prev_node_id = Column(Integer, ForeignKey('nodes.id'), unique=True)
    prev_node = relationship(
        'Node',
        uselist=False,
        remote_side=[id],
        backref=backref('next_node', uselist=False))

engine = create_engine('sqlite://', echo=True)
metadata = MetaData()
with engine.begin() as connection:
    metadata = Node.metadata
    metadata.create_all(connection)

session = scoped_session(sessionmaker(bind=engine))

a = Node()
session.add(a)
b = Node()
session.add(b)
b.prev_node = a
session.commit()

query = session.query(Node)
query = query.options(joinedload(Node.next_node))
print("executing joined query")
for n in query:
    if n.next_node is not None:
        print(n.id, n.next_node.id)
print("done!")

Results in two select statements:

...
executing joined query
2017-04-19 01:30:54,514 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-04-19 01:30:54,515 INFO sqlalchemy.engine.base.Engine SELECT nodes.id AS nodes_id, nodes.value AS nodes_value, nodes.prev_node_id AS nodes_prev_node_id, nodes_1.id AS nodes_1_id, nodes_1.value AS nodes_1_value, nodes_1.prev_node_id AS nodes_1_prev_node_id 
FROM nodes LEFT OUTER JOIN nodes AS nodes_1 ON nodes.id = nodes_1.prev_node_id
2017-04-19 01:30:54,515 INFO sqlalchemy.engine.base.Engine ()
(1, 2)
2017-04-19 01:30:54,517 INFO sqlalchemy.engine.base.Engine SELECT nodes.id AS nodes_id, nodes.value AS nodes_value, nodes.prev_node_id AS nodes_prev_node_id 
FROM nodes 
WHERE ? = nodes.prev_node_id
2017-04-19 01:30:54,517 INFO sqlalchemy.engine.base.Engine (2,)
done!

However, if, instead, I do not use the uselist=False flag, I get the expected single query:

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import (
    backref, joinedload, relationship, scoped_session, sessionmaker)
from sqlalchemy import create_engine, Column, Integer, ForeignKey, MetaData

class Node(declarative_base()):
    __tablename__ = 'nodes'

    id = Column(Integer, primary_key=True)
    value = Column(Integer)
    prev_node_id = Column(Integer, ForeignKey('nodes.id'), unique=True)
    prev_node = relationship(
        'Node',
        uselist=False,
        remote_side=[id],
        backref=backref('next_node'))

engine = create_engine('sqlite://', echo=True)
metadata = MetaData()
with engine.begin() as connection:
    metadata = Node.metadata
    metadata.create_all(connection)

session = scoped_session(sessionmaker(bind=engine))

a = Node()
session.add(a)
b = Node()
session.add(b)
b.prev_node = a
session.commit()

query = session.query(Node)
query = query.options(joinedload(Node.next_node))
print("executing joined query")
for n in query:
    if n.next_node is not None and len(n.next_node) > 0:
        print(n.id, n.next_node[0].id)
print("done!")

Results in a single select statement:

...
executing joined query
2017-04-19 01:29:30,570 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-04-19 01:29:30,571 INFO sqlalchemy.engine.base.Engine SELECT nodes.id AS nodes_id, nodes.value AS nodes_value, nodes.prev_node_id AS nodes_prev_node_id, nodes_1.id AS nodes_1_id, nodes_1.value AS nodes_1_value, nodes_1.prev_node_id AS nodes_1_prev_node_id 
FROM nodes LEFT OUTER JOIN nodes AS nodes_1 ON nodes.id = nodes_1.prev_node_id
2017-04-19 01:29:30,571 INFO sqlalchemy.engine.base.Engine ()
(1, 2)
done!

Upvotes: 2

Views: 1437

Answers (1)

Haleemur Ali
Haleemur Ali

Reputation: 28233

The relationship configuration is missing a parameter join_depth

If you set it to join_depth=0, i.e.

prev_node = orm.relationship(
    'Node', uselist=False,
    remote_side=[id], join_depth=0, 
    backref=orm.backref('next_node', uselist=False)
)

then you first block of code should eagerload the next_node in a single query.

edit: added executable python script & output.

tested on sqlalchemy 1.1.8, python 3.6.0 & OS X El Capitan

# eager.py
import sqlalchemy.ext.declarative as dec
import sqlalchemy as sa
import sqlalchemy.orm as orm

Base = dec.declarative_base()

class Node(Base):
  __tablename__ = 'nodes'
  id = sa.Column(sa.Integer, primary_key=True)
  value = sa.Column(sa.Integer)
  prev_node_id = sa.Column(sa.Integer, sa.ForeignKey('nodes.id'), unique=True)
  prev_node = orm.relationship(
    'Node', uselist=False,
    remote_side=[id],
    join_depth=0, backref=orm.backref('next_node', uselist=False)
  )

print('setup database & schema')
engine = sa.create_engine('sqlite://', echo=True)
Base.metadata.create_all(engine)

session = orm.scoped_session(orm.sessionmaker(bind=engine))
a = Node()
b = Node()
b.prev_node = a
print("inserting nodes")
session.add(a)
session.add(b)
session.commit()
query = session.query(Node)
query = query.options(orm.joinedload(Node.next_node))

print('string representation of query:')
print(str(query))
print('*'*80)
print('executing joined query')

for n in query:
  if n.next_node is not None:
    print(n.id, n.next_node.id)
print('done!')

And, this script produces the following output:

# output of ``$ python eager.py``
setup database & schema
2017-04-19 22:23:36,491 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-04-19 22:23:36,491 INFO sqlalchemy.engine.base.Engine ()
2017-04-19 22:23:36,492 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-04-19 22:23:36,492 INFO sqlalchemy.engine.base.Engine ()
2017-04-19 22:23:36,492 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("nodes")
2017-04-19 22:23:36,493 INFO sqlalchemy.engine.base.Engine ()
2017-04-19 22:23:36,493 INFO sqlalchemy.engine.base.Engine
CREATE TABLE nodes (
    id INTEGER NOT NULL,
    value INTEGER,
    prev_node_id INTEGER,
    PRIMARY KEY (id),
    UNIQUE (prev_node_id),
    FOREIGN KEY(prev_node_id) REFERENCES nodes (id)
)


2017-04-19 22:23:36,493 INFO sqlalchemy.engine.base.Engine ()
2017-04-19 22:23:36,494 INFO sqlalchemy.engine.base.Engine COMMIT
inserting nodes
2017-04-19 22:23:36,497 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-04-19 22:23:36,498 INFO sqlalchemy.engine.base.Engine INSERT INTO nodes (value, prev_node_id) VALUES (?, ?)
2017-04-19 22:23:36,498 INFO sqlalchemy.engine.base.Engine (None, None)
2017-04-19 22:23:36,498 INFO sqlalchemy.engine.base.Engine INSERT INTO nodes (value, prev_node_id) VALUES (?, ?)
2017-04-19 22:23:36,499 INFO sqlalchemy.engine.base.Engine (None, 1)
2017-04-19 22:23:36,499 INFO sqlalchemy.engine.base.Engine COMMIT
string representation of query:
SELECT nodes.id AS nodes_id, nodes.value AS nodes_value, nodes.prev_node_id AS nodes_prev_node_id, nodes_1.id AS nodes_1_id, nodes_1.value AS nodes_1_value, nodes_1.prev_node_id AS nodes_1_prev_node_id
FROM nodes LEFT OUTER JOIN nodes AS nodes_1 ON nodes.id = nodes_1.prev_node_id
********************************************************************************
executing joined query
2017-04-19 22:23:36,501 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-04-19 22:23:36,501 INFO sqlalchemy.engine.base.Engine SELECT nodes.id AS nodes_id, nodes.value AS nodes_value, nodes.prev_node_id AS nodes_prev_node_id, nodes_1.id AS nodes_1_id, nodes_1.value AS nodes_1_value, nodes_1.prev_node_id AS nodes_1_prev_node_id
FROM nodes LEFT OUTER JOIN nodes AS nodes_1 ON nodes.id = nodes_1.prev_node_id
2017-04-19 22:23:36,501 INFO sqlalchemy.engine.base.Engine ()
1 2
done!

As you can see from the logs, only 1 query is run on the database:

SELECT nodes.id AS nodes_id, nodes.value AS nodes_value, nodes.prev_node_id AS nodes_prev_node_id, nodes_1.id AS nodes_1_id, nodes_1.value AS nodes_1_value, nodes_1.prev_node_id AS nodes_1_prev_node_id
FROM nodes LEFT OUTER JOIN nodes AS nodes_1 ON nodes.id = nodes_1.prev_node_id

Upvotes: 5

Related Questions