Reputation: 14480
I have datafiles of rows of things. Each thing has a gene listed in it. It's a one-many relationship because each gene can be part of multiple things but each thing can only have one gene.
Imagine models roughly like these:
class Gene(db.Model):
__tablename__ = "gene"
id = db.Column(db.Integer, primary_key=True)
name1 = db.Column(db.Integer, index=True, unique=True, nullable=False) # nullable might be not right
name2 = db.Column(db.String(120), index=True, unique=True)
things = db.relationship("Thing", back_populates='gene')
def __init__(self, name1, name2=None):
self.name1 = name1
self.name2 = name2
@classmethod
def find_or_create(cls, name1, name2=None):
record = cls.query.filter_by(name1=name1).first()
if record != None:
if record.name2 == None and name2 != None:
record.name2 = name2
else:
record = cls(name1, name2)
db.session.add(record)
return record
class Thing(db.Model):
__tablename__ = "thing"
id = db.Column(db.Integer, primary_key=True)
gene_id = db.Column(db.Integer, db.ForeignKey("gene.id"), nullable=False, index=True)
gene = db.relationship("Gene", back_populates='thing')
data = db.Column(db.Integer)
I'd like to bulk-insert many things, but I'm afraid that by using
db.engine.execute(Thing.__table__.insert(), things)
I won't have the relationships in the database. Is there some way of preserving the relationships with a bulk add, or somehow adding these sequentially and then establishing the relationships at a later point? All the documentation about bulk adding seems to assume that you want to insert extremely simple models and I'm a little lost as to how to do this when your models are more complex (the example above is a dumbed down version).
-- Update 1 --
This answer seems to indicate that there isn't really a solution to this.
This answer seems to confirm that.
Upvotes: 4
Views: 2115
Reputation: 433
I actually changed my code quite a bit, I think it is improved and I'm changing my answer here as well.
I define the following 2 tables. Sets and Data, for each set in Sets there are many data in Data.
class Sets(sa_dec_base):
__tablename__ = 'Sets'
id = sa.Column(sa.Integer, primary_key=True)
FileName = sa.Column(sa.String(250), nullable=False)
Channel = sa.Column(sa.Integer, nullable=False)
Loop = sa.Column(sa.Integer, nullable=False)
Frequencies = sa.Column(sa.Integer, nullable=False)
Date = sa.Column(sa.String(250), nullable=False)
Time = sa.Column(sa.String(250), nullable=False)
Instrument = sa.Column(sa.String(250), nullable=False)
Set_Data = sa_orm.relationship('Data')
Set_RTD_spectra = sa_orm.relationship('RTD_spectra')
Set_RTD_info = sa.orm.relationship('RTD_info')
__table_args__ = (sa.UniqueConstraint('FileName', 'Channel', 'Loop'),)
class Data(sa_dec_base):
__tablename__ = 'Data'
id = sa.Column(sa.Integer, primary_key = True)
Frequency = sa.Column(sa.Float, nullable=False)
Magnitude = sa.Column(sa.Float, nullable=False)
Phase = sa.Column(sa.Float, nullable=False)
Set_ID = sa.Column(sa.Integer, sa.ForeignKey('Sets.id'))
Data_Set = sa_orm.relationship('Sets', foreign_keys = [Set_ID])
then, I wrote this function to bulk_insert data with relationship.
def insert_set_data(session, set2insert, data2insert, Data):
"""
Insert set and related data; with uniqueconstraint check on the set
set2insert is the prepared set object without id. A correct and unique id will given by the db itself
data2insert is a big pandas df, so that bulk_insert is used
"""
session.add(set2insert)
try:
session.flush()
except sa.exc.IntegrityError as err: # here catch uniqueconstraint error if set already in db
session.rollback()
print('already inserted ', set2insert.FileName, 'loop ', set2insert.Loop, 'channel ', set2insert.Channel)
pass
else: # if not error, flush will give the id to the set ("Set.id")
data2insert['Set_ID'] = set2insert.id # pass Set.id to data2insert as foreign_keys to keep relationship
data2insert = data2insert.to_dict(orient = 'records') # convert df to record for bulk_insert
session.bulk_insert_mappings(Data, data2insert) # bulk_insert
session.commit() # commit only once, so that it is done only if set and data were correctly inserted
print('inserting ', set2insert.FileName, 'loop ', set2insert.Loop, 'channel ', set2insert.Channel)
Probably other, and better, solutions are possible anyway.
Upvotes: 1