Souad
Souad

Reputation: 5094

get results grouped by the same field value in SQLAlchemy

I have 3 MySQL tables: companies, activities, association_company_activities the association_company_activitiestable links companies and activities, so it has 3 fields: 1 auto increment ID, company_id as foreing key, and activity_id as foreing key. I have this query:

SELECT
    C.id,
    C.label,
    A.name
FROM
    companies C
JOIN activities A JOIN association_company_activities S ON
    C.identifier = S.company_id AND A.identifier = S.activitiy_id
ORDER BY
    C.label

As I'm using a python script, the above query corresponds to this: ( I also return the result as a json )

def search(args, items):
    args = request.args.to_dict()
    if len(args) > 0:
      for param, value in args.iteritems():
        items = [v for v in items if v.has_key(param) and v[param] == value]
    return items

A = aliased(model.Activity, name='A')
S = aliased(model.AssocCompaniesActivities, name='S')
C = aliased(model.Company, name='C')
activity_area = A.name.label("activities_area")

results = session.query(C.id, C.label, activity_area) \
                 .join(S) \
                 .join(A) \
                 .filter(C.identifier == S.company_id) \
                 .filter(A.identifier == S.activity_id) \
                 .order_by(C.label) \
                 .all()
session.close()
args = request.args.to_dict()
results = search(args, results)
return jsonify({"results": results})

This gives me this:

{
  "results": [
    {
      "activities_area": "luxury", 
      "id": "company1", 
      "label": "first company"
    }, 
    {
      "activities_area": "banks", 
      "id": "company2", 
      "label": "second company"
    }, 
    {
      "activities_area": "paper", 
      "id": "company2", 
      "label": "second company"
    }
  ]
}

I want to return the companies with multiple activities only once and get the activity_area as an array like this:

{
  "results": [
    {
      "activities_area": "luxury", 
      "id": "company1", 
      "label": "first company"
    }, 
    {
      "activities_area": [
           "paper",
           "banks"
      ],
      "id": "company2", 
      "label": "second company"
    }
  ]
}

THE MODEL:

class Companies(Base):
    __tablename__ = 'companies'

    identifier = Column(Integer, primary_key=True)
    id = Column(String)
    label = Column(String)

    def __init__(self, id, label):
        self.id = id
        self.label = label
class Activities(Base):
    __tablename__ = 'activities_area'

    identifier = Column(Integer, primary_key=True)
    name = Column(String)

    def __init__(self, name):
        self.name = name
class AssocCompaniesActivities(Base):
    __tablename__ = 'assoc_companies_activities'

    identifier = Column(Integer, primary_key=True)
    company_id = Column(Integer, ForeignKey("companies.identifier"), nullable=True)
    activities_area_id = Column(Integer, ForeignKey("activities_area.identifier"), nullable=True)

    def __init__(self, company_id , activities_area_id):
        self.company_id = organization_id
        self.activities_area_id = activities_area_id

How to do this ?

Upvotes: 0

Views: 1713

Answers (1)

metatoaster
metatoaster

Reputation: 18938

Since you are already using the ORM features, it really is similar to that SQLAlchemy with multiple Many to Many Relationships and there is a tutorial on this scenario, however with a subtle difference that you want a class for AssocCompaniesActivities.

Preamble for imports and other set up to have your original code work:

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:', echo=True)
Session = sessionmaker(bind=engine)
Base = declarative_base()

This is the updated class definition:

from sqlalchemy.orm import relationship

class Companies(Base):
    __tablename__ = 'companies'

    identifier = Column(Integer, primary_key=True)
    id = Column(String)
    label = Column(String)

    activities_area = relationship("Activities", secondary='assoc_companies_activities', back_populates='companies')

    def __init__(self, id, label):
        self.id = id 
        self.label = label

class Activities(Base):
    __tablename__ = 'activities_area'

    identifier = Column(Integer, primary_key=True)
    name = Column(String)

    companies = relationship("Companies", secondary='assoc_companies_activities', back_populates='activities_area')

    def __init__(self, name):
        self.name = name

class AssocCompaniesActivities(Base):
    __tablename__ = 'assoc_companies_activities'

    identifier = Column(Integer, primary_key=True)
    company_id = Column(Integer, ForeignKey("companies.identifier"), nullable=True)
    activities_area_id = Column(Integer, ForeignKey("activities_area.identifier"), nullable=True)
    # Should declare primary key for company_id and activities_area_id,
    # or better yet, just create a simple un-mapped table like in the docs

    def __init__(self, company_id , activities_area_id):
        self.company_id = company_id
        self.activities_area_id = activities_area_id

Set up the db, session:

session = Session()
Base.metadata.create_all(engine)

Finally, add the example data:

company1 = Companies(id='company1', label='first company')
company2 = Companies(id='company2', label='second company')
banks_activity = Activities('banks')
luxury_activity = Activities('luxury')
paper_activity = Activities('paper')
session.add(company1)
session.add(company2)
session.add(banks_activity)
session.add(luxury_activity)
session.add(paper_activity)
session.commit()

Also the relationships (after committing to have ids for the company and activities)

company1_luxury = AssocCompaniesActivities(company1.identifier, luxury_activity.identifier)
company2_banks = AssocCompaniesActivities(company2.identifier, banks_activity.identifier)
company2_paper = AssocCompaniesActivities(company2.identifier, paper_activity.identifier)
session.add(company1_luxury)
session.add(company2_banks)
session.add(company2_paper)
session.commit()

With the model updated, we can now do a join query using one of the relationship loading techniques for eager loading. This is the part where it is different to other questions, and that there are not that many examples that combine many-to-many relationships with this particular technique to create some for of joined query. We can achieve what you want using joinedload:

from sqlalchemy.orm import joinedload

companies = session.query(Companies).options(
    joinedload(Companies.activities_area).load_only('name')).all()

Which generates this query:

SELECT companies.identifier AS companies_identifier, companies.id AS companies_id, companies.label AS companies_label, activities_area_1.identifier AS activities_area_1_identifier, activities_area_1.name AS activities_area_1_name 
FROM companies LEFT OUTER JOIN (assoc_companies_activities AS assoc_companies_activities_1 JOIN activities_area AS activities_area_1 ON activities_area_1.identifier = assoc_companies_activities_1.activities_area_id) ON companies.identifier = assoc_companies_activities_1.company_id

Finally, turn the results into the data structure that you desired:

print(json.dumps([{
    'id': c.id,
    'label': c.label,
    'activities_area': [a.name for a in c.activities_area]
} for c in companies], indent=4))

Output:

[
    {
        "id": "company1",
        "label": "first company",
        "activities_area": [
            "luxury"
        ]
    },
    {
        "id": "company2",
        "label": "second company",
        "activities_area": [
            "banks",
            "paper"
        ]
    }
]

Upvotes: 2

Related Questions