Reputation: 77
I have a PostgreSQL database with a many-to-many users to tags relationship with the following tables:
I am attempting to build out a simple API to access data in this database using Flask, Peewee, and Marshmallow. We can ignore Flask for now, but I am trying to create a schema for social_user that will allow me to dump in a query that returns a user or users with their respective tags. I am looking for a response that looks something like the following:
{ "id": "[ID]", "handle": "[HANDLE]", "local_id": "[LOCAL_ID]", "platform_slug": "[PLATFORM_SLUG]", "tags": [ { "id": "[ID]", "title": "[TITLE]", "tag_type": "[TAG_TYPE]" }, { "id": "[ID]", "title": "[TITLE]", "tag_type": "[TAG_TYPE]" } ] }
I've managed to do this by including a second query that pulls the tags in the social_user schema in a @post_dump wrapped function, however, this feels like a hack and also seems like it would be slow with a large number of users (UPDATE: this is VERY slow, I tested it on 369 users). I imagine there is something I can do with Marshmallow's fields.Nested
field type. Is there a better way to serialize this relationship with only one Peewee query? My code is below:
# just so you are aware of my namespaces
import marshmallow as marsh
import peewee as pw
db = postgres_ext.PostgresqlExtDatabase(
register_hstore = False,
**json.load(open('postgres.json'))
)
class Base_Model(pw.Model):
class Meta:
database = db
class Tag(Base_Model):
title = pw.CharField()
tag_type = pw.CharField(db_column = 'type')
class Meta:
db_table = 'tag'
class Social_User(Base_Model):
handle = pw.CharField(null = True)
local_id = pw.CharField()
platform_slug = pw.CharField()
class Meta:
db_table = 'social_user'
class User_Tag(Base_Model):
social_user_id = pw.ForeignKeyField(Social_User)
tag_id = pw.ForeignKeyField(Tag)
class Meta:
primary_key = pw.CompositeKey('social_user_id', 'tag_id')
db_table = 'user_tag'
class Tag_Schema(marsh.Schema):
id = marsh.fields.Int(dump_only = True)
title = marsh.fields.Str(required = True)
tag_type = marsh.fields.Str(required = True, default = 'descriptive')
class Social_User_Schema(marsh.Schema):
id = marsh.fields.Int(dump_only = True)
local_id = marsh.fields.Str(required = True)
handle = marsh.fields.Str()
platform_slug = marsh.fields.Str(required = True)
tags = marsh.fields.Nested(Tag_Schema, many = True, dump_only = True)
def _get_tags(self, user_id):
query = Tag.select().join(User_Tag).where(User_Tag.social_user_id == user_id)
tags, errors = tags_schema.dump(query)
return tags
@marsh.post_dump(pass_many = True)
def post_dump(self, data, many):
if many:
for datum in data:
datum['tags'] = self._get_tags(datum['id']) if datum['id'] else []
else:
data['tags'] = self._get_tags(data['id'])
return data
user_schema = Social_User_Schema()
users_schema = Social_User_Schema(many = True)
tags_schema = Tag_Schema(many = True)
Here are some tests to demonstrate the functionality:
db.connect()
query = Social_User.get(Social_User.id == 825)
result, errors = user_schema.dump(query)
db.close()
pprint(result)
{'handle': 'test', 'id': 825, 'local_id': 'test', 'platform_slug': 'tw', 'tags': [{'id': 20, 'tag_type': 'descriptive', 'title': 'this'}, {'id': 21, 'tag_type': 'descriptive', 'title': 'that'}]}
db.connect()
query = Social_User.select().where(Social_User.platform_slug == 'tw')
result, errors = users_schema.dump(query)
db.close()
pprint(result)
[{'handle': 'test', 'id': 825, 'local_id': 'test', 'platform_slug': 'tw', 'tags': [{'id': 20, 'tag_type': 'descriptive', 'title': 'this'}, {'id': 21, 'tag_type': 'descriptive', 'title': 'that'}]}, {'handle': 'test2', 'id': 826, 'local_id': 'test2', 'platform_slug': 'tw', 'tags': []}]
Upvotes: 2
Views: 3331
Reputation: 77
It looks like this can be done using the ManyToMany
field in the Peewee models and manually setting the through_model
. The ManyToMany
field allows you to add a field to your models that relates two tables to one another, typically it automatically creates the relationship table (through_model
) itself, but you can set it manually.
I am working with the 3.0 alpha of Peewee, but I'm sure many people are working with the current stable release so I'll include both versions. We'll use a DeferredThroughModel
object and the ManyToMany
field, in Peewee 2.x these are in the "playhouse" in 3.x they're part of the main Peewee release. We'll also remove the @post_dump
wrapped function:
# Peewee 2.x
# from playhouse import fields
# User_Tag_Proxy = fields.DeferredThroughModel()
# Peewee 3.x
User_Tag_Proxy = pw.DeferredThroughModel()
class Tag(Base_Model):
title = pw.CharField()
tag_type = pw.CharField(db_column = 'type')
class Meta:
db_table = 'tag'
class Social_User(Base_Model):
handle = pw.CharField(null = True)
local_id = pw.CharField()
platform_slug = pw.CharField()
# Peewee 2.x
# tags = fields.ManyToManyField(Tag, related_name = 'users', through_model = User_Tag_Proxy)
# Peewee 3.x
tags = pw.ManyToManyField(Tag, backref = 'users', through_model = User_Tag_Proxy)
class Meta:
db_table = 'social_user'
class User_Tag(Base_Model):
social_user = pw.ForeignKeyField(Social_User, db_column = 'social_user_id')
tag = pw.ForeignKeyField(Tag, db_column = 'tag_id')
class Meta:
primary_key = pw.CompositeKey('social_user', 'tag')
db_table = 'user_tag'
User_Tag_Proxy.set_model(User_Tag)
class Social_User_Schema(marsh.Schema):
id = marsh.fields.Int(dump_only = True)
local_id = marsh.fields.Str(required = True)
handle = marsh.fields.Str()
platform_slug = marsh.fields.Str(required = True)
tags = marsh.fields.Nested(Tag_Schema, many = True, dump_only = True)
user_schema = Social_User_Schema()
users_schema = Social_User_Schema(many = True)
In practice it works exactly the same as using a @post_dump
wrapped function. Unfortunately, while this appears to be the "right" way to approach this problem, it is actually slightly slower.
I've managed to accomplish the same thing in 1/100th of the time. It's a bit of a hack and could use some cleaning up but it works! Instead of making changes to the models I've instead adjusted how I gather and process the data before passing it to the schema to serialize.
class Tag(Base_Model):
title = pw.CharField()
tag_type = pw.CharField(db_column = 'type')
class Meta:
db_table = 'tag'
class Social_User(Base_Model):
handle = pw.CharField(null = True)
local_id = pw.CharField()
platform_slug = pw.CharField()
class Meta:
db_table = 'social_user'
class User_Tag(Base_Model):
social_user = pw.ForeignKeyField(Social_User, db_column = 'social_user_id')
tag = pw.ForeignKeyField(Tag, db_column = 'tag_id')
class Meta:
primary_key = pw.CompositeKey('social_user', 'tag')
db_table = 'user_tag'
class Social_User_Schema(marsh.Schema):
id = marsh.fields.Int(dump_only = True)
local_id = marsh.fields.Str(required = True)
handle = marsh.fields.Str()
platform_slug = marsh.fields.Str(required = True)
tags = marsh.fields.Nested(Tag_Schema, many = True, dump_only = True)
user_schema = Social_User_Schema()
users_schema = Social_User_Schema(many = True)
For the new query we'll be joining (LEFT_OUTER
) the three tables (Social_User, Tag, and User_Tag) with Social_User as our source of truth. We want to make sure we get every user whether they have tags or not. This will return users multiple times depending on the number of tags they have, so we'll need to reduce this by iterating over each and using a dictionary to store the objects. In each of these new Social_User
objects will add a tags
list to which we'll append the Tag
objects.
db.connect()
query = (Social_User.select(User_Tag, Social_User, Tag)
.join(User_Tag, pw.JOIN.LEFT_OUTER)
.join(Tag, pw.JOIN.LEFT_OUTER)
.order_by(Social_User.id))
users = {}
last = None
for result in query:
user_id = result.id
if (user_id not in users):
# creates a new Social_User object matching the user data
users[user_id] = Social_User(**result.__data__)
users[user_id].tags = []
try:
# extracts the associated tag
users[user_id].tags.append(result.user_tag.tag)
except AttributeError:
pass
result, errors = users_schema.dump(users.values())
db.close()
pprint(result)
Upvotes: 1