Reputation: 13826
Block model (which goes on block 0 -> block 1 -> block 2 -> block 3 -> […]):
Example input document [700+ of these in the modulestore.structures
collection]:
{
_id: ObjectId('5932d50ff8f46c0a8098ab79'),
blocks: [
{
definition: ObjectId('5923556ef8f46c0a787e9c0f'),
block_type: 'chapter',
block_id: '5b053a7f10ba41df85a3221c3ef3956e',
fields: {
format: 'Foo exam',
children: [
[
'sequential',
'9f1e58553ad448818ec8e7915d3d94d3'
],
[
'sequential',
'f052c7aa44274769a4631e95405834e0'
]
]
}
},
{
definition: ObjectId('59235569f8f46c0a7be1debc'),
block_type: 'sequential',
block_id: '9f1e58553ad448818ec8e7915d3d94d3',
fields: {
display_name: 'FooBar'
}
},
{
definition: ObjectId('59317406f8f46c0a8098aaf5'),
block_type: 'sequential',
block_id: 'f052c7aa44274769a4631e95405834e0',
fields: {
display_name: 'CanHaz'
}
}
]
}
My goal here is to:
children
array for traversal;top_ancestor_fields
containing the fields
property from their topmost ancestor.Example output:
[
{
_id: ObjectId('5a00f611f995363c2b63c9a6'),
block_type: 'chapter',
block_id: '5b053a7f10ba41df85a3221c3ef3956e',
fields: {
format: 'Foo exam'
children: [
[
'sequential',
'9f1e58553ad448818ec8e7915d3d94d3'
],
[
'sequential',
'f052c7aa44274769a4631e95405834e0'
]
]
},
top_ancestor_fields: {
format: 'Foo exam'
}
},
{
_id: ObjectId('5a00f611f995363c2b63c9a7'),
block_id: '9f1e58553ad448818ec8e7915d3d94d3',
block_type: 'sequential',
fields: {
display_name: 'FooBar'
},
top_ancestor_fields: {
format: 'Foo exam'
}
},
{
_id: ObjectId('5a00f611f995363c2b63c9a8'),
block_id: 'f052c7aa44274769a4631e95405834e0',
block_type: 'sequential',
fields: {
display_name: 'CanHaz'
},
top_ancestor_fields: {
format: 'Foo exam'
}
},
]
Almost have it working based off @neil-lunn's suggestion:
db.modulestore.structures.aggregate([
{ $unwind: '$blocks' },
{ $project: { _id: 0,
block_id: '$blocks.block_id',
children: '$blocks.fields.children',
display_name: '$blocks.fields.display_name',
block_type: '$blocks.block_type',
exam: '$blocks.fields.format',
fields: '$blocks.fields'
}},
{ $out: 'modulestore.mapped0' }
])
db.modulestore.mapped0.aggregate([
{ $graphLookup: {
from: 'modulestore.mapped0',
startWith: '$block_id',
connectToField: 'children',
connectFromField: 'block_id',
as: 'block_ids',
maxDepth: 0
} },
{ $unwind: '$block_ids' },
{ $project: {
name: 1,
_id: 0,
ancestor: '$block_ids.block_id'
} },
{ $out: 'modulestore.mapped1' }
]);
But this just hangs. I've tried configuring maxDepth
$graphLookup
option. FYI: db.modulestore.mapped0.count()
is 80772 for me.
Each document potentially contains a children
array with up to 180 elements.
Not sure how to approach this larger pipeline to map children
hierarchies…
Upvotes: 2
Views: 1048
Reputation: 10918
The following should get you started:
db.modulestore.structures.aggregate([{
$unwind: '$blocks' // flatten "blocks" array
}, {
$replaceRoot: { // move "blocks" field to top level
newRoot: "$blocks"
}
}, {
$unwind: { // flatten "fields.children" array
path: "$fields.children",
preserveNullAndEmptyArrays: true
}
}, {
// this step is technically not needed but it might speed up things - try running with and without that
$addFields: { // we only keep the second (last, really) entry of all your arrays since this is the only valid join key for the graphLookup
"fields.children": {
$slice: [ "$fields.children", -1 ]
}
}
}, {
$unwind: { // flatten "fields.children" array one more time because it was nested before
path: "$fields.children",
preserveNullAndEmptyArrays: true
}
}, {
$group: { // reduce the number of lookups required later by eliminating duplicate parent-child paths
"_id": "$block_id",
"block_type": { $first: "$block_type" },
"definition": { $first: "$definition" },
"fieldsFormat": { $first: "$fields.format" },
"fieldsChildren": { $addToSet: "$fields.children" }
}
}, {
$project: { // restore original structure
"block_id": "$_id",
"block_type": "$block_type",
"definition": "$definition",
"fields": {
"format": "$fieldsFormat",
"children": "$fieldsChildren"
}
}
}, { // spit out the result into "modulestore.mapped0" collection, overwriting all existing content
$out: 'modulestore.mapped0'
}])
and then
db.modulestore.mapped0.aggregate([{
$graphLookup: {
from: 'modulestore.mapped0',
startWith: '$block_id',
connectToField: 'fields.children',
connectFromField: 'block_id',
as: 'block_ids',
maxDepth: 0
}
}, {
$lookup: {
from: 'modulestore.mapped0',
localField: 'block_ids.fields.children',
foreignField: '_id',
as: 'block_ids.fields.children'
}
}])
Upvotes: 1
Reputation: 13826
Partial solution [gist]:
def update_descendants(modulestore, blocks, ancestor_fields):
"""
:keyword modulestore: modulestore containing the blocks
:type modulestore: ``Collection``
:keyword blocks: iterator over the blocks (collections within modulestore)
:type blocks: ``Cursor`` | `tuple`
:keyword ancestor_fields: fields of the top most ancestor
:type ancestor_fields: ``dict``
"""
for block in blocks:
modulestore.replace_one({'block_id': block['block_id'],
'block_type': block['block_type']},
update_d(block, add={'ancestor_fields': ancestor_fields},
rm=('_id',)))
update_descendants.counter += 1
print 'Updated:', update_descendants.counter
if 'children' in block and block['children']:
for block_type, block_id in block['children']:
update_descendants(modulestore,
modulestore.find({'block_id': block_id,
'block_type': block_type,
'ancestor_fields': {
'$exists': False
}}),
ancestor_fields)
Would prefer a solution that's wholly in the database though, and without all these inefficient queries.
Upvotes: 0