Fran b
Fran b

Reputation: 3036

Mongodb query for picking random documents from a collection until reach a quantity

I have a collection with documents like this:

Songs

{
  "title": "Highway To Hell",
  "duration": 4
  "tags": [{"label":"rock"},{"label":"heavy"}]
}
{
  "title": "Never Be The Same",
  "duration": 3
  "tags": [{"label":"pop"}]
}
{
  "title": "Wake Up",
  "duration": 4
  "tags": [{"label":"metal"},{"label":"heavy"}]
}
{
  "title": "Catharsis",
  "duration": 3
  "tags": [{"label":"metal"},{"label":"heavy"}]
}

And I want query for random songs for a top duration 8 min and with the tag "heavy"

Query result 1

{
  "title": "Highway To Hell",
  "duration": 4
  "tags": [{"label":"rock"},{"label":"heavy"}]
}
{
  "title": "Wake Up",
  "duration": 4
  "tags": [{"label":"metal"},{"label":"heavy"}]
}

Query result 2

{
  "title": "Highway To Hell",
  "duration": 4
  "tags": [{"label":"rock"},{"label":"heavy"}]
}
{
  "title": "Catharsis",
  "duration": 3
  "tags": [{"label":"metal"},{"label":"heavy"}]
}

I have reviewed the $sample operator but you need the number of random documents but in this case the number is not fixed. And a variable is needed to hold the "duration" but I am not sure how accomplish this.

Upvotes: 1

Views: 49

Answers (1)

Saravana
Saravana

Reputation: 12817

can you try this aggregation

  1. $match - match only tags contains heavy
  2. $sample - get n number of samples (order of elements not guranteed)
  3. $group - group by _id for reducing in next stage
  4. $project - with $reduce count the duration and maximum duration for each song and sum it, if max duration is not reached added song to array otherwise ignore it
  5. $unwind - unwind filtered songs
  6. $replaceRoot - to get songs in original document structure

here the assumption is song's duration will be minimum 1 minute, if not increase the sample count

pipeline

    db.songs.aggregate(
      [
        {$match : {"tags.label" : "heavy"}},
        {$sample : {size : 8}},
        {$group : {_id : null, songs : {$push : "$$ROOT"}}},
        {$project : {
          songs : {
            $reduce : {
                input : "$songs",
                initialValue : { filtered : [], total : 0 },
                in : {              
                  total : {$add : ["$$value.total",{$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},"$$this.duration", 0]}]},
                  filtered : {$concatArrays : [ "$$value.filtered", {$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},["$$this"], []]}]}
                }
              }
            }
          }
        },
        {$unwind : "$songs.filtered"},
        {$replaceRoot : {newRoot : "$songs.filtered"}}
      ]
    ).pretty()

possible outputs generated for the test data in the question

> db.songs.aggregate(   [     {$match : {"tags.label" : "heavy"}},     {$sample : {size : 8}},     {$group : {_id : null, songs : {$push : "$$ROOT"}}},     {$project : {       songs : {         $reduce : {             input : "$songs",             initialValue : { filtered : [], total : 0 },             in : {                             total : {$add : ["$$value.total",{$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},"$$this.duration", 0]}]},               filtered : {$concatArrays : [ "$$value.filtered", {$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},["$$this"], []]}]}             }           }         }       }     },     {$unwind : "$songs.filtered"},     {$replaceRoot : {newRoot : "$songs.filtered"}}   ] ).pretty()
{
    "_id" : ObjectId("5a7739693eafa689da47ba81"),
    "title" : "Wake Up",
    "duration" : 4,
    "tags" : [
        {
            "label" : "metal"
        },
        {
            "label" : "heavy"
        }
    ]
}
{
    "_id" : ObjectId("5a7739693eafa689da47ba7f"),
    "title" : "Highway To Hell",
    "duration" : 4,
    "tags" : [
        {
            "label" : "rock"
        },
        {
            "label" : "heavy"
        }
    ]
}
> 
> db.songs.aggregate(   [     {$match : {"tags.label" : "heavy"}},     {$sample : {size : 8}},     {$group : {_id : null, songs : {$push : "$$ROOT"}}},     {$project : {       songs : {         $reduce : {             input : "$songs",             initialValue : { filtered : [], total : 0 },             in : {                             total : {$add : ["$$value.total",{$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},"$$this.duration", 0]}]},               filtered : {$concatArrays : [ "$$value.filtered", {$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},["$$this"], []]}]}             }           }         }       }     },     {$unwind : "$songs.filtered"},     {$replaceRoot : {newRoot : "$songs.filtered"}}   ] ).pretty()
{
    "_id" : ObjectId("5a7739693eafa689da47ba7f"),
    "title" : "Highway To Hell",
    "duration" : 4,
    "tags" : [
        {
            "label" : "rock"
        },
        {
            "label" : "heavy"
        }
    ]
}
{
    "_id" : ObjectId("5a7739693eafa689da47ba82"),
    "title" : "Catharsis",
    "duration" : 3,
    "tags" : [
        {
            "label" : "metal"
        },
        {
            "label" : "heavy"
        }
    ]
}
> db.songs.aggregate(   [     {$match : {"tags.label" : "heavy"}},     {$sample : {size : 8}},     {$group : {_id : null, songs : {$push : "$$ROOT"}}},     {$project : {       songs : {         $reduce : {             input : "$songs",             initialValue : { filtered : [], total : 0 },             in : {                             total : {$add : ["$$value.total",{$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},"$$this.duration", 0]}]},               filtered : {$concatArrays : [ "$$value.filtered", {$cond :[{$lte : [{$add : ["$$value.total", "$$this.duration"]}, 8]},["$$this"], []]}]}             }           }         }       }     },     {$unwind : "$songs.filtered"},     {$replaceRoot : {newRoot : "$songs.filtered"}}   ] ).pretty()
{
    "_id" : ObjectId("5a7739693eafa689da47ba82"),
    "title" : "Catharsis",
    "duration" : 3,
    "tags" : [
        {
            "label" : "metal"
        },
        {
            "label" : "heavy"
        }
    ]
}
{
    "_id" : ObjectId("5a7739693eafa689da47ba81"),
    "title" : "Wake Up",
    "duration" : 4,
    "tags" : [
        {
            "label" : "metal"
        },
        {
            "label" : "heavy"
        }
    ]
}
> 

Upvotes: 1

Related Questions