Mauricio Herrera
Mauricio Herrera

Reputation: 51

Mongo query to get 1 document by day with time param

I'm having trouble getting the documents of the desired dates from my DB.

My database structure is something like this:

{
  date: 2019-10-24T10:00:00.000Z,
  value: 6
},
{
  date: 2019-10-24T10:01:03.000Z,
  value: 6
},
{
  date: 2019-10-24T10:02:35.000Z,
  value: 6
},
...

I'm trying to write a query that takes 2 parameters: startDate and endDate.

The query should deliver 1 document per day between startDate and endDate; each day's document must be the one whose time (HH:mm:ss) is closest to that of endDate. Something like this:

start = new Date('2019-10-22T10:00:00.000Z')
end = new Date('2019-10-24T10:00:00.000Z)

{
  date: '2019-10-22T09:59:13.000Z',
  value: 10
},
{
  date: '2019-10-23T10:00:00.000Z',
  value: 17
},
{
  date: '2019-10-24T09:58:55.000Z',
  value: 10
}

Right now I have this aggregate function that gets me the first document of each day:

// Pipeline that keeps one document per day within a date window, using $first.
.aggregate([
    {
      // Keep only documents whose date lies inside the window (both bounds inclusive).
      $match: {
        date: { $gte: ISODate('2019-05-26T12:00:00.000Z'), $lte: ISODate('2019-10-26T12:00:00.000Z') }
      }
    },
    {
      // Preserve the original timestamp as docDate and split out its day, month and year parts.
      $project: {
        _id: 1,
        value: 1,
        docDate: '$date',
        day: {
          '$dayOfMonth': '$date'
        },
        month: {
          '$month': '$date'
        },
        year: {
          '$year': '$date'
        }
      }
    },
    {
      // Build a per-day key by joining the year, month and day parts with '-';
      // this overwrites `date` with the key while docDate still holds the timestamp.
      $project: {
        _id: 1,
        value: 1,
        docDate: 1,
        date: {
          '$concat': [
            {
              $substr: ['$year', 0, 4]
            },
            '-',
            {
              $substr: ['$month', 0, 2]
            },
            '-',
            {
              $substr: ['$day', 0, 2]
            }
          ]
        }
      }
    },
    {
      // One group per day key. $first takes the first document that reaches this stage
      // for each key; no $sort precedes the $group, so which document that is depends
      // on the order in which documents arrive.
      $group: {
        _id: '$date',
        objId: {
          $first: '$_id'
        },
        value: {
          $first: '$value'
        },
        date: {
          $first: '$docDate'
        }
      }
    },
    {
      // Restore the original document shape: _id is the chosen document's _id again.
      $project: {
        _id: '$objId',
        value: 1,
        date: 1
      }
    },
    {
      // Return the per-day documents in ascending date order.
      '$sort': {
        date: 1
      }
    }
  ]
)

Upvotes: 2

Views: 998

Answers (2)

Mauricio Herrera
Mauricio Herrera

Reputation: 51

I found the solution: I added a minutes field (minutes since midnight) and matched on it. I also changed the $group stage to use $last instead of $first.

// Returns one document per day inside the date window: the latest document
// whose time of day is at or before the cutoff time.

// Time of day each day's document should be closest to without exceeding it.
// Only the hour and minute are used, so matching is at one-minute resolution.
var cutoff = ISODate('2019-10-26T13:20:00.000Z');
// $hour and $minute evaluate dates in UTC, so the cutoff must be read with the
// UTC getters; getHours()/getMinutes() would apply the shell's local time zone
// and shift the comparison by the local UTC offset.
var cutoffMinutes = cutoff.getUTCHours() * 60 + cutoff.getUTCMinutes();

db.getCollection('weightedaverages').aggregate([
    {
      // Restrict to the overall date window (both bounds inclusive).
      $match: {
        date: { $gte: ISODate('2019-05-26T15:00:00.000Z'), $lte: ISODate('2019-10-26T15:00:00.000Z') }
      }
    },
    {
      // Keep the original timestamp as docDate, split out the date parts and
      // compute the time of day as minutes since midnight.
      $project: {
        _id: 1,
        value: 1,
        docDate: '$date',
        day: {
          '$dayOfMonth': '$date'
        },
        month: {
          '$month': '$date'
        },
        year: {
          '$year': '$date'
        },
        minutes: {
          $add: [
            {
              $multiply: [
                {
                  '$hour': '$date'
                },
                60
              ]
            },
            {
              '$minute': '$date'
            }
          ]
        }
      }
    },
    // Drop documents recorded later in the day than the cutoff time.
    { $match: { 'minutes': { $lte: cutoffMinutes } } },
    {
      // Build the per-day grouping key from the year, month and day parts.
      $project: {
        _id: 1,
        value: 1,
        docDate: 1,
        date: {
          '$concat': [
            {
              $substr: ['$year', 0, 4]
            },
            '-',
            {
              $substr: ['$month', 0, 2]
            },
            '-',
            {
              $substr: ['$day', 0, 2]
            }
          ]
        }
      }
    },
    // $last is only meaningful on ordered input: sort chronologically so the
    // last document of each day is the one nearest to (and not after) the cutoff.
    { $sort: { docDate: 1 } },
    {
      $group: {
        _id: '$date',
        objId: {
          $last: '$_id'
        },
        value: {
          $last: '$value'
        },
        date: {
          $last: '$docDate'
        }
      }
    },
    {
      // Restore the original document shape.
      $project: {
        _id: '$objId',
        value: 1,
        date: 1
      }
    },
    {
      // Return the per-day documents in ascending date order.
      '$sort': {
        date: 1
      }
    }
  ]
)

Output:

{
  "date" : ISODate("2019-08-20T15:00:13.633Z"),
  "value" : 10
},
{
  "date" : ISODate("2019-08-19T15:00:19.850Z"),
  "value" : 5
},
...
{
  "date" : ISODate("2019-06-20T14:59:48.000Z"),
  "value" : 7
}

Upvotes: 1

barrypicker
barrypicker

Reputation: 10088

So I think this query might be what you are seeking...

// One document per day inside the date window: the latest one of each day.

// Stage 1: keep only documents inside the window (both bounds inclusive).
var windowFilter = {
    $match: {
        date: {
            $gte: ISODate('2019-05-03T12:01:07Z'),
            $lte: ISODate('2019-05-07T12:01:07Z')
        }
    }
};

// Stage 2: temporary grouping key holding the date without its time part.
var tagWithDay = {
    $addFields: {
        date_string: { $dateToString: { format: "%Y-%m-%d", date: "$date" } }
    }
};

// Stage 3: newest first, so that $first below picks the latest document.
var newestFirst = { $sort: { date: -1 } };

// Stage 4: one group per day, keeping the whole first (latest) document.
var latestPerDay = {
    $group: {
        _id: "$date_string",
        my_doc: { $first: "$$ROOT" }
    }
};

// Stage 5: promote the kept document back to the top level.
var unwrapDocument = { $replaceRoot: { newRoot: "$my_doc" } };

// Stage 6: remove the temporary grouping key.
var dropDayKey = { $project: { date_string: 0 } };

// Stage 7: return the results in ascending date order.
var oldestFirst = { $sort: { date: 1 } };

db.records.aggregate([
    windowFilter,
    tagWithDay,
    newestFirst,
    latestPerDay,
    unwrapDocument,
    dropDayKey,
    oldestFirst
])

Breakdown of pipeline strategy:

  1. $match - select the range of records to consider by date range

  2. $addFields - get just the date string without the time. This gives $group a per-day key to group on; if the string also contained the time, documents from the same day would not end up in the same group

  3. $sort date -1 - sort newest first. This is needed prior to grouping so that the $first operator in $group picks the latest document of each day

  4. $group - aggregate and use $first to select the item in the grouping

  5. $replaceRoot - transpose the pipeline output back into the original document format

  6. $project - remove the temporary date_string field

  7. $sort date 1 - put the data back in normal sorted order.

Output:

{ "_id" : ObjectId("5db21090d15e418f2d5b7b4e"), "value" : 3, "date" : ISODate("2019-05-03T12:01:07Z") }
{ "_id" : ObjectId("5db21090d15e418f2d5b7b4f"), "value" : 4, "date" : ISODate("2019-05-04T12:07:07Z") }
{ "_id" : ObjectId("5db21090d15e418f2d5b7b53"), "value" : 5, "date" : ISODate("2019-05-05T12:01:07Z") }
{ "_id" : ObjectId("5db21090d15e418f2d5b7b54"), "value" : 6, "date" : ISODate("2019-05-06T12:01:07Z") }
{ "_id" : ObjectId("5db21090d15e418f2d5b7b55"), "value" : 7, "date" : ISODate("2019-05-07T12:01:07Z") }

Upvotes: 2

Related Questions