Reputation: 63
I have a collection where I'm storing water dispensed for a particular day. Now for some days when the device isn't operated the data isn't stored in the database and I won't be getting the data in the collection. For example, I am querying water dispensed for the last 7 days where the device only operated for two day gives me something like this:
[{
"uID" : "12345678",
"midNightTimeStamp" : NumberInt(1645381800),
"waterDispensed" : NumberInt(53)
},
{
"uID" : "12345678",
"midNightTimeStamp" : NumberInt(1645641000),
"waterDispensed" : NumberInt(30)
}]
Converting the above two timestamps gives me data for Monday 21st February and Thursday 24th February. Now if I run the query for 21st Feb to 27th Feb something like this,
db.getCollection("analytics").find({ uID: "12345678", midNightTimeStamp: {"$in": [1645381800, 1645468200, 1645554600, 1645641000, 1645727400, 1645813800, 1645900200]}})
This returns me above two documents only, how to fill missing values for midNightTimeStamp supplied to get the document list like this which doesn't exists:
[{
"uID" : "12345678",
"midNightTimeStamp" : 1645381800,
"waterDispensed" : 53
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645468200,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645554600,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645641000,
"waterDispensed" : 30
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645727400,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645813800,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645900200,
"waterDispensed" : 0
}
Upvotes: 1
Views: 898
Reputation: 195
As of MongoDB 5.1 you can use the $densify
aggregation operator to fill in missing time series data with an average or default value.
https://www.mongodb.com/docs/rapid/reference/operator/aggregation/densify/
In your case, you may need to convert your timestamp field to a date while aggregating so that you can use $densify
.
You can also watch a quick explanation of $densify
in this presentation from MongoDB World 2022.
Upvotes: 0
Reputation: 11942
This is just a little different than the other answer, and it takes care to just grab the "uID"
desired. Comments in the MQL explain the process.
db.collection.aggregate([
{ // The uID we want
"$match": { "uID": "12345678" }
},
{ // grab all the uID docs as "water"
// keep uID
"$group": {
"_id": null,
"water": { "$push": "$$CURRENT" },
"uID": { "$first": "$uID" }
}
},
{ // create outArray
"$set": {
"outArray": {
// by mapping time vals
"$map": {
"input": {
"$range": [ NumberInt(1645381800), NumberInt(1645900200), 86400 ]
},
"in": {
"$cond": [
{ // already have doc?
"$in": [ "$$this", "$water.midNightTimeStamp" ]
},
{ // yes! Get it!
"$arrayElemAt": [
"$water",
{ "$indexOfArray": [ "$water.midNightTimeStamp", "$$this" ] }
]
},
{ // no, create it
"uID": "$uID",
"midNightTimeStamp": "$$this",
"waterDispensed": 0
}
]
}
}
}
}
},
{ // only need outArray now
"$project": {
"_id": 0,
"outArray": 1
}
},
{ // create docs
"$unwind": "$outArray"
},
{ // hoist them
"$replaceWith": "$outArray"
},
{ // don't need _id
"$unset": "_id"
}
])
Try it on mongoplayground.net.
Upvotes: 0
Reputation: 10737
Maybe something like this:
db.collection.aggregate([
{
$group: {
_id: null,
ar: {
$push: "$$ROOT"
},
mind: {
"$min": "$midNightTimeStamp"
},
maxd: {
"$max": "$midNightTimeStamp"
}
}
},
{
$project: {
ar: {
$map: {
input: {
$range: [
"$mind",
{
"$sum": [
"$maxd",
86400
]
},
86400
]
},
as: "dateInRange",
in: {
$let: {
vars: {
dateIndex: {
"$indexOfArray": [
"$ar.midNightTimeStamp",
"$$dateInRange"
]
}
},
in: {
$cond: {
if: {
$ne: [
"$$dateIndex",
-1
]
},
then: {
$arrayElemAt: [
"$ar",
"$$dateIndex"
]
},
else: {
midNightTimeStamp: "$$dateInRange",
"waterDispensed": NumberInt(0)
}
}
}
}
}
}
}
}
},
{
$unwind: "$ar"
},
{
$project: {
_id: 0,
"waterDispensed": "$ar.waterDispensed",
midNightTimeStamp: "$ar.midNightTimeStamp",
"Date": {
$toDate: {
"$multiply": [
"$ar.midNightTimeStamp",
1000
]
}
}
}
}
])
Explained:
Upvotes: 1