Reputation: 153
First of all i must say i don't have solid experience with mongoDB and probably due to this can't solve following issue. Problem is that i need to aggregate records not by full clock hours but by the hour starts when first record comes in and one hour away from the previous event.
Example:
Time | Counter |
---|---|
no records yet | 0 |
11:55 | 1 |
11:58 | 1 |
12:02 | 1 |
12:05 | 1 |
12:55 | 1 |
12:56 | 2 |
13:04 | 2 |
It's quite clear how to aggregate by minutes, hours, day etc but not clear if i can reach desired aggregation with mongodb only.
UPDATE
Data sample:
{
"_id":{
"$oid":"5f7eddd73b0e3e7259cf18b9"
},
"agentCodeSystem":"2.16.840.1.113883.2.4.3.118.2.1",
"agentCode":"653259001653360",
"agentName":"Diabetic",
"date":"20201008",
"organisationId":"97490137",
"user":{
"identifiers":[
{
"codeSystemName":"organisation/user",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"653259001653360",
"displayName":""
}
],
"organisation":{
"identifiers":[
{
"codeSystemName":"my-system-name",
"oid":"2.16.840.1.113883.2.4.6.1",
"code":"97490137",
"displayName":""
}
],
"type":"Organisation type",
"name":"My org name"
},
"overseer":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"653259001653360",
"displayName":""
}
],
"name":"Diabetic",
"role":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.4.30.2",
"code":"99",
"displayName":"overig"
}
],
"name":"Overig"
}
},
"role":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.4.30.2",
"code":"99",
"displayName":"overig"
}
],
"name":"Overig"
},
"name":"Diabetic"
},
"event":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.13",
"code":"uid",
"displayName":""
}
],
"eventType":{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"5",
"displayName":""
},
"actionCode":"R",
"dateTime":"20200612165713",
"date":"2019-12-13",
"additionalText":"MyService"
},
"userSubjects":[
{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"653259001653360",
"displayName":""
}
],
"type":"",
"name":"Diabetic",
"gender":"Man",
"birthDate":"2019-10-09",
"organisationId":"97490137",
"organisationName":"my org name"
}
],
"organisationSubjects":[
{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.6.1",
"code":"123214453",
"displayName":""
}
],
"type":"org type",
"name":"org name"
}
]
}
{
"_id":{
"$oid":"5f7eddd73b0e3e7259cf18b9"
},
"agentCodeSystem":"2.16.840.1.113883.2.4.3.118.2.1",
"agentCode":"653259001653360",
"agentName":"Diabetic",
"date":"20201008",
"organisationId":"97490137",
"user":{
"identifiers":[
{
"codeSystemName":"organisation/user",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"653259001653360",
"displayName":""
}
],
"organisation":{
"identifiers":[
{
"codeSystemName":"my-system-name",
"oid":"2.16.840.1.113883.2.4.6.1",
"code":"97490137",
"displayName":""
}
],
"type":"Organisation type",
"name":"My org name"
},
"overseer":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"653259001653360",
"displayName":""
}
],
"name":"Diabetic",
"role":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.4.30.2",
"code":"99",
"displayName":"overig"
}
],
"name":"Overig"
}
},
"role":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.4.30.2",
"code":"99",
"displayName":"overig"
}
],
"name":"Overig"
},
"name":"Diabetic"
},
"event":{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.13",
"code":"uid",
"displayName":""
}
],
"eventType":{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"5",
"displayName":""
},
"actionCode":"R",
"dateTime":"20200612155713",
"date":"2019-12-13",
"additionalText":"MyService"
},
"userSubjects":[
{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.3.118.2.1",
"code":"653259001653360",
"displayName":""
}
],
"type":"",
"name":"Diabetic",
"gender":"Man",
"birthDate":"2019-10-09",
"organisationId":"97490137",
"organisationName":"my org name"
}
],
"organisationSubjects":[
{
"identifiers":[
{
"codeSystemName":"my code system name",
"oid":"2.16.840.1.113883.2.4.6.1",
"code":"123214453",
"displayName":""
}
],
"type":"org type",
"name":"org name"
}
]
}
Upvotes: 0
Views: 155
Reputation: 59602
Your sample data is still useless! Remove all non-relevant stuff. And give us more than just 2 documents in order to cover all requirements.
Anyway, I will try my best.
It is a very bad design to store date/time values as string (or number). Use always proper Date
objects.
db.collection.aggregate([
// convert string into proper `Date` object
{ $set: { ts: { $dateFromString: { dateString: "event.dateTime", format: "%Y%m%d%H:%M:%S" } } } }
{ $sort: { ts: 1 } },
// put documents into an array
{ $group: { _id: { date: "$event.date", organisationId: "$organisationId" }, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$concatArrays: [
"$$value",
[
{
$cond: {
// compare timestamp with boundary of previous element
if: { $gt: ["$$this.ts", { $last: "$$value.boundary" }] },
// new interval: increase bucket number and set new boudnary
then: {
$mergeObjects: [
"$$this", {
bucket: { $add: [{ $ifNull: [{ $last: "$$value.bucket" }, 0] }, 1] },
boundary: { $add: ["$$this.ts", 1000 * 60 * 60] }
}
]
},
// same interval: append element
else: { $mergeObjects: ["$$this", { boundary: { $last: "$$value.boundary" }, bucket: { $last: "$$value.bucket" } }] }
}
}
]
]
}
}
}
}
},
{ $unwind: "$data" }, // transpose array back to documents
// count the documents by bucket number
{
$group: {
_id: { date: "$data.date", organisationId: "$data.organisationId", bucket: "$data.bucket" },
ts_min: { $min: "$data.ts" },
ts_max: { $max: "$data.ts" },
count: { $sum: 1 }
}
},
// some cosmetic
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$_id"] } } },
{ $unset: "_id" }
])
Upvotes: 1
Reputation: 5715
As this is not a straightforward task in MongoDB, you will be better off doing it in your business logic. The result you want to achieve is basically a form of aggregation over ranges. The things is, your ranges are not a predefined set, but are dynamic and calculated based on the first entry + additional time value. So, basically you have:
[firstTime -> firstTime + 60m]
, [firstTime + 60m + 1s -> firstTime + 60m + 60m]
, ... to infinity. It is doable but requires either hacky dynamic expressions for the grouping stage or projection calculations. Both of those approaches seem unnecessary complex for your use case.
You can check some related scenarios here and here if you really wanna go that route.
If you decide to do it in your business logic, depending on your data set size, you can batch the data as to not exceed your available memory, but this should be a problem only if you calculate the aggregate for the first time with a decent sized set of unprocessed data. Also, in any case, you should cache the calculated results as they won't be changing(except the last open date-range, of course). Considering you're doing this for statistics and you're not performance constrained, you should definitely do yourself a favour and go the more maintainable way of doing it in code.
Upvotes: 0