Reputation: 1135
My document structure is shown here:
{
"_id" : ObjectId("52517d9e6e0af435ddd48219"),
"date" : ISODate("2013-10-06T18:11:26.329Z"),
"engines" : {
"ahnlab" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "ahnlab",
"node_name" : "",
"status" : 1,
"task_id" : "7fac4f67-2bde-49de-980a-4f7fa1d46db7",
"threat" : "EICAR_Test_File"
},
"avast" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "avast",
"node_name" : "",
"status" : 4,
"task_id" : "fdce50e9-9bf3-4cc9-91a4-b674a108d478",
"threat" : ""
},
"avg" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "avg",
"node_name" : "",
"status" : 1,
"task_id" : "4bdb4a37-80ab-4631-8587-edcbde7e2592",
"threat" : "EICAR_Test"
},
"avira" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "avira",
"node_name" : "",
"status" : 4,
"task_id" : "43b43bab-ff67-440c-9919-f6241ccaf539",
"threat" : ""
},
"bitdefender" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "bitdefender",
"node_name" : "",
"status" : 4,
"task_id" : "ba7efe74-ef93-465f-b6c3-6c4fc746934d",
"threat" : ""
},
"comodo" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "comodo",
"node_name" : "",
"status" : 4,
"task_id" : "831d2dc8-704b-4eb7-9a75-9a0364a8ab09",
"threat" : ""
},
"drweb" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "drweb",
"node_name" : "",
"status" : 4,
"task_id" : "ebb758c3-9146-4a99-b36f-0fb6ee024a33",
"threat" : ""
},
"f-prot" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "f-prot",
"node_name" : "",
"status" : 4,
"task_id" : "13590a25-ac0d-4b1b-b93e-bc715009432a",
"threat" : ""
},
"forticlient" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "forticlient",
"node_name" : "",
"status" : 4,
"task_id" : "",
"threat" : ""
},
"kaspersky" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "kaspersky",
"node_name" : "",
"status" : 4,
"task_id" : "",
"threat" : ""
},
"malwarebytes" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "malwarebytes",
"node_name" : "",
"status" : 4,
"task_id" : "a97f3d9a-d6a9-44df-8355-c053e9f4980a",
"threat" : ""
},
"mcafee" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "mcafee",
"node_name" : "",
"status" : 1,
"task_id" : "e7a592bb-84fc-4c47-a1ed-3719874b19cc",
"threat" : "EICAR test file"
},
"msessentials" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "msessentials",
"node_name" : "",
"status" : 4,
"task_id" : "01c9fb71-155a-473d-b45c-91fa117ae649",
"threat" : ""
},
"nod32" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "nod32",
"node_name" : "",
"status" : 4,
"task_id" : "a715cc8f-0e2f-4698-b883-a35fa6add13e",
"threat" : ""
},
"norman" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "norman",
"node_name" : "",
"status" : 4,
"task_id" : "29da2955-0674-45d2-ac4d-c0b3ea401cba",
"threat" : ""
},
"norton" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "norton",
"node_name" : "",
"status" : 4,
"task_id" : "c4cd1e3e-2f0d-4bf6-84cf-ab0962f7f4ed",
"threat" : ""
},
"panda" : {
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "panda",
"node_name" : "",
"status" : 4,
"task_id" : "b0888f7c-e4a4-4b4e-b163-21d283e166f1",
"threat" : ""
}
},
"expiration_date" : ISODate("2013-10-06T18:11:36.329Z"),
"file_name" : "ffgtr.exe",
"scan_status" : "DONE",
"task_id" : "4ce4ae9e-ef0a-476a-8189-92a5bfe328bd"
}
I would like to project with aggregation the fields 'date' and 'definitions' of every engine. ('definitions' is an embedded field)
I succeeded doing it only if I specify the specific engine:
cursor = collection.aggregate([
{ "$match": { "date": { "$gte": startdate } } },
{'$project': { "def": "$engines.avast.definitions","date":1,"_id": 0 }}
], allowDiskUse=True)
But I would like to run the query for all engines without specifying specific engine, something like:
cursor = collection.aggregate([
{ "$match": { "date": { "$gte": startdate } } },
{'$project': { "def": "$engines.$elemMatch.definitions","date":1,"_id": 0 }}
], allowDiskUse=True)
(which doesn't work)
What I want to find is the average difference between "date" and each "engine" definition date per engine type.
Upvotes: 2
Views: 67
Reputation: 50426
The main problem here is the structure of your data. In order to work with the aggregation framework and also happily work with most other MongoDB query operations and indexing practices then your data should be structured like this instead.
{
"_id" : ObjectId("52517d9e6e0af435ddd48219"),
"date" : ISODate("2013-10-06T18:11:26.329Z"),
"engines" : [
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "ahnlab",
"node_name" : "",
"status" : 1,
"task_id" : "7fac4f67-2bde-49de-980a-4f7fa1d46db7",
"threat" : "EICAR_Test_File"
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "avast",
"node_name" : "",
"status" : 4,
"task_id" : "fdce50e9-9bf3-4cc9-91a4-b674a108d478",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "avg",
"node_name" : "",
"status" : 1,
"task_id" : "4bdb4a37-80ab-4631-8587-edcbde7e2592",
"threat" : "EICAR_Test"
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "avira",
"node_name" : "",
"status" : 4,
"task_id" : "43b43bab-ff67-440c-9919-f6241ccaf539",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "bitdefender",
"node_name" : "",
"status" : 4,
"task_id" : "ba7efe74-ef93-465f-b6c3-6c4fc746934d",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "comodo",
"node_name" : "",
"status" : 4,
"task_id" : "831d2dc8-704b-4eb7-9a75-9a0364a8ab09",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "drweb",
"node_name" : "",
"status" : 4,
"task_id" : "ebb758c3-9146-4a99-b36f-0fb6ee024a33",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "f-prot",
"node_name" : "",
"status" : 4,
"task_id" : "13590a25-ac0d-4b1b-b93e-bc715009432a",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "forticlient",
"node_name" : "",
"status" : 4,
"task_id" : "",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "kaspersky",
"node_name" : "",
"status" : 4,
"task_id" : "",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "malwarebytes",
"node_name" : "",
"status" : 4,
"task_id" : "a97f3d9a-d6a9-44df-8355-c053e9f4980a",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "mcafee",
"node_name" : "",
"status" : 1,
"task_id" : "e7a592bb-84fc-4c47-a1ed-3719874b19cc",
"threat" : "EICAR test file"
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "msessentials",
"node_name" : "",
"status" : 4,
"task_id" : "01c9fb71-155a-473d-b45c-91fa117ae649",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "nod32",
"node_name" : "",
"status" : 4,
"task_id" : "a715cc8f-0e2f-4698-b883-a35fa6add13e",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "norman",
"node_name" : "",
"status" : 4,
"task_id" : "29da2955-0674-45d2-ac4d-c0b3ea401cba",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "norton",
"node_name" : "",
"status" : 4,
"task_id" : "c4cd1e3e-2f0d-4bf6-84cf-ab0962f7f4ed",
"threat" : ""
},
{
"definitions" : ISODate("2000-01-01T00:00:00.000Z"),
"name" : "panda",
"node_name" : "",
"status" : 4,
"task_id" : "b0888f7c-e4a4-4b4e-b163-21d283e166f1",
"threat" : ""
}
],
"expiration_date" : ISODate("2013-10-06T18:11:36.329Z"),
"file_name" : "ffgtr.exe",
"scan_status" : "DONE",
"task_id" : "4ce4ae9e-ef0a-476a-8189-92a5bfe328bd"
}
Then working with that structure is as simple as processing $unwind
and then a $group
for your results:
db.collection.aggregate([
{ "$unwind": "$engines" },
{ "$group": {
"_id": "$engines.name",
"avgInterval": { "$avg": {
"$subtract": [ "$date", "$engines.definitions" ]
}}
}}
])
The difference will be in milliseconds between two date objects.
Otherwise you are stuck with traversing the object keys in code using JavaScript with mapReduce:
db.collection.mapReduce(
function() {
var doc = this;
Object.keys(doc.engines).forEach(function(key) {
emit( key,
( doc.date.valueOf() -
doc.engines[key].definitions.valueOf())
);
})
},
function(key,values) {
return ( Array.sum(values) / values.length );
},
{ "out": { "inline": 1 } }
);
Both will give you the average value for each engine "name" accross the collection or other query input.
Try to change the structure as it is better suited to your ongoing needs and provides much faster processing.
Upvotes: 2