Reputation: 5
I'm trying to work out how to query a document which has two layers of nested arrays.
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
I want to be able to search through the collection and return all of the subdomains where where there is a match on the port number supplied. So far I've tried (in PyMongo)
result = db.aggregate([{'$match': {'subdomains.shodan.ports.port': port}},
{'$project': {
'subdomains': {'$filter': {
'input': '$subdomains.shodan.ports',
'cond': {'$eq': ['$$this.port', port]}
}}
}}])
When I run this I don't get any results back at all. I've played around with my $filter
but can't seem to get any results out. I'm using a similar aggregation for querying within just the subdomains
array and it works fine, I'm just struggling with the array within an array and wondering if I need a different approach.
Upvotes: 0
Views: 230
Reputation: 3010
The following query can get us the expected output:
db.collection.aggregate([
{
$project:{
"subdomains":{
$filter:{
"input":"$subdomains",
"as":"subdomain",
"cond":{
$in:[
443,
{
$ifNull:[
"$$subdomain.shodan.ports.port",
[]
]
}
]
}
}
}
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
Output:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}
Upvotes: 0
Reputation: 1274
Try aggregate pipeline below:
db.collection.aggregate([
{
$unwind: "$subdomains"
},
{
$match: {
"subdomains.shodan.ports": {
$elemMatch: {
port: 443
},
$ne: null
}
}
},
{
$group: {
_id: "$_id",
"org-name": {
$last: "$org-name"
},
"domain": {
$last: "$domain"
},
"subdomains": {
$push: "$subdomains"
}
}
}
])
giving output:
[
{
"_id": ObjectId("5d7fb679d76f3bbf82ed952e"),
"domain": "shropscommunityhealth.nhs.uk",
"org-name": "Shropshire Community Health NHS Trust",
"subdomains": [
{
"a_rr": "195.49.146.9",
"asn": 21472,
"asn_org": "ServerHouse Ltd",
"city": "Portsmouth",
"country": "United Kingdom",
"data_retrieved": ISODate("2019-09-16T17:21:11.468Z"),
"firstSeen": "2015-10-17 01:10:00",
"name": "www.shropscommunityhealth.nhs.uk",
"shodan": {
"ports": [
{
"cpe": "cpe:/a:microsoft:internet_information_server:8.5",
"port": 443,
"product": "Microsoft IIS httpd"
},
{
"cpe": "cpe:/o:microsoft:windows",
"port": 80,
"product": "Microsoft HTTPAPI httpd"
}
],
"timestamp": ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}
]
Upvotes: 1