Reputation: 650
Here is a playground I set up to make it easier to find an answer: https://mongoplayground.net/p/rthdTzpHkZ1
I have a collection of items
[
{
"_id": ObjectId("611ba6d26aaaf2446f7fc45e"),
"hashtags": [
"test1",
"test2"
],
"hour": ISODate("2021-08-16T04:00:00.000Z"),
"username": "username1",
"lang": "fr",
},
{
"_id": ObjectId("611ba6d26aaaf2446f7fc45f"),
"hashtags": [
"test1"
],
"hour": ISODate("2021-08-09T04:00:00.000Z"),
"lang": "fr",
"username": "username1"
},
{
"_id": ObjectId("611ba6d26aaaf2446f7fc460"),
"hashtags": [
"test2",
"test3"
],
"hour": ISODate("2021-08-16T04:00:00.000Z"),
"lang": "en",
"username": "username2"
},
{
"_id": ObjectId("611ba6d26aaaf2446f7fc462"),
"hashtags": [
"test2",
"test3"
],
"hour": ISODate("2021-08-16T04:00:00.000Z"),
"lang": "en",
"username": "username2"
}
]
And I want to:
- group by `hour`
- get `hashtag`, `lang` and `username` as keys, along with the number of times they appeared
- sort `hashtag`, `lang` and `username` alphabetically

like so:
[
{
"_id": {
"hour": ISODate("2021-08-16T04:00:00Z")
},
"hashtags": {
"test1": 1,
"test2": 3,
"test3": 2
},
"languages": {
"en": 2,
"fr": 1
},
"usernames": {
"username1": 1,
"username2": 2
},
"nbLikes": 0,
"nbQuotes": 0,
"nbRetweets": 0,
"nbTweets": 3
},
{
"_id": {
"hour": ISODate("2021-08-09T04:00:00Z")
},
"hashtags": {
"test1": 1
},
"languages": {
"fr": 1
},
"usernames": {
"username1": 1
},
"nbLikes": 0,
"nbQuotes": 0,
"nbRetweets": 0,
"nbTweets": 1
}
]
The closest I got is in the playground here https://mongoplayground.net/p/rthdTzpHkZ1
Thanks for your help
Upvotes: 1
Views: 44
Reputation: 13103
Well, you need to use the `$arrayToObject` operator to create a dictionary with `language`, `hashtags` and `username` as keys.
                       $map + $reduce
["en", "fr", "en"]   --------------->   [{k:"en", v:2}, {k:"fr", v:1}, {k:"en", v:2}]

[{k:"en", v:2},        $arrayToObject
 {k:"fr", v:1},      --------------->   {"en":2, "fr":1}
 {k:"en", v:2}]
// Groups the documents by `hour` and, for each hour, produces:
//   - nbTweets / nbRetweets / nbQuotes / nbLikes : numeric totals
//   - usernames / hashtags / languages           : objects mapping each
//     distinct value to the number of times it occurred in that hour
// Results are returned with the most recent hour first.
db.collection.aggregate([
  // Stage 1: one output document per distinct `hour`. The per-document values
  // are collected into arrays (duplicates kept) so they can be counted later.
  {
    "$group": {
      "_id": { "hour": "$hour" },
      "nbTweets": { "$sum": 1 },
      "nbRetweets": { "$sum": "$retweetCount" },
      // NOTE(review): assumes the quote counter is stored as `quoteCount`, by
      // analogy with `retweetCount` / `likeCount` -- confirm against the
      // collection schema. Summing `retweetCount` here would simply duplicate
      // nbRetweets.
      "nbQuotes": { "$sum": "$quoteCount" },
      "nbLikes": { "$sum": "$likeCount" },
      "usernames": { "$push": "$username" },
      // Each document holds an array of hashtags, so this is an array of arrays.
      "hashtags": { "$push": "$hashtags" },
      "languages": { "$push": "$lang" }
    }
  },
  // Stage 2: flatten the array of hashtag arrays into a single flat array.
  {
    "$addFields": {
      "hashtags": {
        "$reduce": {
          "input": "$hashtags",
          "initialValue": [],
          "in": { "$concatArrays": ["$$value", "$$this"] }
        }
      }
    }
  },
  // Stage 3: turn each array of values into a { value: count } object.
  // For every *distinct* value ($setUnion removes duplicates) an inner $reduce
  // walks the full array and counts the matching entries; $arrayToObject then
  // converts the resulting [{k, v}, ...] pairs into an object.
  // $setUnion does not guarantee element order, so the key order of the
  // resulting objects is not guaranteed either; on MongoDB 5.2+ the $map
  // input can be wrapped in $sortArray if alphabetical keys are required.
  {
    "$addFields": {
      "languages": {
        "$arrayToObject": [
          {
            "$map": {
              "input": { "$setUnion": ["$languages", []] },
              "as": "lang",
              "in": {
                "k": "$$lang",
                "v": {
                  "$reduce": {
                    "input": "$languages",
                    "initialValue": 0,
                    "in": {
                      "$cond": [
                        { "$eq": ["$$lang", "$$this"] },
                        { "$add": ["$$value", 1] },
                        "$$value"
                      ]
                    }
                  }
                }
              }
            }
          }
        ]
      },
      "hashtags": {
        "$arrayToObject": [
          {
            "$map": {
              "input": { "$setUnion": ["$hashtags", []] },
              "as": "hash",
              "in": {
                "k": "$$hash",
                "v": {
                  "$reduce": {
                    "input": "$hashtags",
                    "initialValue": 0,
                    "in": {
                      "$cond": [
                        { "$eq": ["$$hash", "$$this"] },
                        { "$add": ["$$value", 1] },
                        "$$value"
                      ]
                    }
                  }
                }
              }
            }
          }
        ]
      },
      "usernames": {
        "$arrayToObject": [
          {
            "$map": {
              "input": { "$setUnion": ["$usernames", []] },
              "as": "user",
              "in": {
                "k": "$$user",
                "v": {
                  "$reduce": {
                    "input": "$usernames",
                    "initialValue": 0,
                    "in": {
                      "$cond": [
                        { "$eq": ["$$user", "$$this"] },
                        { "$add": ["$$value", 1] },
                        "$$value"
                      ]
                    }
                  }
                }
              }
            }
          }
        ]
      }
    }
  },
  // Stage 4: most recent hour first.
  {
    "$sort": { "_id.hour": -1 }
  }
])
Upvotes: 1