Jeff Shoemaker
Jeff Shoemaker

Reputation: 91

mongodb aggregation help. Remove a group by field

I need to group by an $hour field so I can match for specific hours in my query, but the end results I do not want that _id.hour in the output. I tried adding a 2nd group by, but the results are still showing multiple records of the same keywordid because it's returning results based on the first groupby which has multiple hours in the results.

db.getCollection("yahoohourlyquick").aggregate([{
"$match": {
     "pid" : {"$in" : [759]},
        "date":      {"$gte": new ISODate('2024-03-29')},
        "clicks":    {"$gt": 0},
        "graphdata": {"$regex": "^rs-"},
    },
},
{
    "$group": {
        "_id": {
            "date":   {"$hour" : "$date"},
            "pid":       "$pid",
            "keywordid": "$graphdata",
        },
        "rpc": {"$avg": "$cpc"},
        "clicks" : {"$sum" : "$clicks"},
    },
},
{
    "$match" : {
      "_id.date" : {$in:[1,2,3]}, 
      "clicks" : {"$gte" : 10}
    }
},
{
    "$group": {
        "_id": {
            "pid":       "$pid",
            "keywordid": "$graphdata",
        },
        "data" : {$push: {
            "clicks" : "$clicks",
            "rpc" : "$rpc",
            "keywordid" : "$_id.keywordid",
            "pid" : "$_id.pid"
        }}
    },
},
{$unwind : "$data"},
{
    "$project": {
          _id : 0,
          "pid" : "$data.pid",
          "keywordid" : "$data.keywordid",
          "clicks" : "$data.clicks",
          "rpc" : "$data.rpc"
     },
},
{
   "$sort" : {"rpc":-1}
   }
])  

My goals is to just have the output contain PID,KEYWORDID,SUM OF THE CLICKS, AVERAGE RPCS. There should not be rows with the same keywordid but I can't seem to solve this problem.

Example results when I run this: enter image description here

you can see keywordid "rs-2391021771" is in there twice because it has data for multiple hours. I don't want each hour broken out in the results.

Upvotes: 0

Views: 38

Answers (1)

Jeff Shoemaker
Jeff Shoemaker

Reputation: 91

I finally got it working.

db.getCollection("yahoohourlyquick").aggregate([{
"$match": {
     "pid" : {"$in" : [759]},
        "date":      {"$gte": new ISODate('2024-03-29')},
        "clicks":    {"$gt": 0},
        "graphdata": {"$regex": "^rs-"},
    },
},
{
    "$project": {
          "hour":   {"$hour" : "$date"},
          "pid" : "$pid",
          "keywordid" : "$graphdata",
          "rpc" : "$cpc",
          "clicks" : "$clicks"
     },
},
{
    "$match" : {
      "hour" : {$in:[23,0,1]}, 
    }
},
{
    "$group": {
        "_id": {
            "pid":       "$pid",
            "keywordid": "$keywordid",
        },
        "rpc": {"$avg": "$rpc"},
        "clicks" : {"$sum" : "$clicks"},
    },
},
{
    "$match" : {
      "clicks" : {"$gte" : 10}
    }
},
{
    "$project": {
          "pid" : "$_id.pid",
          "keywordid" : "$_id.keywordid",
          "rpc" : "$rpc",
          "clicks" : "$clicks"
     },
},
{
   "$sort" : {"rpc":-1}
   } 
])

Upvotes: 0

Related Questions