Parthiban Rajendran
Parthiban Rajendran

Reputation: 450

How to avoid time gaps while aggregating OHLC in mongoDB

I am trying to extract ohlc 5 min interval from a 1 min interval DB stored in mongoDB. Below is my current query.

    myModel.aggregate([
    {"$project":
      {
        "data":
        {
          "$let":
          {
            "vars":
            {
              "mints":{"$arrayElemAt":[{"$arrayElemAt":["$data",0]},0]},
              "maxts":{"$arrayElemAt":[{"$arrayElemAt":["$data",-1]},0]}
            },
            "in":
            {
              "$map":
              {
                "input":{"$range":["$$mints",{"$add":["$$maxts",300]},300]},
                "as":"rge",
                "in":
                {
                  "$let":
                  {
                    "vars":
                    {
                      "five":
                      {
                        "$filter":
                        {
                          "input":"$data",
                          "as":"fres",
                          "cond":
                          {
                            "$and":
                            [
                              {"$gte":[{"$arrayElemAt":["$$fres",0]},"$$rge"]},
                              {"$lt":[{"$arrayElemAt":["$$fres",0]},{"$add":["$$rge",300]}]}
                            ]
                          }
                        }
                      }
                    },
                    "in":
                    [
                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},0]},
                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",0]},1]},
                      {"$max":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",2]}}}},
                      {"$min":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",3]}}}},
                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-2]},
                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}
                    ]
                  }
                }
              }
            }
          }
        }
      }
    }
  ]);

It seem to extract the 5 min but not taking care of gaps in 1 min interval data. Instead for those time instants, I am getting null array. How do we avoid null arrays?

Sample 1 DB data: https://gist.github.com/parthi2929/36e6898cff7be45ccdd008ec750e70e9

5 min extracted output snapshot is here

As you can see in snapshot, I get lot of null arrays. How do I avoid them?

I tried inserting { "$ne":[{"$arrayElemAt":["$$fres",0]},null] } in $and operator, but it did not help.

Update 14th Feb 2018: As per Veeram's suggestion, below is the modified code incorporated with the suggested changes. However, I still get one empty array (that is apparently many empty arrays in that time gap is now gone, but replaced by single empty array) which should also be fixed.

db.getCollection('ohlc-koinex-1').aggregate(
[
    {"$project":
      {
        "data":
        {
          "$let":
          {
            "vars":
            {
              "mints":{"$arrayElemAt":[{"$arrayElemAt":["$data",0]},0]},
              "maxts":{"$arrayElemAt":[{"$arrayElemAt":["$data",-1]},0]}
            },
            "in":
            {
                "$setDifference":
                [
                    {
                      "$map":
                      {
                        "input":{"$range":["$$mints",{"$add":["$$maxts",300]},300]},
                        "as":"rge",
                        "in":
                        {
                          "$let":
                          {
                            "vars":
                            {
                              "five":
                              {
                                "$filter":
                                {
                                  "input":"$data",
                                  "as":"fres",
                                  "cond":
                                  {
                                    "$and":
                                    [
                                      {"$gte":[{"$arrayElemAt":["$$fres",0]},"$$rge"]},
                                      {"$lt":[{"$arrayElemAt":["$$fres",0]},{"$add":["$$rge",300]}]}
                                    ]
                                  }
                                }
                              }
                            },
                            "in":
                            {
                                "$cond":[
                                    {"$eq":["$$five",[]]},
                                    "$$five",
                                    [
                                      {"$arrayElemAt": [{"$arrayElemAt":["$$five",-1]},0]},
                                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",0]},1]},
                                      {"$max":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",2]}}}},
                                      {"$min":{"$map":{"input":"$$five","as":"res","in":{"$arrayElemAt":["$$res",3]}}}},
                                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-2]},
                                      {"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}
                                    ]
                                ]
                             }
                          }
                        }
                      }
                  },[]
                 ]
            }
          }
        }
      }
    }
]
)

Here is the snapshot of the result

Upvotes: 1

Views: 171

Answers (1)

s7vr
s7vr

Reputation: 75934

You can add $cond operator to account for gaps followed by $filter to filter the empty array values.

You require two changes.

The first change to store [] values for gaps instead of array with null values.

Update inner $let expression to below:

{
  "$let":{
    "vars":{"five":...},
    "in":{
      "$cond":[
        {"$eq":["$$five",[]]},
        "$$five",
        [{"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},0]},
         ....
        {"$arrayElemAt":[{"$arrayElemAt":["$$five",-1]},-1]}]
      ]
    }
  }
}

The second change to filter the empty array values from the output.

{
  "$project":{
    "data":{
      "$let":{
        "vars":{"mints":...},
        "in":{"$filter":{"input":{"$map":...},as:"flr", "cond":{"$ne":["$$flr",[]]}}}
      }
    }
  }
}

Upvotes: 1

Related Questions