draco951
draco951

Reputation: 368

How to merge dynamic number data with condition?

Let take the following dataset

timestamp_bin deviceId flow level pressure
2020-05-15T00:00:00Z fddf1cec-16db-4461-9057-3d08e46b6bcf NaN 55 NaN
2020-05-15T00:00:00Z aaaaaaaa-fed4-c23b-422b-e85e0877c092 365 85 NaN
2020-05-15T00:00:00Z cb04ccff-48bc-4108-9d16-7d7db9152895 NaN NaN 130

I would like to merge the flow, level and pressure column without the NaN values and without having to mention the flow, level and pressure column:

deviceId timestamp value
fddf1cec-16db-4461-9057-3d08e46b6bcf 2020-05-15 17:01:35.7750000 {"level": 55.0}
aaaaaaaa-fed4-c23b-422b-e85e0877c092 2020-05-15 17:01:35.7750000 {"flow": 365.0, "level": 85.0}
cb04ccff-48bc-4108-9d16-7d7db9152895 2020-05-15 17:01:35.7750000 {"pressure": 130.0}

The following query allow to achieve this result

let deviceTelemetry = datatable (deviceId: guid, timestamp: datetime, flow: real, level: real, pressure: real)
[
    'fddf1cec-16db-4461-9057-3d08e46b6bcf', '2020-05-15 17:01:35.7750000', real(NaN),        55, real(NaN),
    'aaaaaaaa-fed4-c23b-422b-e85e0877c092', '2020-05-15 17:01:35.7750000',       365,        85, real(NaN),
    'cb04ccff-48bc-4108-9d16-7d7db9152895', '2020-05-15 17:01:35.7750000', real(NaN), real(NaN),       130,
];
deviceTelemetry
| extend packAll=pack_all()
| extend packWithNan=bag_remove_keys(packAll, dynamic(['deviceId', 'timestamp']))
| project-away packAll
| mv-expand kind=array packWithNan
| where packWithNan[1] != 'NaN'
| extend packWithoutNan=pack(tostring(packWithNan[0]), packWithNan[1])
| summarize value=make_bag(packWithoutNan) by deviceId, timestamp

Is there a better way to achieve this query ?

Edit:

This discussion is a continuation of this previous thread

Upvotes: 1

Views: 303

Answers (2)

David דודו Markovitz
David דודו Markovitz

Reputation: 44941

... and just for the fun of it, here is an extension of the previous solution:

let deviceTelemetry = datatable (deviceId:guid, timestamp:datetime, value:dynamic)[
'fddf1cec-16db-4461-9057-3d08e46b6bcf','2020-05-15 17:01:35.7750000', dynamic({ "level":  60}),
'fddf1cec-16db-4461-9057-3d08e46b6bcf','2020-05-15 18:01:35.7750000', dynamic({ "level":  50}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 17:01:35.7750000', dynamic({ "level": 100, "flow": 350}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 18:01:35.7750000', dynamic({ "level":  90, "flow": 360}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 19:01:35.7750000', dynamic({ "level":  80, "flow": 370}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 20:01:35.7750000', dynamic({ "level":  70, "flow": 380}),
'cb04ccff-48bc-4108-9d16-7d7db9152895','2020-05-15 21:01:35.7750000', dynamic({ "pressure":  120}),
'cb04ccff-48bc-4108-9d16-7d7db9152895','2020-05-15 20:01:35.7750000', dynamic({ "pressure":  130}),
'cb04ccff-48bc-4108-9d16-7d7db9152895','2020-05-15 21:01:35.7750000', dynamic({ "pressure":  140}),
];
deviceTelemetry
| mv-expand kind=array value
| extend k = tostring(value[0]), v = toreal(value[1])
| extend timestamp_bin = bin(timestamp , 1d)
| evaluate pivot(k, avg(v), timestamp_bin, deviceId)
// add the following 2 lines of code
| mv-apply with_itemindex=i pa = pack_all() on (summarize make_bag_if(pa, i >= 2 and tostring(pa) !has_cs "NaN"))
| project timestamp_bin, deviceId, bag_pa
timestamp_bin deviceId bag_pa
2020-05-15T00:00:00Z fddf1cec-16db-4461-9057-3d08e46b6bcf {"level":55}
2020-05-15T00:00:00Z aaaaaaaa-fed4-c23b-422b-e85e0877c092 {"flow":365,"level":85}
2020-05-15T00:00:00Z cb04ccff-48bc-4108-9d16-7d7db9152895 {"pressure":130}

Fiddle

Upvotes: 3

David דודו Markovitz
David דודו Markovitz

Reputation: 44941

This is why it's important to share the whole scenario and not just a friction of it :-)

let deviceTelemetry = datatable (deviceId:guid, timestamp:datetime, value:dynamic)[
'fddf1cec-16db-4461-9057-3d08e46b6bcf','2020-05-15 17:01:35.7750000', dynamic({ "level":  60}),
'fddf1cec-16db-4461-9057-3d08e46b6bcf','2020-05-15 18:01:35.7750000', dynamic({ "level":  50}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 17:01:35.7750000', dynamic({ "level": 100, "flow": 350}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 18:01:35.7750000', dynamic({ "level":  90, "flow": 360}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 19:01:35.7750000', dynamic({ "level":  80, "flow": 370}),
'aaaaaaaa-fed4-c23b-422b-e85e0877c092','2020-05-15 20:01:35.7750000', dynamic({ "level":  70, "flow": 380}),
'cb04ccff-48bc-4108-9d16-7d7db9152895','2020-05-15 21:01:35.7750000', dynamic({ "pressure":  120}),
'cb04ccff-48bc-4108-9d16-7d7db9152895','2020-05-15 20:01:35.7750000', dynamic({ "pressure":  130}),
'cb04ccff-48bc-4108-9d16-7d7db9152895','2020-05-15 21:01:35.7750000', dynamic({ "pressure":  140}),
];
deviceTelemetry
| mv-expand kind=array value
| extend k = tostring(value[0]), v = toreal(value[1])
| summarize avg(v) by k, timestamp_bin = bin(timestamp , 1d), deviceId
| summarize make_bag(pack_dictionary(k, avg_v)) by timestamp_bin, deviceId
timestamp_bin deviceId bag_
2020-05-15T00:00:00Z fddf1cec-16db-4461-9057-3d08e46b6bcf {"level":55}
2020-05-15T00:00:00Z aaaaaaaa-fed4-c23b-422b-e85e0877c092 {"level":85,"flow":365}
2020-05-15T00:00:00Z cb04ccff-48bc-4108-9d16-7d7db9152895 {"pressure":130}

Fiddle

Upvotes: 1

Related Questions