Reputation: 4032
I want to check how fast my queries are and would like to display a cumulative histogram to understand how many are in the fastest 10%, slowest 10% and so on.
I enabled
query.tracing.enabled=true
So I can now do
select * FROM _query_trace
And I get data like
"ts","query_text","execution_micros","principal"
"2025-02-27T19:35:32.184744Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684632134000 as timestamp) AND timestamp <= cast(1740684932134000 as timestamp)\nsample by 1m \nORDER BY 1,2",1169,"admin"
"2025-02-27T19:35:32.645992Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684632633000 as timestamp) AND timestamp <= cast(1740684932633000 as timestamp)\nsample by 30s \nORDER BY 1,2",3097,"admin"
"2025-02-27T19:35:32.648683Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684632633000 as timestamp) AND timestamp <= cast(1740684932633000 as timestamp)\nsample by 1m \nORDER BY 1,2",3988,"admin"
"2025-02-27T19:35:32.663753Z","WITH average_vals AS (\n select device_type, avg(duration_ms) as avg_duration_ms, avg(speed) as avg_speed, avg(measure1) as avg_m1, avg(measure2) as avg_m2 from iot_data \n WHERE\n timestamp >= cast(1740684632633000 as timestamp) AND timestamp <= cast(1740684932633000 as timestamp)\n), latest_seen AS (\nselect timestamp, device_type, lat, lon from 'iot_data' latest on timestamp partition by device_type)\nSelect timestamp as `time`, latest_seen.device_type as device_type, lat, lon, avg_duration_ms, avg_speed, avg_m1, avg_m2\nfrom latest_seen JOIN average_vals ON (device_type)\n\n",2787,"admin"
"2025-02-27T19:35:32.668832Z","SELECT\n device_type,\n avg(speed) as avg_speed\nFROM iot_data\nWHERE\n timestamp >= cast(1740684632633000 as timestamp) AND timestamp <= cast(1740684932633000 as timestamp)\nORDER BY 1",1362,"admin"
"2025-02-27T19:35:32.678185Z","WITH in_interval AS (\nSELECT count(*) as total_in_interval\nFROM iot_data\nWHERE\n timestamp >= cast(1740684632633000 as timestamp) AND timestamp <= cast(1740684932633000 as timestamp)\n ), absolute_total AS (\n SELECT count(*) as total_seen\nFROM iot_data\n )\n select * from in_interval cross join absolute_total\n \n",112,"admin"
"2025-02-27T19:35:32.679355Z","select device_type, avg(duration_ms) as avg_duration_ms, avg(speed) as avg_speed, avg(measure1) as avg_m1, avg(measure2) as avg_m2 from iot_data \n WHERE\n timestamp >= cast(1740684632633000 as timestamp) AND timestamp <= cast(1740684932633000 as timestamp)\n\n",1476,"admin"
"2025-02-27T19:35:33.146405Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633135000 as timestamp) AND timestamp <= cast(1740684933135000 as timestamp)\nsample by 30s \nORDER BY 1,2",1153,"admin"
"2025-02-27T19:35:33.155656Z","WITH average_vals AS (\n select device_type, avg(duration_ms) as avg_duration_ms, avg(speed) as avg_speed, avg(measure1) as avg_m1, avg(measure2) as avg_m2 from iot_data \n WHERE\n timestamp >= cast(1740684633135000 as timestamp) AND timestamp <= cast(1740684933135000 as timestamp)\n), latest_seen AS (\nselect timestamp, device_type, lat, lon from 'iot_data' latest on timestamp partition by device_type)\nSelect timestamp as `time`, latest_seen.device_type as device_type, lat, lon, avg_duration_ms, avg_speed, avg_m1, avg_m2\nfrom latest_seen JOIN average_vals ON (device_type)\n\n",2293,"admin"
"2025-02-27T19:35:33.155821Z","select device_type, avg(duration_ms) as avg_duration_ms, avg(speed) as avg_speed, avg(measure1) as avg_m1, avg(measure2) as avg_m2 from iot_data \n WHERE\n timestamp >= cast(1740684633135000 as timestamp) AND timestamp <= cast(1740684933135000 as timestamp)\n\n",1139,"admin"
"2025-02-27T19:35:33.162180Z","SELECT\n device_type,\n avg(speed) as avg_speed\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633135000 as timestamp) AND timestamp <= cast(1740684933135000 as timestamp)\nORDER BY 1",1207,"admin"
"2025-02-27T19:35:33.163882Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633135000 as timestamp) AND timestamp <= cast(1740684933135000 as timestamp)\nsample by 1m \nORDER BY 1,2",2068,"admin"
"2025-02-27T19:35:33.166622Z","WITH in_interval AS (\nSELECT count(*) as total_in_interval\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633135000 as timestamp) AND timestamp <= cast(1740684933135000 as timestamp)\n ), absolute_total AS (\n SELECT count(*) as total_seen\nFROM iot_data\n )\n select * from in_interval cross join absolute_total\n \n",124,"admin"
"2025-02-27T19:35:33.651734Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633637000 as timestamp) AND timestamp <= cast(1740684933637000 as timestamp)\nsample by 30s \nORDER BY 1,2",1724,"admin"
"2025-02-27T19:35:33.665582Z","select device_type, avg(duration_ms) as avg_duration_ms, avg(speed) as avg_speed, avg(measure1) as avg_m1, avg(measure2) as avg_m2 from iot_data \n WHERE\n timestamp >= cast(1740684633638000 as timestamp) AND timestamp <= cast(1740684933638000 as timestamp)\n\n",522,"admin"
"2025-02-27T19:35:33.667067Z","WITH average_vals AS (\n select device_type, avg(duration_ms) as avg_duration_ms, avg(speed) as avg_speed, avg(measure1) as avg_m1, avg(measure2) as avg_m2 from iot_data \n WHERE\n timestamp >= cast(1740684633638000 as timestamp) AND timestamp <= cast(1740684933638000 as timestamp)\n), latest_seen AS (\nselect timestamp, device_type, lat, lon from 'iot_data' latest on timestamp partition by device_type)\nSelect timestamp as `time`, latest_seen.device_type as device_type, lat, lon, avg_duration_ms, avg_speed, avg_m1, avg_m2\nfrom latest_seen JOIN average_vals ON (device_type)\n\n",1041,"admin"
"2025-02-27T19:35:33.678890Z","SELECT\n device_type,\n avg(speed) as avg_speed\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633637000 as timestamp) AND timestamp <= cast(1740684933637000 as timestamp)\nORDER BY 1",928,"admin"
"2025-02-27T19:35:33.679755Z","SELECT\n timestamp AS ""time"", device_type,\n avg(measure1) AS metric,\n avg(measure2) as m2\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633637000 as timestamp) AND timestamp <= cast(1740684933637000 as timestamp)\nsample by 1m \nORDER BY 1,2",1834,"admin"
"2025-02-27T19:35:33.690640Z","WITH in_interval AS (\nSELECT count(*) as total_in_interval\nFROM iot_data\nWHERE\n timestamp >= cast(1740684633638000 as timestamp) AND timestamp <= cast(1740684933638000 as timestamp)\n ), absolute_total AS (\n SELECT count(*) as total_seen\nFROM iot_data\n )\n select * from in_interval cross join absolute_total\n \n",146,"admin"
"2025-02-27T19:35:33.831041Z","tables();",844,"admin"
I can then do SAMPLE BY
to get queries per second or GROUP BY
to get total time spent for specific queries, as in:
select md5(query_text), first(query_text), sum(execution_micros) as total from _query_trace order by total desc;
But I cannot figure out how to get an histogram.
Upvotes: 0
Views: 10
Reputation: 4032
We can create a subquery that first calculates the percentiles for each bucket, in this case at 10% intervals. Then on a second query we can do a UNION
of 10 subqueries where each is doing a CROSS JOIN
against the calculated percentiles and finding how many queries are below the threshold for the bucket.
Note in this case the histogram is cumulative, and each bucket includes the results from the smaller buckets as well. If we prefer non cumulative, the condition would change from less than to between
.
WITH quantiles AS (
SELECT
approx_percentile(execution_micros, 0.10, 5) AS p10,
approx_percentile(execution_micros, 0.20, 5) AS p20,
approx_percentile(execution_micros, 0.30, 5) AS p30,
approx_percentile(execution_micros, 0.40, 5) AS p40,
approx_percentile(execution_micros, 0.50, 5) AS p50,
approx_percentile(execution_micros, 0.60, 5) AS p60,
approx_percentile(execution_micros, 0.70, 5) AS p70,
approx_percentile(execution_micros, 0.80, 5) AS p80,
approx_percentile(execution_micros, 0.90, 5) AS p90,
approx_percentile(execution_micros, 1.0, 5) AS p100
FROM _query_trace
), cumulative_hist AS (
SELECT '10' AS bucket, p10 as micros_threshold, count(*) AS frequency
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p10
UNION ALL
SELECT '20', p20 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p20
UNION ALL
SELECT '30', p30 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p30
UNION ALL
SELECT '40', p40 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p40
UNION ALL
SELECT '50', p50 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p50
UNION ALL
SELECT '60', p60 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p60
UNION ALL
SELECT '70', p70 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p70
UNION ALL
SELECT '80', p80 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p80
UNION ALL
SELECT '90', p90 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
WHERE execution_micros < p90
UNION ALL
SELECT '100', p100 as micros_threshold, count(*)
FROM _query_trace CROSS JOIN quantiles
)
select * from cumulative_hist;
The output is this:
"bucket","micros_threshold","frequency"
"10",215.0,26
"20",348.0,53
"30",591.0,80
"40",819.0,106
"50",1088.0,133
"60",1527.0,160
"70",2293.0,186
"80",4788.0,213
"90",23016.0,240
"100",1078759.0,267
Upvotes: 0