Reputation: 159
I have a large table of percentiles generated using the PERCENT_RANK() function within Big Query. The table output generates many rows of data that have percentile numbers that are very close to each other. I am looking to only return 10 rows with values that are closets to the 100th, 90th, 80th, 70th etc. percentiles.
More specifically, I am looking for the number that is closest to the 80th percentile (.8) and have the following sample values:
.81876543 .81123141 .80121214 .80012123 .80001213 .80001112 .79999121
In this case .80001112 is the closest to .8.
Is there a SQL function I can use that only returns the ten values closest to those percentiles.
Upvotes: 0
Views: 1557
Reputation: 172993
Below example is for BigQuery Standard SQL
#standardSQL
WITH `project.dataset.percentiles` AS (
SELECT .81876543 percentile UNION ALL
SELECT .81123141 UNION ALL
SELECT .80121214 UNION ALL
SELECT .80012123 UNION ALL
SELECT .80001213 UNION ALL
SELECT .80001112 UNION ALL
SELECT .79999121
), targets AS (
SELECT check
FROM UNNEST([1, .9, .8, .7, .6, .5, .4, .3, .2, .1]) check
)
SELECT check, ARRAY_AGG(percentile ORDER BY ABS(percentile - check) LIMIT 10) val
FROM `project.dataset.percentiles`
CROSS JOIN targets
WHERE ABS(percentile - check) < .05
GROUP BY check
ORDER BY check
above gives you 10 closest values per each percentile - 100%, 90% 80% etc
If you need just one per each - you can check out below query
#standardSQL
WITH `project.dataset.percentiles` AS (
SELECT .81876543 percentile UNION ALL
SELECT .81123141 UNION ALL
SELECT .80121214 UNION ALL
SELECT .80012123 UNION ALL
SELECT .80001213 UNION ALL
SELECT .80001112 UNION ALL
SELECT .79999121
), targets AS (
SELECT check
FROM UNNEST([1, .9, .8, .7, .6, .5, .4, .3, .2, .1]) check
)
SELECT check, ARRAY_AGG(percentile ORDER BY ABS(percentile - check) LIMIT 1)[SAFE_OFFSET(0)] val
FROM `project.dataset.percentiles`
CROSS JOIN targets
WHERE ABS(percentile - check) < .05
GROUP BY check
ORDER BY check
Upvotes: 1