Reputation: 172993
All the rank formulas on GBQ reference page assume there are multiple rows, 1 column. For the below, I’m trying to figure out the rank of Algo1 through Algo5s.
theTable:
cid algo_1 algo_2 algo_3 algo_4 algo_5
1 4.31 4.15 4.33 4.35 4.35
2 1.31 4.15 4.33 4.34 3.35
So, result is
cid algo_1 algo_2 algo_3 algo_4 algo_5 algo_1_rank algo_2_rank algo_3_rank algo_4_rank algo_5_rank
1 4.31 4.15 4.33 4.35 4.35 4 5 3 1 1
2 1.31 4.15 4.33 4.34 3.35 5 3 2 1 4
P.S. Actually, I was asked this question outside SO, so decided to share here
Upvotes: 0
Views: 658
Reputation: 33745
Mikhail gave some great solutions to your problem--please mark his response as accepted if one of them worked for you. I wanted to give an alternate form of #3 if you end up using it, though; you can express the transformation with a SQL UDF:
CREATE TEMPORARY FUNCTION myRank(arr ARRAY<FLOAT64>) AS
((SELECT ARRAY(SELECT off + 1 FROM UNNEST(arr) x WITH OFFSET off ORDER BY x DESC)));
WITH theTable AS (
SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5 UNION ALL
SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 3.35 AS algo_5
),
tempTable AS (
SELECT *, myRank([algo_1, algo_2, algo_3, algo_4, algo_5]) AS ranks
FROM theTable
)
SELECT
cid, algo_1, algo_2, algo_3, algo_4, algo_5,
ranks[ORDINAL(1)] AS algo_1_rank,
ranks[ORDINAL(2)] AS algo_2_rank,
ranks[ORDINAL(3)] AS algo_3_rank,
ranks[ORDINAL(4)] AS algo_4_rank,
ranks[ORDINAL(5)] AS algo_5_rank
FROM tempTable;
Upvotes: 1
Reputation: 172993
Option 1 - Standard SQL
WITH theTable AS (
SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5 UNION ALL
SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 3.35 AS algo_5
),
tempTable AS (
SELECT
cid,
alg.k AS algo,
alg.v AS value,
RANK() OVER(PARTITION BY cid ORDER BY v DESC) AS rnk
FROM theTable, UNNEST(
ARRAY[STRUCT<k STRING, v FLOAT64>("algo_1", algo_1), STRUCT("algo_2", algo_2),
STRUCT("algo_3", algo_3), STRUCT("algo_4", algo_4), STRUCT("algo_5", algo_5)]
) AS alg
)
SELECT
cid,
MAX(IF(algo = "algo_1", value, NULL)) AS algo_1,
MAX(IF(algo = "algo_2", value, NULL)) AS algo_2,
MAX(IF(algo = "algo_3", value, NULL)) AS algo_3,
MAX(IF(algo = "algo_4", value, NULL)) AS algo_4,
MAX(IF(algo = "algo_5", value, NULL)) AS algo_5,
MAX(IF(algo = "algo_1", rnk, NULL)) AS algo_1_rank,
MAX(IF(algo = "algo_2", rnk, NULL)) AS algo_2_rank,
MAX(IF(algo = "algo_3", rnk, NULL)) AS algo_3_rank,
MAX(IF(algo = "algo_4", rnk, NULL)) AS algo_4_rank,
MAX(IF(algo = "algo_5", rnk, NULL)) AS algo_5_rank
FROM tempTable
GROUP BY cid
Option 2 - Legacy SQL
SELECT
cid,
MAX(IF(num = 1, value, NULL)) AS algo_1,
MAX(IF(num = 2, value, NULL)) AS algo_2,
MAX(IF(num = 3, value, NULL)) AS algo_3,
MAX(IF(num = 4, value, NULL)) AS algo_4,
MAX(IF(num = 5, value, NULL)) AS algo_5,
MAX(IF(num = 1, rnk, NULL)) AS algo_1_rank,
MAX(IF(num = 2, rnk, NULL)) AS algo_2_rank,
MAX(IF(num = 3, rnk, NULL)) AS algo_3_rank,
MAX(IF(num = 4, rnk, NULL)) AS algo_4_rank,
MAX(IF(num = 5, rnk, NULL)) AS algo_5_rank
FROM (
SELECT
cid, num,
CASE
WHEN num = 1 THEN algo_1
WHEN num = 2 THEN algo_2
WHEN num = 3 THEN algo_3
WHEN num = 4 THEN algo_4
WHEN num = 5 THEN algo_5
END AS value,
RANK() OVER(PARTITION BY cid ORDER BY value DESC) AS rnk
FROM (
SELECT * FROM
(SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5),
(SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 3.35 AS algo_5)
) AS theTable
CROSS JOIN (
SELECT INTEGER(SPLIT("1,2,3,4,5")) AS num FROM (SELECT 1)
) AS nums
)
GROUP BY cid
Option 3 - Standard SQL with Scalar UDF
CREATE TEMPORARY FUNCTION myRank(a float64, b float64, c float64, d float64, e float64)
RETURNS ARRAY<int64>
LANGUAGE js AS """
var arr = [a, b, c, d, e];
var sorted = arr.slice().sort(function(a,b){return b-a})
var ranks = arr.slice().map(function(v){ return sorted.indexOf(v)+1 });
return ranks
""";
WITH theTable AS (
SELECT 1 AS cid, 4.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 4.35 AS algo_5 UNION ALL
SELECT 2 AS cid, 1.31 AS algo_1, 4.15 AS algo_2, 4.33 AS algo_3, 4.35 AS algo_4, 3.35 AS algo_5
),
tempTable AS (
SELECT *, myRank(algo_1, algo_2, algo_3, algo_4, algo_5) AS ranks
FROM theTable
)
SELECT
cid, algo_1, algo_2, algo_3, algo_4, algo_5,
ranks[ORDINAL(1)] AS algo_1_rank,
ranks[ORDINAL(2)] AS algo_2_rank,
ranks[ORDINAL(3)] AS algo_3_rank,
ranks[ORDINAL(4)] AS algo_4_rank,
ranks[ORDINAL(5)] AS algo_5_rank
FROM tempTable
Option 4 – Taking care of ranking in advance
Usually tables of schema as theTable
is generated off of multiple rows with just one test entry per row (cid, algo, value as in theOriginalData
below)
It makes most sense to actually do ranking right at this point
SELECT
cid,
MAX(IF(algo = "algo_1", value, NULL)) AS algo_1,
MAX(IF(algo = "algo_2", value, NULL)) AS algo_2,
MAX(IF(algo = "algo_3", value, NULL)) AS algo_3,
MAX(IF(algo = "algo_4", value, NULL)) AS algo_4,
MAX(IF(algo = "algo_5", value, NULL)) AS algo_5,
MAX(IF(algo = "algo_1", rnk, NULL)) AS algo_1_rank,
MAX(IF(algo = "algo_2", rnk, NULL)) AS algo_2_rank,
MAX(IF(algo = "algo_3", rnk, NULL)) AS algo_3_rank,
MAX(IF(algo = "algo_4", rnk, NULL)) AS algo_4_rank,
MAX(IF(algo = "algo_5", rnk, NULL)) AS algo_5_rank,
FROM (
SELECT
cid, algo, value,
RANK() OVER(PARTITION BY cid ORDER BY value DESC) AS rnk
FROM (
SELECT * FROM
(SELECT 1 AS cid, "algo_1" AS algo, 4.31 AS value),
(SELECT 1 AS cid, "algo_2" AS algo, 4.15 AS value),
(SELECT 1 AS cid, "algo_3" AS algo, 4.33 AS value),
(SELECT 1 AS cid, "algo_4" AS algo, 4.35 AS value),
(SELECT 1 AS cid, "algo_5" AS algo, 4.35 AS value),
(SELECT 2 AS cid, "algo_1" AS algo, 1.31 AS value),
(SELECT 2 AS cid, "algo_2" AS algo, 4.15 AS value),
(SELECT 2 AS cid, "algo_3" AS algo, 4.33 AS value),
(SELECT 2 AS cid, "algo_4" AS algo, 4.34 AS value),
(SELECT 2 AS cid, "algo_5" AS algo, 3.35 AS value)
) AS theOriginalData
)
GROUP BY cid
Note: if if were up to me - I would go with Option #4 and in case if it is not possible for some reason (for example theTable already exists) I would go with Option #3 as it looks most elegant to me
Upvotes: 1