Reputation: 31960
I have a table myTable
, with a single column table containing an integer that represents a single byte value like this:
status_code
-----------
(single bit values:)
1
2
4
8
(multiple bit values:)
...
12
3
etc.
I'm selecting an overall 'status' based on the total proportion of bits that match a certain value, using logic that looks like the following. As you can see, there's a bit of repetition where I'm re-performing some identical calculations:
select
(case when (sum(case when status_code >> 4 = 1 then 1 else 0 end) / count(*)) +
(sum(case when status_code >> 3 = 1 then 1 else 0 end) / count(*)) > 0.75
then 0
else
case when (sum(case when status_code >> 4 = 1 then 1 else 0 end) / count(*)) +
(sum(case when status_code >> 3 = 1 then 1 else 0 end) / count(*)) +
(sum(case when status_code >> 2 = 1 then 1 else 0 end) / count(*)) > 0.75
then 1
else
case when (sum(case when status_code >> 4 = 1 then 1 else 0 end) / count(*)) +
(sum(case when status_code >> 3 = 1 then 1 else 0 end) / count(*)) +
(sum(case when status_code >> 2 = 1 then 1 else 0 end) / count(*)) +
(sum(case when status_code >> 1 = 1 then 1 else 0 end) / count(*)) > 0.75
then 2
else
3
end
end
end) as status
from myTable
How can I simplify the query (and probably make it more performant), by re-using the calculated proportions of each individual status? For example, how could I maybe alias the following and re-use it in different parts of the case statement?
(sum(case when status_code >> 4 = 1 then 1 else 0 end) / count(*))
Is that even possible?
Upvotes: 0
Views: 183
Reputation: 173046
How can I simplify the query?
Below is one of the option, I think
#standardSQL
CREATE TEMP FUNCTION f(code INT64, bits INT64) AS (
IF(code >> bits = 1, 1, 0)
);
SELECT (CASE
WHEN (SUM(f(status_code, 4)) +
SUM(f(status_code, 3))) / COUNT(*) > 0.75 THEN 0
WHEN (SUM(f(status_code, 4)) +
SUM(f(status_code, 3)) +
SUM(f(status_code, 2))) / COUNT(*) > 0.75 THEN 1
WHEN (SUM(f(status_code, 4)) +
SUM(f(status_code, 3)) +
SUM(f(status_code, 2)) +
SUM(f(status_code, 1))) / COUNT(*) > 0.75 THEN 2
ELSE 3
END) AS status
FROM `project.dataset.table`
or further "refactoring" gives:
#standardSQL
CREATE TEMP FUNCTION f(code INT64, bits INT64) AS (
IF(code >> bits = 1, 1, 0)
);
SELECT
(CASE
WHEN SUM(f(status_code, 4) + f(status_code, 3)) / COUNT(*) > 0.75 THEN 0
WHEN SUM(f(status_code, 4) + f(status_code, 3) + f(status_code, 2)) / COUNT(*) > 0.75 THEN 1
WHEN SUM(f(status_code, 4) + f(status_code, 3) + f(status_code, 2) + f(status_code, 1)) / COUNT(*) > 0.75 THEN 2
ELSE 3
END) AS status
FROM `project.dataset.table`
and probably make it more performant ...
So, finally
#standardSQL
CREATE TEMP FUNCTION f(code INT64, bits INT64) AS (
IF(code >> bits = 1, 1, 0)
);
SELECT
(CASE
WHEN f4 + f3 > 0.75 THEN 0
WHEN f4 + f3 + f2 > 0.75 THEN 1
WHEN f4 + f3 + f2 + f1 > 0.75 THEN 2
ELSE 3
END) AS status
FROM (
SELECT
SUM(f(status_code, 4))/ COUNT(*) f4,
SUM(f(status_code, 3))/ COUNT(*) f3,
SUM(f(status_code, 2))/ COUNT(*) f2,
SUM(f(status_code, 1))/ COUNT(*) f1
FROM `project.dataset.table`
)
Upvotes: 1