Mateusz Urbański
Mateusz Urbański

Reputation: 7862

Counting the occurrences of distinct values in a column

I have the following query:

select 
    jsonb_build_object('high', count(*) filter (where total = 'High')) ||
    jsonb_build_object('medium', count(*) filter (where total = 'Medium')) ||
    jsonb_build_object('low', count(*) filter (where total = 'Low')) as total,
    jsonb_build_object('high', count(*) filter (where social = 'High')) ||
    jsonb_build_object('medium', count(*) filter (where social = 'Medium')) ||
    jsonb_build_object('low', count(*) filter (where social = 'Low')) as social
from (
    select score_labels->>'total' as total, 
    score_labels->>'social' as social,

    from survey_results
    ) s;

I'm wondering if there is any way to simplify it? Let's say use iteration instead of repeat jsonb_build_object statements?

This query returns following results:

total                                  social
-------------------------------------  -------------------------------
{"low": 80, "high": 282, "medium": 0}  {"low": 103, "high": 115, "medium": 0} 

Upvotes: 2

Views: 342

Answers (1)

klin
klin

Reputation: 121604

This particular case

You need a plpgsql function:

create or replace function my_arr_to_jsonb(text[])
returns jsonb language plpgsql as $$
declare
    agg int[] = array[0, 0, 0];
    s text;
begin
    foreach s in array $1 loop
        if lower(s) = 'high' then
            agg[1]:= agg[1]+ 1;
        elsif lower(s) = 'medium' then
            agg[2]:= agg[2]+ 1;
        else
            agg[3]:= agg[3]+ 1;
        end if;
    end loop;
    return jsonb_build_object(
        'high', agg[1],
        'medium', agg[2],
        'low', agg[3]);
end $$;

The function in action:

with my_table (id, score_labels) as (
values
(1, '{"total": "High", "risk": "High"}'::jsonb),
(2, '{"total": "High", "risk": "Low"}'::jsonb),
(3, '{"total": "Low", "risk": "Medium"}'::jsonb)
)

select 
    my_arr_to_jsonb(array_agg(score_labels->>'total')) as total, 
    my_arr_to_jsonb(array_agg(score_labels->>'risk')) as risk
from my_table

               total                |                risk                
------------------------------------+------------------------------------
 {"low": 1, "high": 2, "medium": 0} | {"low": 1, "high": 1, "medium": 1}
(1 row)

It is possible to use the algorithm in the function to create a custom aggregate function (see below).

Generalized solution

The question touched upon the interesting topic of counting the occurrences of distinct values in a table column using a single aggregation function.

create or replace function count_labels_state(text[], text)
returns text[] language plpgsql as $$
declare
    i int;
begin
    if $2 is not null then
        i:= array_position($1, quote_ident($2));
        if i is null then
            $1:= $1 || array[quote_ident($2), '0'];
            i:= cardinality($1)- 1;
        end if;
        $1[i+1]:= $1[i+1]::int+ 1;
    end if;
    return $1;
end $$;

create or replace function count_labels_final(text[])
returns jsonb language plpgsql as $$
declare
    j jsonb = '{}';
    i int = 1;
begin
    loop exit when i > cardinality($1); 
        j:= j || jsonb_build_object(trim($1[i], '"'), $1[i+1]::int);
        i:= i+ 2;
    end loop;
    return j;
end $$;

create aggregate count_labels(text) (
    sfunc = count_labels_state,
    stype = text[],
    finalfunc = count_labels_final
);

Usage. Instead of:

with my_table (label) as (
values
    ('low'), ('medium'), ('high'), ('low'),
    ('low'), ('medium'), ('high'), ('low'),
    ('low'), ('unknown')
)

select
    count(*) filter (where label = 'low') as low,
    count(*) filter (where label = 'medium') as medium,
    count(*) filter (where label = 'high') as high,
    count(*) filter (where label = 'unknown') as unknown
from my_table;

 low | medium | high | unknown 
-----+--------+------+---------
   5 |      2 |    2 |       1
(1 row)

you can use this (you do not have to know labels):

select count_labels(label) as labels
from my_table;

                      labels                      
--------------------------------------------------
 {"low": 5, "high": 2, "medium": 2, "unknown": 1}
(1 row)

The aggregate works well on integer columns:

with my_table (n) as (
values 
    (1), (2), (3), (4),
    (1), (2), (1), (2)
)

select count_labels(n::text) as integers
from my_table;

              integers              
----------------------------------
 {"1": 3, "2": 3, "3": 1, "4": 1}
(1 row) 

In case of other types one should remember that the aggregate works on text representations of values (e.g. numeric 1.10 = 1.1 but '1.10' <> '1.1').

Upvotes: 1

Related Questions