Reputation: 7400
I have a table like this
SELECT id, items
FROM ( VALUES
( '1', ARRAY['A', 'B'] ),
( '2', ARRAY['A', 'B', 'C'] ),
( '3', ARRAY['E', 'F'] ),
( '4', ARRAY['G'] )
) AS t(id, items)
Two items belongs to the same group if the have at least one item in common.
For example #1 and #2 belongs to the same group because they both have A and B. #3 and #4 are other different group.
So my desidered output would be
ID | items | group_alias |
---|---|---|
1 | {A,B} | {A,B} |
2 | {A,B,C} | {A,B} |
3 | {E,F} | {E,F} |
4 | {G} | {G} |
The group_alias field is a new field that say to me that the record #1 and #2 belongs to the same group.
Upvotes: 0
Views: 549
Reputation: 1621
This query returns all group alias of an item. For example item no. 5 has group alias {E}
and {A,B}
. The performance is maybe better if you create a temporary table for the items instead of creating them dynamically like you mentioned in one comment. Temporary tables are automatically dropped at the end of a session. You can create
indexes on temporary tables, too, which can speed up the query.
CREATE TEMP TABLE temp
(
id int PRIMARY KEY,
items char[] NOT NULL
);
INSERT INTO temp VALUES
( '1', ARRAY['A', 'B'] ),
( '2', ARRAY['A', 'B', 'C'] ),
( '3', ARRAY['E', 'F'] ),
( '4', ARRAY['G'] ),
( '5', ARRAY['A', 'B', 'E'] );
The query:
SELECT DISTINCT
t1.id, t1.items, coalesce(match, t1.items) AS group_alias
FROM temp t1 LEFT JOIN (
SELECT
t2.id, match
FROM
temp t2,
LATERAL(
SELECT
match
FROM
temp t3,
LATERAL(
SELECT
array_agg(aa) AS match
FROM
unnest(t2.items) aa
JOIN
unnest(t3.items) ab
ON aa = ab
) AS m1
WHERE
t2.id != t3.id AND t2.items && t3.items
) AS m2
) AS groups
ON groups.id = t1.id
ORDER BY t1.id;
And the result:
id | items | group_alias
----+---------+-------------
1 | {A,B} | {A,B}
2 | {A,B,C} | {A,B}
3 | {E,F} | {E}
4 | {G} | {G}
5 | {A,B,E} | {A,B}
5 | {A,B,E} | {E}
Upvotes: 0
Reputation: 2344
Having
CREATE TABLE temp1
(
id int PRIMARY KEY,
items char[] NOT NULL
);
INSERT INTO temp1 VALUES
( '1', ARRAY['A', 'B'] ),
( '2', ARRAY['A', 'B', 'C'] ),
( '3', ARRAY['E', 'F'] ),
( '4', ARRAY['G'] );
--Indexing array field to speedup queries
CREATE INDEX idx_items on temp1 USING GIN ("items");
Then
select t1.*,
coalesce( (select t2.items from temp1 t2
where t2.items && t1.items
and t1.id != t2.id
and array_length(t2.items,1)<array_length(t1.items,1)
order by array_length(t2.items,1) limit 1 )/*minimum common*/
, t1.items /*trivial solution*/ ) group_alias
from temp1 t1;
https://www.db-fiddle.com/f/46ydeE5ZXCJDk4Rw3cu4jt/10
Upvotes: 1