sr33
sr33

Reputation: 83

Looking for a simpler alternative to a recursive query

The actual query is more involved, but the problem I'm facing can be distilled to this:

A query to filter a rowset of monotonically increasing integers so that - in the final result set, row(n+1).value >= row(n).value + 5.

For the actual problem I need to solve, the rowset count is in the 1000s.

A few examples to clarify:

I've managed to get the required results with the following query, but it seems overly complicated. Uncomment the different "..with t(k).." to try them out.

I'm looking for any simplifications or alternative approaches to get the same results.

    with recursive r(n, pri) as (
    with t(k) as (values (1),(2),(3),(4),(5))   -- the data we want to filter
    -- with t(k) as (values (1),(5),(7),(10),(11),(12),(13))
    -- with t(k) as (values (6),(8),(11),(16),(20),(23))
    -- with t(k) as (values (6),(8),(12),(16),(20),(23))
    select min(k), 1::bigint from t             -- bootstrap for recursive processing. 1 here represents rank().
    UNION
    select k, (rank() over(order by k)) rr      -- rank() is required just to filter out the rows we dont want from the final result set, and no other reason
    from r, t 
    where t.k >= r.n+5 and r.pri = 1            -- capture the rows we want, AND unfortunately a bunch of rows we dont want 
)
select n from r where pri = 1;                  -- filter out the rows we dont want

Upvotes: 1

Views: 568

Answers (2)

joop
joop

Reputation: 4503

-- The data:
CREATE TABLE rowseq(val INTEGER NOT NULL) ;
INSERT INTO rowseq(val ) values
    -- (1),(2),(3),(4),(5)
         (1), (5), (7), (10), (11), (12), (13)
    --(6),(8),(11),(16),(20),(23)
    --(6),(8),(12),(16),(20),(23)
        ;

        -- need this view, because a recursive CTE cannot be based on a CTE
        -- [we could also duplicate the row_number() in both legs of the recursive CTE]
CREATE TEMP VIEW qqq AS
        SELECT val, row_number() OVER (ORDER BY val) AS rn
        FROM rowseq
        ;

WITH RECURSIVE rrr AS (
        SELECT qqq.val, qqq.rn
        FROM qqq
        WHERE qqq.rn = 1
UNION
        SELECT q1.val, q1.rn
        FROM rrr
        JOIN qqq q1 ON q1.rn > rrr.rn AND q1.val >= rrr.val+5 -- The "Gap" condition
                AND NOT EXISTS ( SELECT * FROM  qqq nx                  -- But it must be the FISRT match
                        WHERE nx.rn > rrr.rn AND nx.val >= rrr.val+5    -- same condition
                        AND nx.rn < q1.rn                               -- but NO earlier match
                        )
        )
-- prove it to the world!
SELECT *
FROM rrr
        ;

Upvotes: 1

Abelisto
Abelisto

Reputation: 15624

What about using plpgsql?

drop table if exists t;
create table t(k) as (
  values 
    (1),(2),(3),(4),(5)
    --(1),(5),(7),(10),(11),(12),(13)
    --(6),(8),(11),(16),(20),(23)
    --(6),(8),(12),(16),(20),(23)
);

create or replace function foo(in n int, out k int) returns setof int as $$
declare
  r t;
  rp t;
begin
  rp := null;
  for r in (select * from t) loop
    if (rp is null) or (r.k >= rp.k + n) then
      rp := r;
      k := r.k;
      return next;
    end if;
  end loop;
  return;
end; $$ immutable language plpgsql;

select * from foo(5);

Upvotes: 0

Related Questions