SQL Server 2008 R2: Query performance

Question

I have the following table:

Example:

Table: for_test

/* Create table for testing */
create table for_test
(
 cola varchar(10),
 colb varchar(10)
);

Inserting some records:

/* Inserting some data for test */
insert into for_test values('A101','B201'),('B201','C301'),
                           ('A101','X901'),('Z808','L707'),
                           ('N606','M505'),('O111','P222'),
                           ('R333','Z808'),('X901','Y444'),
                           ('T555','N606'),('F666','E777'),
                           ('G888','H999'),('X901','I221'),
                           ('O111','H999'),('V878','U766'),
                           ('S565','T555'),('W','Z808'),('B201','E777');

Creating Indexes:

/*Creating indexs for columns*/ 
create index indx1 on for_test(cola);
create index indx2 on for_test(colb);

Note:I am try to group the related records into one group id. For example the record A,B and B,C should come under the group 1 because B is present in both records in cola for first record and colb for second record. You can come to know with more details inserting some records which I have given above and please run once the following query with above records.

/* Query for Group the data */
with CTE as
(
  select row_number() over(order by cola, colb) row_id, cola, colb
  from for_test
) 
, a as
(
 select s1.*
 , (select min(s2.row_id) from CTE s2 
    where (s1.cola = s2.cola or s1.colb = s2.cola 
    or s1.cola =  s2.colb or s1.colb = s2.colb)) row_id2
   from CTE s1
)
, b as
(
   select m1.*
   , (select m2.row_id2 from a m2 where m2.row_id = m1.row_id2) row_id3
   from a m1
)
select dense_rank() over(order by row_id3) Group_ID,cola, colb 
from b 
order by cola,colb

OUTPUT:

http://sqlfiddle.com/#!3/fe60c/1

Time taken:

 Number of Records   Time Taken
--------------------------------
    40K              11:21 min
    1Milion          above 3 hours

Question: How to improve performance of the query?

Mikael Eriksson · Accepted Answer

You are scanning your table four times and row_number() over(order by cola, colb) makes it sort on cola, colb three times. Remove the indexes you have (if they are not used elsewhere) and add one that supports that order by so the sort is not needed.

create index indx1 on for_test(cola, colb);

Another thing you could try is to persist your first CTE in a temporary table.

create table #T
(
  row_id int identity primary key,
  cola varchar(10),
  colb varchar(10)
);

insert into #T(cola, colb)
select cola, colb
from for_test
order by cola, colb;

create index IX_T_cola on #T(cola);
create index IX_T_colb on #T(colb);

And then rewrite your query to use the temp table and a union all sequence instead of the where clause with or statements to to make it possible to use the index on cola and colb to do seeks to find matching rows.

with a as
(
  select s1.*,
         (
         select min(s2.row_id)
         from (
              select T.row_id
              from #T as T
              where s1.cola = T.cola
              union all
              select T.row_id
              from #T as T
              where s1.colb = T.cola
              union all
              select T.row_id
              from #T as T
              where s1.cola = T.colb
              union all
              select T.row_id
              from #T as T
              where s1.colb = T.colb
              ) as s2 
         ) as row_id2
  from #T s1
) ,
b as
(
  select m1.*,
         (
         select m2.row_id2 
         from a m2 
         where m2.row_id = m1.row_id2
         ) as row_id3
  from a m1
)
select dense_rank() over(order by row_id3) as Group_ID,
       cola, 
       colb 
from b 
order by cola,
         colb;

SQL Server 2008 R2: Query performance

Answers (1)

Related Questions