Misiu
Misiu

Reputation: 4919

Search all positions of char in string and return as comma separated string

I have string (VARCHAR(255)) that contains only zeros or ones.
I need to search all positions and return them as comma separated string. I've build two queries using solutions from https://dba.stackexchange.com/questions/41961/how-to-find-all-positions-of-a-string-within-another-string

Here is my code so far:

DECLARE @TERM VARCHAR(5);
SET @TERM = '1';
DECLARE @STRING VARCHAR(255);
SET @STRING = '101011011000000000000000000000000000000000000000';

DECLARE @RESULT VARCHAR(100);
SET @RESULT = '';

SELECT
   @RESULT = @RESULT + CAST(X.pos AS VARCHAR(10)) + ','
FROM
   ( SELECT
      pos = Number - LEN(@TERM)
     FROM
      ( SELECT
         Number
        ,Item = LTRIM(RTRIM(SUBSTRING(@STRING, Number, CHARINDEX(@TERM, @STRING + @TERM, Number) - Number)))
        FROM
         ( SELECT ROW_NUMBER () OVER (ORDER BY [object_id]) FROM sys.all_objects
         ) AS n ( Number )
        WHERE
         Number > 1
         AND Number <= CONVERT(INT, LEN(@STRING))
         AND SUBSTRING(@TERM + @STRING, Number, LEN(@TERM)) = @TERM
      ) AS y
   ) X;

SELECT
   SUBSTRING(@RESULT, 0, LEN(@RESULT));



DECLARE @POS INT;
DECLARE @OLD_POS INT;
DECLARE @POSITIONS VARCHAR(100);
SELECT
   @POSITIONS = '';
SELECT
   @OLD_POS = 0;
SELECT
   @POS = PATINDEX('%1%', @STRING); 
WHILE @POS > 0
   AND @OLD_POS <> @POS
   BEGIN
      SELECT
         @POSITIONS = @POSITIONS + CAST(@POS AS VARCHAR(2)) + ',';
      SELECT
         @OLD_POS = @POS;
      SELECT
         @POS = PATINDEX('%1%', SUBSTRING(@STRING, @POS + 1, LEN(@STRING))) + @POS;
   END;
SELECT
   LEFT(@POSITIONS, LEN(@POSITIONS) - 1);

I'm wondering if this can be done faster/better? I'm searching only for single character positions and I have only two characters that can occur in my string (0 and 1).

I've build two functions using this code, and run them for 1000 records and got same results in same time, so I can't tell which one is better.

for single record second part gives CPU and reads equals to 0 in Profiler, where first piece of code give me CPU=16 and reads=17.

I need to get result that looks like this: 1,3,5,6,8,9 (when multiple occurrences), 3 for single occurence, NONE if there are no ones.

Upvotes: 1

Views: 2246

Answers (2)

JosephStyons
JosephStyons

Reputation: 58795

Giorgi's response is very clever, but I'd prefer a more old-fashioned approach that's more readable. My suggestion, including test cases:

if object_id('UFN_CSVPOSITIONS') is not null
begin
  drop function ufn_csvpositions;
end
go

create function dbo.UFN_CSVPOSITIONS
(
  @string nvarchar(255)
 ,@delimiter nvarchar(1) = ','
)
returns nvarchar(255)
as
begin
  --given a string that contains ones,
  --return a comma-delimited list of the positions of those ones
  --example: '1001' returns '1,4'
  declare @result nvarchar(255) = '';
  declare @i int = 1;
  declare @slen int = len(@string);
  declare @idx int = 0;

  while @i < @slen
  begin
    set @idx = charindex('1',@string,@i);
    if 0 = @idx
    begin
      set @i = @slen;  --no more to be found, break out early
    end
    else
    begin
      set @result = @result + @delimiter + convert(nvarchar(3),@idx);
      set @i = @idx; --jump ahead
    end;
    set @i = @i + 1;
  end  --while

  if (0 < len(@result)) and (',' = substring(@result,1,1))
  begin
    set @result = substring(@result,2,len(@result)-1)
  end

  return @result;
end
go

--test cases
DECLARE @STRING NVARCHAR(255) = '';
set @string = '101011011000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = null;
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = '';
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = '1111111111111111111111111111111111111111111111111';
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = '0000000000000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');

--lets try a very large # of test cases, see how fast it comes out
--255 "ones" should be the worst case scenario for performance, so lets run through 50k of those.
--on my laptop, here are test case results:
--all 1s   : 13 seconds
--all 0s   :  7 seconds
--all nulls:  1 second
declare @testinput nvarchar(255) = replicate('1',255);
declare @iterations int = 50000;
declare @i int = 0;
while @i < @iterations
begin
  print dbo.ufn_csvpositions(@testinput,',');
  set @i = @i + 1;
end;

--repeat the test using the CTE method.
--the same test cases are as follows on my local:
--all 1s   : 18 seconds
--all 0s   : 15 seconds
--all NULLs: 1  second
set nocount on;
set @i = 0;
set @iterations = 50000;
declare @result nvarchar(255) = '';
set @testinput = replicate('1',255);
while @i < @iterations
begin
  ;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
               FROM sys.all_columns a CROSS JOIN sys.all_columns b)
  SELECT @result = STUFF((SELECT  ',' + CAST(p AS VARCHAR(100))
                FROM cte
                WHERE p <= LEN(@testinput) AND SUBSTRING(@testinput, p, 1) = '1'
                FOR XML PATH('')), 1, 1, '')
  print @result;
  set @i = @i + 1;
end;

Upvotes: 1

Giorgi Nakeuri
Giorgi Nakeuri

Reputation: 35790

Some tally table and xml solution:

DECLARE @STRING NVARCHAR(100) = '101011011000000000000000000000000000000000000000';

;with cte as(select ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) p 
             from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n) cross join
                  (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n) cross join
                  (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t3(n))
SELECT STUFF((SELECT  ',' + CAST(p AS VARCHAR(100))
              FROM cte
              WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
              FOR XML PATH('')), 1, 1, '')

You just generate numbers from 1 to 1000(add more joins if length of string is bigger) and with substring function filter needed values. Then standard trick for concatenating rows to comma separated value.

For old versions:

;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
             FROM sys.all_columns a CROSS JOIN sys.all_columns b)
SELECT STUFF((SELECT  ',' + CAST(p AS VARCHAR(100))
              FROM cte
              WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
              FOR XML PATH('')), 1, 1, '')

Here is a good article on generating ranges http://dwaincsql.com/2014/03/27/tally-tables-in-t-sql/

EDIT:

;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
               FROM (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t1 CROSS JOIN 
                (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t2 CROSS JOIN 
                (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t3 CROSS JOIN 
                (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t4 CROSS JOIN 
                (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t5 CROSS JOIN 
                (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t6)

Upvotes: 3

Related Questions