Reputation: 4919
I have string (VARCHAR(255)
) that contains only zeros or ones.
I need to search all positions and return them as comma separated string.
I've build two queries using solutions from https://dba.stackexchange.com/questions/41961/how-to-find-all-positions-of-a-string-within-another-string
Here is my code so far:
DECLARE @TERM VARCHAR(5);
SET @TERM = '1';
DECLARE @STRING VARCHAR(255);
SET @STRING = '101011011000000000000000000000000000000000000000';
DECLARE @RESULT VARCHAR(100);
SET @RESULT = '';
SELECT
@RESULT = @RESULT + CAST(X.pos AS VARCHAR(10)) + ','
FROM
( SELECT
pos = Number - LEN(@TERM)
FROM
( SELECT
Number
,Item = LTRIM(RTRIM(SUBSTRING(@STRING, Number, CHARINDEX(@TERM, @STRING + @TERM, Number) - Number)))
FROM
( SELECT ROW_NUMBER () OVER (ORDER BY [object_id]) FROM sys.all_objects
) AS n ( Number )
WHERE
Number > 1
AND Number <= CONVERT(INT, LEN(@STRING))
AND SUBSTRING(@TERM + @STRING, Number, LEN(@TERM)) = @TERM
) AS y
) X;
SELECT
SUBSTRING(@RESULT, 0, LEN(@RESULT));
DECLARE @POS INT;
DECLARE @OLD_POS INT;
DECLARE @POSITIONS VARCHAR(100);
SELECT
@POSITIONS = '';
SELECT
@OLD_POS = 0;
SELECT
@POS = PATINDEX('%1%', @STRING);
WHILE @POS > 0
AND @OLD_POS <> @POS
BEGIN
SELECT
@POSITIONS = @POSITIONS + CAST(@POS AS VARCHAR(2)) + ',';
SELECT
@OLD_POS = @POS;
SELECT
@POS = PATINDEX('%1%', SUBSTRING(@STRING, @POS + 1, LEN(@STRING))) + @POS;
END;
SELECT
LEFT(@POSITIONS, LEN(@POSITIONS) - 1);
I'm wondering if this can be done faster/better? I'm searching only for single character positions and I have only two characters that can occur in my string (0 and 1).
I've build two functions using this code, and run them for 1000 records and got same results in same time, so I can't tell which one is better.
for single record second part gives CPU and reads equals to 0 in Profiler, where first piece of code give me CPU=16 and reads=17.
I need to get result that looks like this: 1,3,5,6,8,9
(when multiple occurrences), 3
for single occurence, NONE
if there are no ones.
Upvotes: 1
Views: 2246
Reputation: 58795
Giorgi's response is very clever, but I'd prefer a more old-fashioned approach that's more readable. My suggestion, including test cases:
if object_id('UFN_CSVPOSITIONS') is not null
begin
drop function ufn_csvpositions;
end
go
create function dbo.UFN_CSVPOSITIONS
(
@string nvarchar(255)
,@delimiter nvarchar(1) = ','
)
returns nvarchar(255)
as
begin
--given a string that contains ones,
--return a comma-delimited list of the positions of those ones
--example: '1001' returns '1,4'
declare @result nvarchar(255) = '';
declare @i int = 1;
declare @slen int = len(@string);
declare @idx int = 0;
while @i < @slen
begin
set @idx = charindex('1',@string,@i);
if 0 = @idx
begin
set @i = @slen; --no more to be found, break out early
end
else
begin
set @result = @result + @delimiter + convert(nvarchar(3),@idx);
set @i = @idx; --jump ahead
end;
set @i = @i + 1;
end --while
if (0 < len(@result)) and (',' = substring(@result,1,1))
begin
set @result = substring(@result,2,len(@result)-1)
end
return @result;
end
go
--test cases
DECLARE @STRING NVARCHAR(255) = '';
set @string = '101011011000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = null;
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = '';
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = '1111111111111111111111111111111111111111111111111';
print dbo.UFN_CSVPOSITIONS(@string,',');
set @string = '0000000000000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
--lets try a very large # of test cases, see how fast it comes out
--255 "ones" should be the worst case scenario for performance, so lets run through 50k of those.
--on my laptop, here are test case results:
--all 1s : 13 seconds
--all 0s : 7 seconds
--all nulls: 1 second
declare @testinput nvarchar(255) = replicate('1',255);
declare @iterations int = 50000;
declare @i int = 0;
while @i < @iterations
begin
print dbo.ufn_csvpositions(@testinput,',');
set @i = @i + 1;
end;
--repeat the test using the CTE method.
--the same test cases are as follows on my local:
--all 1s : 18 seconds
--all 0s : 15 seconds
--all NULLs: 1 second
set nocount on;
set @i = 0;
set @iterations = 50000;
declare @result nvarchar(255) = '';
set @testinput = replicate('1',255);
while @i < @iterations
begin
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM sys.all_columns a CROSS JOIN sys.all_columns b)
SELECT @result = STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@testinput) AND SUBSTRING(@testinput, p, 1) = '1'
FOR XML PATH('')), 1, 1, '')
print @result;
set @i = @i + 1;
end;
Upvotes: 1
Reputation: 35790
Some tally
table and xml
solution:
DECLARE @STRING NVARCHAR(100) = '101011011000000000000000000000000000000000000000';
;with cte as(select ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) p
from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n) cross join
(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n) cross join
(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t3(n))
SELECT STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
FOR XML PATH('')), 1, 1, '')
You just generate numbers from 1 to 1000(add more joins if length of string is bigger) and with substring
function filter needed values. Then standard trick for concatenating rows to comma separated value.
For old versions:
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM sys.all_columns a CROSS JOIN sys.all_columns b)
SELECT STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
FOR XML PATH('')), 1, 1, '')
Here is a good article on generating ranges http://dwaincsql.com/2014/03/27/tally-tables-in-t-sql/
EDIT:
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t1 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t2 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t3 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t4 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t5 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t6)
Upvotes: 3