Reputation: 947
Suppose I have a table with values like this:
CREATE TABLE foo
(
ID int NOT NULL,
Box1 varchar(50),
Box2 varchar(50),
Box3 varchar(50),
CONSTRAINT PK_foo PRIMARY KEY (ID)
);
INSERT INTO foo (ID, Box1, Box2, Box3) VALUES
(0, 'FOOBIE BLETCH', NULL, NULL),
(1, 'DUAM XNAHT', NULL, NULL),
(2, 'HACKEM MUCHE', 'FNORD', NULL),
(3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'),
(4, 'JUYED AWK YACC', 'FNORD', NULL),
(5, 'FOOBIE BLETCH', NULL, NULL),
(6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL),
(7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'),
(8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH')
How do I find values of foo.ID
where any combination of Box1
, Box2
, and Box3
contain specified values? The order of the values to be found does not matter. Rows that contain more values than the ones being looked for should appear in the results. e.g.
DECLARE @ArgValue varchar
SET @ArgValue = 'FOOBIE BLETCH' -- match 0, 5, 6, 7
SET @ArgValue = 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH' -- match 3, 8
SET @ArgValue = 'FNORD, JUYED AWK YACC' -- match 4
Assume (if necessary) the existence of a stored proc ArgVal_Split(@ArgVal varchar(max), @Delimiter char(1))
that can extract the substrings and return them as a single-column table.
My real data is actually much more complicated than this (one table has 20 different columns that might match) so I'm looking for solutions that don't involve enumerating the column names.
Upvotes: 4
Views: 1670
Reputation: 947
-- uncomment the 'create table' commands the first time you run it, and then comment them out again
-- requires costest_split, with fixes
---- this table stands for CostEst_ClaimInfo
CREATE TABLE foo
(
ID int NOT NULL,
Box1 varchar(50),
Box2 varchar(50),
Box3 varchar(50),
CONSTRAINT PK_foo PRIMARY KEY (ID)
);
---- This table stands for CostEst_ClaimDiagnoses
create table FooCrossRef
(
ID int NOT NULL,
BoxVal varchar(50)
)
---- This table stands for #tdiags
create table FooVals ( ABoxVal varchar(50) )
---- This table stands for #tClaims
CREATE TABLE fooResults
(
ID int NOT NULL,
Box1 varchar(50),
Box2 varchar(50),
Box3 varchar(50),
CONSTRAINT PK_fooRes PRIMARY KEY (ID)
);
-- empty out all the tables every time, for simplicity
delete from foo
delete from FooCrossRef
delete from FooVals
delete from fooResults
declare @BoxCount int
INSERT INTO foo (ID, Box1, Box2, Box3) VALUES
(0, 'FOOBIE BLETCH', NULL, NULL),
(1, 'DUAM XNAHT', NULL, NULL),
(2, 'HACKEM MUCHE', 'FNORD', NULL),
(3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'),
(4, 'JUYED AWK YACC', 'FNORD', NULL),
(5, 'FOOBIE BLETCH', NULL, NULL),
(6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL),
(7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'),
(8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH')
,(9, 'JUYED AWK YACC', 'GARVEN DEH', 'ELBIB YLOH')
,(10, 'ELBIB YLOH', NULL, NULL)
,(11, 'JUYED AWK YACC', 'FOOBIE BLETCH', 'FNORD')
DECLARE @ArgValue varchar(max)
-- unit-test arg values
--SET @ArgValue = 'FOOBIE BLETCH' -- match 0, 5, 6, 7, 11
--SET @ArgValue = 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH' -- match 3, 8
--SET @ArgValue = 'FNORD, JUYED AWK YACC' -- match 4, 11
SET @ArgValue = 'FOOBIE BLETCH, LEP GEX VEN ZEA' -- match nothing
insert into FooCrossRef (ID, BoxVal )
select ID, Box
from
(
select foo.ID, Box1, Box2, Box3 from foo
) as PVT
UNPIVOT (Box for position in (Box1, Box2, Box3)) as UnPvt
-- Do the thing with the argument
insert into FooVals exec costest_split @OrigString = @ArgValue, @Delimiter = ',';
set @BoxCount = (select Count(distinct ABoxVal) from FooVals)
insert into fooResults (ID, Box1, Box2, Box3)
(
select distinct foo.ID, Box1, Box2, Box3 from foo
join FooCrossRef on foo.ID = FooCrossRef.ID
join FooVals on FooCrossRef.BoxVal = FooVals.ABoxVal
group by foo.ID, Box1, Box2, Box3
having count(distinct FooCrossRef.BoxVal) >= @BoxCount
)
select * from fooResults
Upvotes: 0
Reputation: 67311
This design smells...
Whenever you want to add columns with numbers (often seen: Telephone1, Telephone2...) you'd rather take a 1:n-related side-table!
But still this is possible:
DECLARE @foo TABLE
(
ID int NOT NULL,
Box1 varchar(50),
Box2 varchar(50),
Box3 varchar(50)
);
INSERT INTO @foo (ID, Box1, Box2, Box3) VALUES
(0, 'FOOBIE BLETCH', NULL, NULL),
(1, 'DUAM XNAHT', NULL, NULL),
(2, 'HACKEM MUCHE', 'FNORD', NULL),
(3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH'),
(4, 'JUYED AWK YACC', 'FNORD', NULL),
(5, 'FOOBIE BLETCH', NULL, NULL),
(6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL),
(7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH'),
(8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH');
DECLARE @ArgValue VARCHAR(100);
--SET @ArgValue = 'FOOBIE BLETCH'; -- match 0, 5, 6, 7
SET @ArgValue = 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH'; -- match 3, 8
--SET @ArgValue = 'FNORD, JUYED AWK YACC'; -- match 4
WITH DerivedTable AS
(
SELECT x.value('.','nvarchar(max)') aS part
FROM
(
SELECT CAST('<x>' + REPLACE(@ArgValue,', ','</x><x>') + '</x>' AS XML)
) AS tbl(Casted)
CROSS APPLY Casted.nodes('/x') AS A(x)
)
SELECT ID
FROM @foo AS f
CROSS APPLY (VALUES(Box1),(Box2),(Box3)) AS boxes(box)
INNER JOIN DerivedTable AS dt ON box=part
GROUP BY ID
HAVING COUNT(ID)>=(SELECT COUNT(*) FROM DerivedTable)
A short explanation:
By replacing ,
with xml-tags you transfrom your string 'GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH'
to '<x>GARVEN DEH</x><x>DAIYEN FOOELS</x><x>ELBIB YLOH</x>'
. This is XML and can be easily used as derived table.
Worth to mention, that your string should not include forbidden characters like <
, >
and &
. If you need this, you'd find a sure solution here.
The select uses CROSS APPLY
with VALUES
, a trick to UNPIVOT
and performs an INNER JOIN
. Only the result are valid, where the count is the same, as there are parts in the search argument.
Upvotes: 2
Reputation: 17915
Split @ArgValue
into @a1, @a2, @a3
using any of a million solutions out there. The easiest is probably just nested charindex()
expressions. (I see you have twenty so you may just want a loop.) The actually query is fairly simple.
select *
from foo
where
coalesce(@a1, '!@#$%') in (Box1, Box2, Box3, '!@#$%')
and coalesce(@a2, '!@#$%') in (Box1, Box2, Box3, '!@#$%')
and coalesce(@a3, '!@#$%') in (Box1, Box2, Box3, '!@#$%');
This query is not likely to be very fast. If this is a core operation in your system you should reconsider the design.
Upvotes: 0
Reputation: 1990
select fooID from dbo.IntersectFooBoxes('FOOBIE BLETCH')
--RESULT: 0,5,6,7
select fooID from dbo.IntersectFooBoxes('GARVEN DEH, DAIYEN FOOELS, ELBIB YLOH')
--RESULT: 3,8
select fooID from dbo.IntersectFooBoxes('FNORD, JUYED AWK YACC')
--RESULT: 4
CREATE TABLE foo
(
ID int NOT NULL PRIMARY KEY,
Box1 varchar(50),
Box2 varchar(50),
Box3 varchar(50)
)
GO
INSERT INTO foo (ID, Box1, Box2, Box3)
SELECT 0, 'FOOBIE BLETCH', NULL, NULL UNION ALL
SELECT 1, 'DUAM XNAHT', NULL, NULL UNION ALL
SELECT 2, 'HACKEM MUCHE', 'FNORD', NULL UNION ALL
SELECT 3, 'DAIYEN FOOELS', 'ELBIB YLOH', 'GARVEN DEH' UNION ALL
SELECT 4, 'JUYED AWK YACC', 'FNORD', NULL UNION ALL
SELECT 5, 'FOOBIE BLETCH', NULL, NULL UNION ALL
SELECT 6, 'JUYED AWK YACC', 'FOOBIE BLETCH', NULL UNION ALL
SELECT 7, 'HACKEM MUCHE', 'FNORD', 'FOOBIE BLETCH' UNION ALL
SELECT 8, 'DAIYEN FOOELS', 'GARVEN DEH', 'ELBIB YLOH'
GO
This function takes a single value and returns any fooIDs that contain this value in any of the desired columns. Use this function to have a single place to define which columns to search.
CREATE FUNCTION dbo.LookupFooBoxes (@Value varchar(50))
RETURNS @results TABLE
(
-- Columns returned by the function
ID INT PRIMARY KEY NOT NULL IDENTITY(1,1),
fooID INT
)
AS
BEGIN
INSERT INTO @results (fooID)
SELECT ID
FROM
(
SELECT ID FROM foo WHERE Box1 = @Value
UNION
SELECT ID FROM foo WHERE Box2 = @Value
UNION
SELECT ID FROM foo WHERE Box3 = @Value
) matches
ORDER BY ID asc
RETURN;
END
This function takes a @Values
parameter with a separated 'list' of values, and a @Split
parameter that defines what to split on (default: comma), and also trims any spaces out of the resulting values
CREATE FUNCTION dbo.Split (@Values varchar(max), @Split varchar(5) = ',')
RETURNS @result TABLE
(
-- Columns returned by the function
ID INT PRIMARY KEY NOT NULL IDENTITY(1,1),
Value varchar(max)
)
AS
BEGIN
DECLARE @X xml
SELECT @X = CONVERT(xml,' <root> <s>' + REPLACE(@Values, @Split,'</s> <s>') + '</s> </root> ')
INSERT INTO @result (Value)
SELECT LTRIM(RTRIM(T.c.value('.','varchar(max)')))
FROM @X.nodes('/root/s') T(c)
RETURN;
END
This function takes a comma separated list of values as a single parameter, and iterates through them with cursor. If there are no values it returns an empty table, for the first value it just uses dbo.LookupFooBoxes to find all the rows that contain that value, and for any other values it removes IDs that do not contain it, resulting in the intersection of only those fooIDs that have all the elements.
CREATE FUNCTION dbo.IntersectFooBoxes (@Values varchar(max))
RETURNS @results TABLE
(
-- Columns returned by the function
ID INT PRIMARY KEY NOT NULL IDENTITY(1,1),
fooID INT
)
AS
BEGIN
DECLARE c CURSOR FOR SELECT Value FROM dbo.Split(@Values, ',')
OPEN c
DECLARE @value varchar(max)
FETCH NEXT FROM c INTO @value
--No Elements
IF @@FETCH_STATUS <> 0
RETURN;
INSERT INTO @results (fooID)
SELECT fooID FROM dbo.LookupFooBoxes(@Value)
FETCH NEXT FROM c INTO @value
WHILE @@FETCH_STATUS = 0
BEGIN
DELETE @results
FROM
@results r
LEFT OUTER JOIN dbo.LookupFooBoxes(@Value) m ON m.fooID = r.fooID
WHERE
m.fooID is null
FETCH NEXT FROM c INTO @value
END
RETURN;
END
Upvotes: 0
Reputation: 350290
First of all, you need to correct the datatype of the argument, otherwise it will be a single character:
DECLARE @ArgValue varchar(1000)
You could then use a recursive query to split the argument into its individual terms -- one row per term -- and then join each result with the foo
table using an in
operator. Finally, filter the result for cases where the number of join-matches equals the number of words in the argument.
;with terms(term, rest) as (
select cast(LEFT(@ArgValue, CHARINDEX(',',@ArgValue+',')-1) as varchar(1000))
as term,
LTRIM(STUFF(@ArgValue, 1, CHARINDEX(',',@ArgValue+','), '')) as rest
union all
select cast(LEFT(rest, CHARINDEX(',',rest+',')-1) as varchar(1000)),
LTRIM(STUFF(rest, 1, CHARINDEX(',',rest+','), ''))
from terms
where rest > ''
)
select foo.id
from foo
inner join terms on term in (box1, box2, box3)
group by foo.id
having count(term) = (select count(*) from terms)
The recursive common table expression can be replaced by the ArgVal_Split()
results, but performance may vary.
The column names are only enumerated at one place, just as they are. Even if you have 20, that should not be an issue.
Upvotes: 1
Reputation: 22811
I have no ArgVal_Split() function, a number of splitters can be found.
with argVal as ( -- simulates ArgVal_Split() results
select 'GARVEN DEH' as txt
union select 'DAIYEN FOOELS'
union select 'ELBIB YLOH'
)
select *
from foo
where 0 = (select count(*) n from (
select Box1 x
union select Box2
union select Box3
except
select txt
from argVal --from ArgVal_Split(@ArgVal, ',')
) t )
This part select Box1 x union select Box2 union select Box3
can be adapted to variable number of columns only with dynamic sql. No way to iterate columns in regular sql.
Upvotes: 0