gkoul
gkoul

Reputation: 1277

Get unique values using STRING_AGG in SQL Server

The following query returns the results shown below:

SELECT 
    ProjectID, newID.value
FROM 
    [dbo].[Data] WITH(NOLOCK)  
CROSS APPLY 
    STRING_SPLIT([bID],';') AS newID  
WHERE 
    newID.value IN ('O95833', 'Q96NY7-2') 

Results:

ProjectID   value
---------------------
2           Q96NY7-2
2           O95833
2           O95833
2           Q96NY7-2
2           O95833
2           Q96NY7-2
4           Q96NY7-2
4           Q96NY7-2

Using the newly added STRING_AGG function (in SQL Server 2017) as it is shown in the following query I am able to get the result-set below.

SELECT 
    ProjectID,
    STRING_AGG( newID.value, ',') WITHIN GROUP (ORDER BY newID.value) AS 
NewField
FROM
    [dbo].[Data] WITH(NOLOCK)  
CROSS APPLY 
    STRING_SPLIT([bID],';') AS newID  
WHERE 
    newID.value IN ('O95833', 'Q96NY7-2')  
GROUP BY 
    ProjectID
ORDER BY 
    ProjectID

Results:

ProjectID   NewField
-------------------------------------------------------------
2           O95833,O95833,O95833,Q96NY7-2,Q96NY7-2,Q96NY7-2
4           Q96NY7-2,Q96NY7-2

I would like my final output to have only unique elements as below:

ProjectID   NewField
-------------------------------
2           O95833, Q96NY7-2
4           Q96NY7-2

Any suggestions about how to get this result? Please feel free to refine/redesign from scratch my query if needed.

Upvotes: 73

Views: 184592

Answers (11)

JohnLBevan
JohnLBevan

Reputation: 24410

Use the DISTINCT keyword in a subquery to remove duplicates before combining the results: SQL Fiddle

SELECT 
 ProjectID
,STRING_AGG(value, ',') WITHIN GROUP (ORDER BY value) AS 
NewField
FROM (
    SELECT DISTINCT 
      ProjectID
    , newId.value 
    FROM [dbo].[Data] WITH (NOLOCK)  
    CROSS APPLY STRING_SPLIT([bID],';') AS newId  
    WHERE newId.value IN (   'O95833' , 'Q96NY7-2'  )  
) x
GROUP BY ProjectID
ORDER BY ProjectID

Upvotes: 69

Krzysztof Krysztofczyk
Krzysztof Krysztofczyk

Reputation: 499

You can use that function to remove duplicates:

CREATE FUNCTION fn_DistinctSeparatedList (@InputString VARCHAR(MAX), @separator nvarchar(10))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @Items TABLE (Item VARCHAR(MAX));

    INSERT INTO @Items
    SELECT value 
    FROM STRING_SPLIT(replace(@InputString, @separator,'~'),'~')
    WHERE value IS NOT NULL AND value != '';

    WITH DistinctItems AS (
        SELECT DISTINCT Item AS Item
        FROM @Items
    )

    SELECT @InputString = STRING_AGG(Item, '~') 
    FROM DistinctItems;
    
    RETURN Replace(@InputString,'~',@separator);
END

you can use that this way: let's create the table to have some data:

drop table if exists #PetsOwner
Select 'Olivier' as Person, 'Cat' as Pet, 'Charlie' as PetName
into #PetsOwner
union
Select 'Olivier' as Person, 'cat' as Pet, 'Luna' as PetName
union
Select 'Olivier' as Person, 'Cat' as Pet, 'Cooper '  as PetName
union 
Select 'Leo' as Person, 'Cat' as Pet, 'Daisy'  as PetName
union 
Select 'Leo' as Person, 'Dog' as Pet, 'Milo'  as PetName
union
Select 'Michael' as Person, 'Fish' as Pet, 'Max'  as PetName

And now we can aggregate with duplicates:

select Person, STRING_AGG(Pet, ', ')
from #PetsOwner
group by Person

Or without duplicates with the usage of that function:

select Person, fn_DistinctSeparatedList(STRING_AGG(Pet, ', '),', ')
from #PetsOwner
group by Person

Upvotes: 2

Martin Smith
Martin Smith

Reputation: 452947

For your particular case instead of exploding the values from the rows out and intermingling them and then needing to use GROUP BY to reassemble them you can just do the following (Fiddle).

SELECT ProjectId, 
      NewField = (SELECT  STRING_AGG( value, ',') WITHIN GROUP (ORDER BY value) FROM  (SELECT DISTINCT value FROM STRING_SPLIT(bID,';') WHERE value IN ('O95833', 'Q96NY7-2') )X)
FROM [data]

In the more general case - e.g. with the starting point in Darryl's answer you could use

 WITH T AS
 (
SELECT *,  
       ROW_NUMBER() OVER (PARTITION BY ProjectID, value ORDER BY ProjectID, value) AS RN
FROM #data d
)
SELECT ProjectID,
       SUM(Cost),
       STRING_AGG(CASE WHEN RN = 1 THEN value END, ',') WITHIN GROUP (ORDER BY value)
FROM T
GROUP BY ProjectID
ORDER BY ProjectID

This can use a single sort on ProjectID, value to both apply the row numbering and for the subsequent GROUP BY ProjectID and WITHIN GROUP (ORDER BY value)

Fiddle

Upvotes: 3

Darryl McKenna
Darryl McKenna

Reputation: 1

In case you want to include other aggregates with your query, you can do:

DROP TABLE IF EXISTS #data
CREATE TABLE #data (row_id INT IDENTITY(1,1), projectID INT, value NVARCHAR(40), cost FLOAT)
INSERT INTO #data(projectID, value, cost )
VALUES 
 (2,'Q96NY7-2',100) 
,(2,'O95833'  ,100) 
,(2,'O95833'  ,100) 
,(2,'Q96NY7-2',100) 
,(2,'O95833'  ,100) 
,(2,'Q96NY7-2',100) 
,(4,'Q96NY7-2',100) 
,(4,'Q96NY7-2',100) 
 
SELECT projectID  = d.projectID
     , value      = REPLACE(STRING_AGG(IIF(x.row_id = d.row_id, x.value, '(x)'),',')   WITHIN GROUP (ORDER BY IIF(x.row_id = d.row_id, x.value, '(x)')), '(x),','')
     , Cost       = SUM(d.COST)
FROM #data d
JOIN (  SELECT DISTINCT projectid, value, row_id = MIN(row_id) 
        FROM #data 
        GROUP BY projectid, value 
     ) x ON x.projectid = d.projectid AND x.value = d.value
GROUP BY d.projectID
projectID value Cost
2 O95833,Q96NY7-2 600
4 Q96NY7-2 200

Upvotes: -1

Domagoj Peharda
Domagoj Peharda

Reputation: 133

Here is my improvement on @ttugates to make it more generic:

CREATE OR ALTER FUNCTION [dbo].[fn_DistinctList]
(
  @String NVARCHAR(MAX),
  @Delimiter char(1)
)
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
  DECLARE @Result NVARCHAR(MAX);
  WITH MY_CTE AS ( SELECT Distinct(value) FROM STRING_SPLIT(@String, 
@Delimiter)  )
  SELECT @Result = STRING_AGG(value, @Delimiter) FROM MY_CTE
  RETURN @Result
END

Upvotes: 6

sasynkamil
sasynkamil

Reputation: 944

Oracle (since version 19c) suports listagg (DISTINCT ..., but Microsoft SQL Server not probably.

Upvotes: -7

ttugates
ttugates

Reputation: 6271

This is a function that I wrote that answers the OP Title: Improvements welcome!

CREATE OR ALTER FUNCTION [dbo].[fn_DistinctWords]
(
  @String NVARCHAR(MAX)  
)
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
  DECLARE @Result NVARCHAR(MAX);
  WITH MY_CTE AS ( SELECT Distinct(value) FROM STRING_SPLIT(@String, ' ')  )
  SELECT @Result = STRING_AGG(value, ' ') FROM MY_CTE
  RETURN @Result
END
GO

Use like:

SELECT dbo.fn_DistinctWords('One Two      Three Two One');

Upvotes: 16

gil.fernandes
gil.fernandes

Reputation: 14591

Another possibility to get unique strings from STRING_AGG would be to perform these three steps after fetching the comma separated string:

  1. Split the string (STRING_SPLIT)
  2. Select DISTINCT from the splits
  3. Apply STRING_AGG again to a select with a group on a single key

Example:

(select STRING_AGG(CAST(value as VARCHAR(MAX)), ',') 
        from (SELECT distinct 1 single_key, value 
            FROM STRING_SPLIT(STRING_AGG(CAST(customer_division as VARCHAR(MAX)), ','), ',')) 
                q group by single_key) as customer_division

Upvotes: 3

Thomas Riedel
Thomas Riedel

Reputation: 71

You can make a distinct view of the table, that holds the aggregate values, that is even simpler:

Create Table Test (field1 varchar(1), field2 varchar(1));

go

Create View DistinctTest as (Select distinct field1, field2 from test group by field1,field2);

go

insert into Test Select 'A', '1';
insert into Test Select 'A', '2';
insert into Test Select 'A', '2';
insert into Test Select 'A', '2';
insert into Test Select 'D', '1';
insert into Test Select 'D', '1';

select string_agg(field1, ',')  from Test where field2 = '1';  /* duplicates: A,D,D */;

select string_agg(field1, ',')  from DistinctTest where field2 = '1';  /* no duplicates: A,D  */;

Upvotes: 0

John Bustos
John Bustos

Reputation: 19544

As @SeanLange pointed out in the comments, this is a terrible way to pull out the data, but if you had to, just make it 2 separate queries as follows:

SELECT 
    ProjectID
    ,STRING_AGG( val, ',') WITHIN GROUP (ORDER BY val) AS NewField
FROM
(
    SELECT DISTINCT 
        ProjectID
        ,newID.value AS val
    FROM 
        [dbo].[Data] WITH(NOLOCK)  
        CROSS APPLY STRING_SPLIT([bID],';') AS newID  
    WHERE 
        newID.value IN ('O95833' , 'Q96NY7-2') 
) t
GROUP BY
    ProjectID

That should do it.

Upvotes: 3

Gordon Linoff
Gordon Linoff

Reputation: 1269443

You can use distinct in the subquery used for the apply:

SELECT d.ProjectID,
       STRING_AGG(  newID.value, ',') WITHIN GROUP (ORDER BY newID.value) AS 
NewField
FROM [dbo].[Data] d CROSS APPLY
     (select distinct value
      from STRING_SPLIT(d.[bID], ';') AS newID 
     ) newID
WHERE newID.value IN (   'O95833' , 'Q96NY7-2'  ) 
group by projectid;

Upvotes: 7

Related Questions