Reputation: 1990
I'm looking for the equivalent of a GROUP_CONCAT() MySQL function in SQL Server 2012 - THAT DOES NOT USE A SUBQUERY, explained below:
CREATE TABLE Temp
(
ID INT PRIMARY KEY NOT NULL IDENTITY(1,1),
ColA varchar(900) NULL,
ColB varchar(900) NULL
)
INSERT INTO Temp (ColA, ColB)
SELECT 'A', 'some' UNION ALL
SELECT 'A', 'thing' UNION ALL
SELECT 'A', 'and' UNION ALL
SELECT 'B', 'some' UNION ALL
SELECT 'B', 'more' UNION ALL
SELECT 'B', 'and' UNION ALL
SELECT 'B', 'more' UNION ALL
SELECT 'C', 'things' UNION ALL
SELECT 'C', 'things'
-- Desired Output. Note that the lists are in descending order of frequency ('more' appears twice)
ColA, Frequency, ColBs
'B', 4, 'more, some, and'
'A', 3, 'some, thing, and'
'C', 2, 'things'
SELECT
ColA,
COUNT(*) as Frequency,
GROUP_CONCAT(ColB) --Would be nice
FROM Temp
GROUP BY ColA
ORDER BY Frequency DESC
The common answer to this in SQL Server is using STUFF() on a subquery. In my case, the performance is simply unacceptable (200 million records, 26 seconds per subquery * 200 million = 164 years).
SELECT
ColA,
COUNT(*) as Frequency,
ISNULL(
STUFF((
SELECT ', ' + ColBs FROM
(SELECT ColBs, Count(*) as Frequency
FROM Temp sub
WHERE sub.ColA = t.ColA
GROUP BY ColB
ORDER BY Frequency DESC)
FOR XML PATH('')
), 1, 2, '')
), '') as ColBs --Would take 164 years on the entire data set
FROM Temp t
GROUP BY ColA
ORDER BY Frequency DESC
The desired output is the ColB values for each unique ColA, grouped together and in order of descending occurrence, as seen above. However, this needs to be done with a SINGLE QUERY through the table.
Do I need to construct this myself and abandon the 'GROUP BY' call? Iterate through the data set manually and build the new table through in a console application? Or is there something I'm missing?
Upvotes: 2
Views: 4445
Reputation: 124
Try this:
WITH prelim
AS
(
SELECT
cola
,colb
,count(*) AS recs
,row_number() over (partition BY cola ORDER BY count(*) DESC ,colb) AS recno
,Count(*) over (partition BY cola ) AS cnt
FROM TEMP
GROUP BY cola,colb ),
Group_Concat (recno,cnt,recs,cola,colbs)
AS
(
SELECT
recno
,cnt
,recs
,cola
,CAST (colb AS varchar(MAX)) AS colbs
FROM
prelim
WHERE
recno=1
UNION ALL
SELECT
p.recno
,p.cnt
,g.recs+p.recs
,p.cola
, g.colbs + ', ' + CAST (p.colb AS varchar(MAX)) AS colbs
FROM
prelim p
JOIN Group_Concat g ON p.cola=g.cola AND p.recno=g.recno+1
)
SELECT COLA,Recs as Frequency,COLBS
FROM Group_Concat
where recno=cnt
order by cola
Upvotes: 2