Reputation: 2253
I am new to SQL. My data is like this:
c1 c2 c3 group
1 2 1 1
1 2 2 1
3 5 6 1
2 4 7 2
5 3 4 2
2 1 3 2
6 4 4 2
7 8 3 3
1 2 2 3
3 2 2 3
For each group (indicated by the column group), I want to sample 50% of the records (keeping all the columns) and save the result into a new table.
Upvotes: 0
Views: 264
Reputation: 462
You can use NTILE. If you want each group to be order by other column, just change the ORDER BY
from NTILE(2) OVER ( PARTITION BY [t].[group] ORDER BY [t].[group] ) AS rn
IF OBJECT_ID('tempdb..#SampleData') IS NOT NULL
DROP TABLE #SampleData;
CREATE TABLE #SampleData
(
c1 INT ,
c2 INT ,
c3 INT ,
[group] INT
);
IF OBJECT_ID('tempdb..#OutputData') IS NOT NULL
DROP TABLE #OutputData;
CREATE TABLE #OutputData
(
c1 INT ,
c2 INT ,
c3 INT ,
[group] INT
);
INSERT INTO [#SampleData]
( [c1], [c2], [c3], [group] )
VALUES ( 1, -- c1 - int
2, -- c2 - int
1, -- c3 - int
1 -- groupNo - int
),
( 1, -- c1 - int
22, -- c2 - int
12, -- c3 - int
1 -- groupNo - int
),
( 144, -- c1 - int
244, -- c2 - int
144, -- c3 - int
1 -- groupNo - int
),
( 2, -- c1 - int
23, -- c2 - int
4, -- c3 - int
1 -- groupNo - int
),
( 13, -- c1 - int
24, -- c2 - int
14, -- c3 - int
1 -- groupNo - int
),
( 16, -- c1 - int
266, -- c2 - int
133, -- c3 - int
2 -- groupNo - int
),
( 16, -- c1 - int
266, -- c2 - int
133, -- c3 - int
2 -- groupNo - int
),
( 16, -- c1 - int
266, -- c2 - int
133, -- c3 - int
2 -- groupNo - int
),
( 123, -- c1 - int
293, -- c2 - int
123, -- c3 - int
2 -- groupNo - int
),
( 123, -- c1 - int
223, -- c2 - int
123, -- c3 - int
3 -- groupNo - int
)
;WITH cte
AS ( SELECT [t].[c1] ,
[t].[c2] ,
[t].[c3] ,
[t].[group] ,
NTILE(2) OVER ( PARTITION BY [t].[group] ORDER BY [t].[group] ) AS rn
FROM [#SampleData] AS [t]
)
INSERT INTO [#OutputData]
( [c1] ,
[c2] ,
[c3] ,
[group]
)
SELECT [cte].[c1] ,
[cte].[c2] ,
[cte].[c3] ,
[cte].[group]
FROM [cte]
WHERE rn = 1;
SELECT [sd].[c1] ,
[sd].[c2] ,
[sd].[c3] ,
[sd].[group]
FROM [#OutputData] AS [sd];
Upvotes: 2