Homer Jay Simpson
Homer Jay Simpson

Reputation: 1280

Table function in SQL Server with multiple parameters as an argument

I have a table in SQL Server 2016 named df:

-- Create a new table with department and gender columns
CREATE TABLE df 
(
    country VARCHAR(50),
    year INT,
    val1 INT,
    val2 INT,
    val3 INT,
    department VARCHAR(50),
    gender VARCHAR(10)
);

-- Insert data into the new table, including department and gender
INSERT INTO df (country, year, val1, val2, val3, department, gender) 
VALUES ('USA', 2020, 4, 4, 5, 'Sales', 'Male'),
('USA', 2020, 4, 4, 5, 'Sales', 'Male'),
('USA', 2020, 5, 5, 5, 'Sales', 'Female'),
('USA', 2020, 5, 5, 5, 'Sales', 'Female'),
('USA', 2020, 1, 1, 5, 'Sales', 'Male'),
('USA', 2020, 3, 3, 5, 'Sales', 'Female'),
('USA', 2020, 4, 2, 5, 'Sales', 'Male'),
('USA', 2020, 1, 1, 5, 'Sales', 'Female'),
('USA', 2020, 2, 2, 5, 'Sales', 'Male'),
('Canada', 2020, 2, 2, 3, 'HR', 'Female'),
('Canada', 2020, 2, 2, 3, 'HR', 'Female'),
('Canada', 2020, 2, 2, 3, 'HR', 'Male'),
('Canada', 2020, 2, 2, 3, 'HR', 'Male'),
('Canada', 2020, 5, 5, 3, 'HR', 'Female'),
('Canada', 2020, 5, 5, 3, 'HR', 'Male'),
('Canada', 2020, 1, 1, 3, 'HR', 'Female'),
('Canada', 2020, 1, 1, 3, 'HR', 'Male'),
('Canada', 2020, 3, 4, 3, 'HR', 'Female'),
('Canada', 2020, 3, 4, 3, 'HR', 'Male'),
('Canada', 2020, 5, 4, 3, 'HR', 'Female'),
('Canada', 2020, 5, 4, 5, 'HR', 'Male'),
('Canada', 2020, 5, 4, 5, 'HR', 'Female'),
('Germany', 2022, 5, 5, 4, 'IT', 'Male'),
('France', 2020, 1, 1, 2, 'Finance', 'Female'),
('France', 2020, 1, 1, 2, 'Finance', 'Female'),
('France', 2020, 3, 2, 2, 'Finance', 'Male'),
('France', 2020, 3, 4, 2, 'Finance', 'Female'),
('France', 2020, 3, 5, 5, 'Finance', 'Male'),
('France', 2020, 3, 4, 4, 'Finance', 'Female'),
('France', 2020, 3, 4, 4, 'Finance', 'Male'),
('France', 2020, 3, 4, 3, 'Finance', 'Female'),
('UK', 2021, 4, 2, 3, 'Marketing', 'Male'),
('Australia', 2022, 3, 3, 4, 'Support', 'Female'),
('Italy', 2020, 5, 5, 5, 'Operations', 'Male'),
('Italy', 2020, 5, 5, 5, 'Operations', 'Female'),
('Italy', 2020, 5, 1, 1, 'Operations', 'Male'),
('Italy', 2020, 4, 4, 1, 'Operations', 'Female'),
('Italy', 2020, 2, 1, 2, 'Operations', 'Male'),
('Italy', 2020, 3, 5, 3, 'Operations', 'Female'),
('Spain', 2021, 1, 2, 3, 'Customer Service', 'Male'),
('Mexico', 2022, 4, 4, 4, 'Logistics', 'Female'),
('Brazil', 2020, 4, 1, 1, 'R&D', 'Male'),
('Brazil', 2020, 4, 1, 1, 'R&D', 'Female'),
('Brazil', 2020, 4, 3, 4, 'R&D', 'Male'),
('Brazil', 2020, 5, 3, 5, 'R&D', 'Female'),
('Brazil', 2020, 5, 3, 5, 'R&D', 'Male'),
('Brazil', 2020, 3, 3, 1, 'R&D', 'Female'),
('Brazil', 2020, 2, 3, 1, 'R&D', 'Male');

-- Select all rows from the new table to check the data
SELECT * FROM df;

With this table, I create some percentages and a count column based on some filtering.

-- Parameters
DECLARE @Year INT = 2020;
DECLARE @Metric VARCHAR(50) = 'count'; 
DECLARE @Gender VARCHAR(20) = NULL; -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
DECLARE @Department VARCHAR(50) = NULL; -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all
-- Set @Metric to 'dissatisfaction', 'satisfaction', or 'count'

WITH UnpivotedData AS 
(
    SELECT country, gender, department, year, Vals
    FROM 
        (SELECT country, gender, department, year, val1, val2, val3
         FROM df) AS SourceTable
    UNPIVOT 
        (Vals FOR ValueColumn IN (val1, val2, val3)) AS Unpivoted
    WHERE year = @Year
),
Proportions AS 
(
    SELECT 
        country,
        gender,
        department,
        CASE 
            WHEN Vals = 1 THEN 'Very Dissatisfied'
            WHEN Vals = 2 THEN 'Dissatisfied'
            WHEN Vals = 3 THEN 'Neutral'
            WHEN Vals = 4 THEN 'Satisfied'
            WHEN Vals = 5 THEN 'Very Satisfied'
        END AS SatisfactionLevel,
        COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department) AS Proportion
    FROM 
        UnpivotedData
    GROUP BY 
        country, gender, department, Vals
),
Pivoted AS 
(
    SELECT country, gender, department, 
           [Very Dissatisfied], 
           [Dissatisfied], 
           [Neutral], 
           [Satisfied], 
           [Very Satisfied]
    FROM Proportions
    PIVOT 
        (MAX(Proportion)
         FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])) AS p
),
CountryCounts AS 
(
    SELECT 
        CASE WHEN country IS NULL THEN 'Unknown' ELSE country END AS country,
        gender, 
        department,
        COUNT(*) AS Total
    FROM df
    WHERE year = @Year
    -- Apply filters for gender and department if provided
    AND (@Gender IS NULL OR gender = @Gender)
    AND (@Department IS NULL OR department = @Department)
    GROUP BY CASE WHEN country IS NULL THEN 'Unknown' ELSE country END, gender, department
),
OrderedData AS 
(
    SELECT 
        p.country,
        p.gender,
        p.department,
        [Very Dissatisfied],
        [Dissatisfied],
        [Neutral],
        [Satisfied],
        [Very Satisfied],
        c.Total,
        CASE 
            WHEN @Metric = 'satisfaction' THEN ISNULL([Satisfied], 0) + ISNULL([Very Satisfied], 0)
            WHEN @Metric = 'dissatisfaction' THEN ISNULL([Very Dissatisfied], 0) + ISNULL([Dissatisfied], 0)
            WHEN @Metric = 'count' THEN c.Total
        END AS SortValue
    FROM Pivoted AS p
    INNER JOIN CountryCounts AS c ON p.country = c.country AND p.gender = c.gender AND p.department = c.department
)
SELECT 
    country,
    gender,
    department,
    [Very Dissatisfied],
    [Dissatisfied],
    [Neutral],
    [Satisfied],
    [Very Satisfied],
    Total
FROM 
    OrderedData
ORDER BY 
    SortValue DESC;

I want to create a table function that will have 3 arguments:

  1. Metric
  2. Year
  3. Factor

Factor can be the Gender or the Department or both of them. If for example the Factor is the Gender the table to be grouped by the Gender and if is the Department to be grouped by Department.

If both to be grouped by both. If the Factor is null or default to not be grouped at all.

Regarding the Year: if the Year is passed in to be grouped by year. If the Year is null, show all the years without grouping.

Is there a way to do that in SQL Server?

I have a fiddle here

Upvotes: 1

Views: 114

Answers (2)

Charlieface
Charlieface

Reputation: 71544

As I told you on your previous SQL question, you are hugely over-complicating this.

You can do the filtering, unpivot and pivot in a single level of CTE, and you only need one level to add the Total, which itself wouldn't be necessary if there was an ID column, because then you could do COUNT(DISTINCT ID).

To create a function, just add the normal CREATE FUNCTION syntax. You can't add ORDER BY to a table function, it's basically just a view. You need to add that on to the outer query.

CREATE OR ALTER FUNCTION dbo.MyAggregation (
    @Year INT,
    @Gender VARCHAR(20), -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
    @Department VARCHAR(50), -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all
    @Metric VARCHAR(50) -- Set @Metric to 'dissatisfaction', 'satisfaction', or 'count'
)
RETURNS TABLE
AS RETURN

WITH AllRows AS (
      SELECT *,
          COUNT(*) OVER (PARTITION BY country, gender, department, year) AS Total
      FROM df
      WHERE (year = @Year OR @Year IS NULL)
        AND (Department = @Department OR @DepARTMENT IS NULL)
        AND (Gender = @Gender OR @Gender IS NULL)
)
SELECT
      country,
      gender,
      department,
      year,
      COUNT(CASE WHEN Vals = 1 THEN 1 END) * 1.0
         / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Very Dissatisfied],
      COUNT(CASE WHEN Vals = 2 THEN 1 END) * 1.0
         / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Dissatisfied],
      COUNT(CASE WHEN Vals = 3 THEN 1 END) * 1.0
         / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Neutral],
      COUNT(CASE WHEN Vals = 4 THEN 1 END) * 1.0
         / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Satisfied],
      COUNT(CASE WHEN Vals = 5 THEN 1 END) * 1.0
         / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Very Satisfied],
      MIN(Total) AS Total,
      CASE @Metric
        WHEN 'satisfaction' THEN
          COUNT(CASE WHEN Vals IN (4, 5) THEN 1 END) * 1.0
            / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year)
        WHEN 'dissatisfaction' THEN
          COUNT(CASE WHEN Vals IN (1, 2) THEN 1 END) * 1.0
           / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year)
        WHEN 'count' THEN MIN(Total)
      END AS SortValue
FROM AllRows
CROSS APPLY (VALUES
        ('val1', val1),
        ('val2', val2),
        ('val3', val3)
) v(ValueColumn, Vals)
GROUP BY
    country,
    gender,
    department,
    year;

Then you just do

SELECT *
FROM dbo.MyAggregation(2020, NULL, NULL, 'count')
ORDER BY
    SortValue;    

db<>fiddle

Note that the sort-value parameter should not be passed in from a variable or lateral join, as that will slow down your query a lot. If it's a constant string then the optimizer can factor it out.

Adding in dynamic grouping substantially complicates this, because now you need to null out the values before you group them up (as shown in the other answer). It will also be really slow on large tables, as you can't use indexes. I would strongly recommend you create separate functions with different grouping/partitioning constructs, alternatively do this in dynamic SQL.

Upvotes: 1

ValNik
ValNik

Reputation: 5623

We are applying value substitution for the column to be grouped by. For example, if parameter @factorGender is null, we group by value gender else group by constant value all - in fact, no grouping by gender.

To simplify things, subquery is your UNPIVOT-PIVOT operation equivalent. We can directly count values dispersion for val1,val2,val3. Expression

iif(val1=1,1.0,0)+iif(val2=1,1.0,0)+iif(val3=1,1.0,0) cv1

or as

case  when val1=1 then 1.0 else 0 end
   +case when val2=1 then 1.0 else 0 end
   +case when val3=1 then 1.0 else 0 end as cv1

counts val1=1,val2=1,val3=1 for all rows.

See example

Update1. After @DaleK comment, I decided that I really needed to answer the question more precisely and suggest an example of a function and an example of its use.


CREATE FUNCTION SatisfiedByGroups (
 -- -- Parameters for row filters
    @Year INT = 2020
   ,@Metric VARCHAR(50) ='dissatisfaction' -- Set @Metric to 'satisfaction','dissatisfaction', 'count' ; 
   ,@Gender VARCHAR(20) = NULL  -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
   ,@Department VARCHAR(50) = NULL  -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all

-- parameters for grouping
   ,@factorYear int = null -- 0 ; -- Set to any value if do not group by, else null
   ,@factorgender VARCHAR(50) = null  --'all'  -- Set to any value if do not group by, else null
   ,@factorDepartment VARCHAR(50) = null --'all'  -- Set to value if do not group by, else null
   ,@factorCountry VARCHAR(50) = null -- 'all' -- Set to value if do not group by, else null
  )
RETURNS TABLE
AS
RETURN (
select grCountry country,grGender gender,grDepartment department
  ,sum(cv1)/count(*)/3.0 [Very Dissatisfied]
  ,sum(cv2)/count(*)/3.0 [Dissatisfied]
  ,sum(cv3)/count(*)/3.0 [Neutral]
  ,sum(cv4)/count(*)/3.0 [Satisfied]
  ,sum(cv5)/count(*)/3.0 [Very Satisfied]
  ,count(*) total
  ,CASE 
      WHEN @Metric = 'satisfaction' THEN (sum(cv4)+sum(cv5))/3.0/count(*)
      WHEN @Metric = 'dissatisfaction' THEN (sum(cv1)+sum(cv2))/3.0/count(*)
      WHEN @Metric = 'count' THEN count(*)
   END AS SortValue
  ,grCountry,grYear,grDepartment,grGender,@metric as metric
from(
select coalesce(country,'unknown') country,year ,gender,department
  ,iif(val1=1,1.0,0)+iif(val2=1,1.0,0)+iif(val3=1,1.0,0) cv1 
  ,iif(val1=2,1.0,0)+iif(val2=2,1.0,0)+iif(val3=2,1.0,0) cv2
  ,iif(val1=3,1.0,0)+iif(val2=3,1.0,0)+iif(val3=3,1.0,0) cv3
  ,iif(val1=4,1.0,0)+iif(val2=4,1.0,0)+iif(val3=4,1.0,0) cv4
  ,iif(val1=5,1.0,0)+iif(val2=5,1.0,0)+iif(val3=5,1.0,0) cv5
  ,coalesce(@factorYear,year) grYear
  ,coalesce(@FactorDepartment,department)grDepartment
  ,coalesce(@factorGender,gender) grGender
  ,coalesce(@factorCountry,coalesce(country,'UNKNOWN')) grCountry
from df
WHERE (year = @Year or @Year is null)
      -- Apply filters for gender and department if provided
    AND (@Gender IS NULL OR gender = @Gender)
    AND (@Department IS NULL OR department = @Department)
)d 
group by grCountry,grYear,grDepartment,grGender
);

And call this function

select * from SatisfiedByGroups(
        2020 -- @year
       ,'count' -- @metric
       ,NULL    -- @gender
       ,NULL    -- @department
       ,null -- @factorYear
       ,null -- @factorgender
       ,null -- @factorDepartment
       ,null -- @factorCountry
)

Output is

country gender department Very Dissatisfied Dissatisfied Neutral Satisfied Very Satisfied total SortValue grCountry grYear grDepartment grGender metric
Canada Female HR 0.095238 0.190476 0.333333 0.142857 0.238095 7 7.000000 Canada 2020 HR Female count
Canada Male HR 0.111111 0.222222 0.333333 0.111111 0.222222 6 6.000000 Canada 2020 HR Male count
France Female Finance 0.266666 0.200000 0.266666 0.266666 0.000000 5 5.000000 France 2020 Finance Female count
USA Male Sales 0.133333 0.200000 0.000000 0.333333 0.333333 5 5.000000 USA 2020 Sales Male count
USA Female Sales 0.166666 0.000000 0.166666 0.000000 0.666666 4 4.000000 USA 2020 Sales Female count
Brazil Male R&D 0.250000 0.083333 0.250000 0.250000 0.166666 4 4.000000 Brazil 2020 R&D Male count
Brazil Female R&D 0.333333 0.000000 0.333333 0.111111 0.222222 3 3.000000 Brazil 2020 R&D Female count
France Male Finance 0.000000 0.222222 0.333333 0.222222 0.222222 3 3.000000 France 2020 Finance Male count
Italy Female Operations 0.111111 0.000000 0.222222 0.222222 0.444444 3 3.000000 Italy 2020 Operations Female count
Italy Male Operations 0.333333 0.222222 0.000000 0.000000 0.444444 3 3.000000 Italy 2020 Operations Male count

So try run query with another group condition.

select * from SatisfiedByGroups(
        2020 -- @year
       ,'dissatisfaction' -- @metric
       ,NULL    -- @gender
       ,NULL    -- @department
       ,null  -- @factorYear
       ,'all' -- @factorgender
       ,'all' -- @factorDepartment
       ,null -- @factorCountry
)
country gender department Very Dissatisfied Dissatisfied Neutral Satisfied Very Satisfied total SortValue grCountry grYear grDepartment grGender metric
France all all 0.166666 0.208333 0.291666 0.250000 0.083333 8 0.375000 France 2020 all all dissatisfaction
Italy all all 0.222222 0.111111 0.111111 0.111111 0.444444 6 0.333333 Italy 2020 all all dissatisfaction
Brazil all all 0.285714 0.047619 0.285714 0.190476 0.190476 7 0.333333 Brazil 2020 all all dissatisfaction
Canada all all 0.102564 0.205128 0.333333 0.128205 0.230769 13 0.307692 Canada 2020 all all dissatisfaction
USA all all 0.148148 0.111111 0.074074 0.185185 0.481481 9 0.259259 USA 2020 all all dissatisfaction

fiddle

If on your server do not available function IIF(...) convert this expression by using case when ... end

  ,case  when val1=1 then 1.0 else 0 end
   +case when val2=1 then 1.0 else 0 end
   +case when val3=1 then 1.0 else 0 end as cv1
  ,case  when val1=2 then 1.0 else 0 end
   +case when val2=2 then 1.0 else 0 end
   +case when val3=2 then 1.0 else 0 end as cv2
  ,case  when val1=3 then 1.0 else 0 end
   +case when val2=3 then 1.0 else 0 end
   +case when val3=3 then 1.0 else 0 end as cv3
  ,case  when val1=4 then 1.0 else 0 end
   +case when val2=4 then 1.0 else 0 end
   +case when val3=4 then 1.0 else 0 end as cv4
  ,case  when val1=5 then 1.0 else 0 end
   +case when val2=5 then 1.0 else 0 end
   +case when val3=5 then 1.0 else 0 end as cv5
--   ,iif(val1=1,1.0,0)+iif(val2=1,1.0,0)+iif(val3=1,1.0,0) cv1 
--   ,iif(val1=2,1.0,0)+iif(val2=2,1.0,0)+iif(val3=2,1.0,0) cv2
--  ,iif(val1=3,1.0,0)+iif(val2=3,1.0,0)+iif(val3=3,1.0,0) cv3
--  ,iif(val1=4,1.0,0)+iif(val2=4,1.0,0)+iif(val3=4,1.0,0) cv4
--  ,iif(val1=5,1.0,0)+iif(val2=5,1.0,0)+iif(val3=5,1.0,0) cv5

fiddle
and

Upvotes: -1

Related Questions