Marc
Marc

Reputation: 27

mssql subquery aggregate - sum wrong

So I'm trying to get some data in tsql (MSSQL2014) where I use a subquery to get a sum of some foreign key'd data table.

The structure looks like this:

TABLE [AggregateData](
    [Id] [bigint] IDENTITY(1,1) NOT NULL,
    [Aggregate_UUID] [uniqueidentifier] NOT NULL,
    [DataDate] [date] NOT NULL,
    [SizeAvailable] [bigint] NOT NULL,
    [SizeTotal] [bigint] NOT NULL,
    [SizeUsed] [bigint] NOT NULL,
    [PercentageUsed] [int] NOT NULL
)

TABLE [Aggregate](
    [Id] [bigint] IDENTITY(1,1) NOT NULL,
    [UUID] [uniqueidentifier] NOT NULL,
    [Name] [nvarchar](255) NOT NULL,
    [Cluster_UUID] [uniqueidentifier] NOT NULL,
    [DiskTypeID] [int] NOT NULL
)

TABLE [DiskType](
    [Id] [int] IDENTITY(1,1) NOT NULL,
    [TypeName] [nvarchar](255) NULL
)

TABLE [Volume](
    [Id] [bigint] IDENTITY(1,1) NOT NULL,
    [UUID] [uniqueidentifier] NOT NULL,
    [Name] [nvarchar](255) NOT NULL,
    [Aggregate_UUID] [uniqueidentifier] NOT NULL,
    [ServiceClassID] [int] NULL,
    [ProtocolID] [int] NOT NULL,
    [EnvironmentID] [int] NOT NULL
)

TABLE [VolumeData](
    [Id] [bigint] IDENTITY(1,1) NOT NULL,
    [Volume_UUID] [uniqueidentifier] NOT NULL,
    [DataDate] [date] NOT NULL,
    [SizeAvailable] [bigint] NOT NULL,
    [SizeTotal] [bigint] NOT NULL,
    [SizeUsed] [bigint] NOT NULL,
    [PercentageUsed] [int] NOT NULL
)

Now in the end I need to get the following data:

DataDate, DiskType, AggregateSizes (Avail, Used, Total), Aggregated Volume Sizes (Sum of Avail, Used, Total of Volumes in that Aggregate)

I was thinking of using subqueries but when trying to get the values for a specific Aggregate only (for testing, easier for my to check) I get wrong values in the subquery.

Here is what I tried;

SELECT
  AggregateData.DataDate,
  AggregateData.SizeTotal AS AggregateSizeTotal,
  (SELECT
    SUM(VolumeData.SizeTotal)
  FROM VolumeData
  LEFT JOIN Volume
    ON VolumeData.Volume_UUID = Volume.UUID
  WHERE Aggregate_UUID = Volume.Aggregate_UUID
  AND VolumeData.DataDate = AggregateData.DataDate)
  VolumeSizeTotal

FROM AggregateData

WHERE AggregateData.Aggregate_UUID = 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C'
ORDER BY AggregateData.DataDate

But this seems me to not get the correct value for the subquery sum. My subquery sum is way to high so I assume my where clause is incorrect (or the whole setup ;) ...)

Upvotes: 1

Views: 579

Answers (3)

SqlZim
SqlZim

Reputation: 38073

Going for a query to return the desired end results, I would use something like this:

Now in the end I need to get the following data: DataDate, DiskType, AggregateSizes (Avail, Used, Total), Aggregated Volume Sizes (Sum of Avail, Used, Total of Volumes in that Aggregate)

select 
      AggregateUuid          = a.uuid
    , DiskType               = dt.TypeName
    , DataDate               = ad.DataDate
    , AggregateSizeAvailable = ad.SizeAvailable
    , AggregateSizeUsed      = ad.SizeUsed
    , AggregateSizeTotal     = ad.SizeTotal
    , VolumeSizeAvailable    = sum(vd.SizeAvailable)
    , VolumeSizeUsed         = sum(vd.SizeUsed)
    , VolumeSizeTotal        = sum(vd.SizeTotal)
  from [Aggregate] a
      inner join DiskType      dt  on dt.Id             = a.DiskTypeId
      inner join AggregateData ad  on ad.Aggregate_uuid = a.uuid
      left  join Volume         v  on  v.Aggregate_uuid = a.uuid 
      left  join VolumeData     vd on vd.Volume_uuid    = v.uuid
                                 and vd.DataDate       = ad.DataDate
  where a.uuid = 'C58D0098-D1A4-4ee9-A0E9-7de3eeb6275C'
  group by 
      a.uuid
    , dt.TypeName
    , ad.DataDate
    , ad.SizeAvailable
    , ad.SizeUsed
    , ad.SizeTotal
  order by a.uuid, ad.DataDate;

test setup: http://rextester.com/HZZHLI45077

create table DiskType(
    Id int identity(1,1) not null
  , TypeName nvarchar(255) null
);
set identity_insert DiskType on;
insert into DiskType (Id, TypeName) values 
  (1,'Type1'), (2,'Type2');
set identity_insert DiskType off;

create table [Aggregate](
    Id bigint identity(1,1) not null
  , uuid uniqueidentifier not null
  , Name nvarchar(255) not null
  , Cluster_uuid uniqueidentifier not null
  , DiskTypeid int not null 
);

insert into [Aggregate] (uuid, name, cluster_uuid, disktypeid) 
            select 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', 'ex', newid(), 1;

create table AggregateData(
    Id bigint identity(1,1) not null
  , Aggregate_uuid uniqueidentifier not null
  , DataDate date not null
  , SizeAvailable bigint not null
  , SizeTotal bigint not null
  , SizeUsed bigint not null
  , PercentageUsed int not null
);

insert into AggregateData 
            select 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', '20170101', 12,100,87,87
  union all select 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', '20170102', 9,100,90,90
  union all select 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', '20170103', 6,100,93,93;

create table Volume(
    Id bigint identity(1,1) not null
  , uuid uniqueidentifier not null
  , Name nvarchar(255) not null
  , Aggregate_uuid uniqueidentifier not null
  , ServiceClassid int null
  , Protocolid int not null
  , Environmentid int not null
);
insert into Volume 
            select '00000000-0000-0000-0000-000000000001', 'v1'
                , 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', null, 1, 1
  union all select '00000000-0000-0000-0000-000000000002', 'v2'
                , 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', null, 1, 1
  union all select '00000000-0000-0000-0000-000000000003', 'v3'
                , 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C', null, 1, 1;

create table VolumeData(
    Id bigint identity(1,1) not null
  , Volume_uuid uniqueidentifier not null
  , DataDate date not null
  , SizeAvailable bigint not null
  , SizeTotal bigint not null
  , SizeUsed bigint not null
  , PercentageUsed int not null
);

insert into VolumeData 
            select '00000000-0000-0000-0000-000000000001', '20170101', 4,33,29,88
  union all select '00000000-0000-0000-0000-000000000002', '20170101', 4,33,29,88
  union all select '00000000-0000-0000-0000-000000000003', '20170101', 4,34,29,87
  union all select '00000000-0000-0000-0000-000000000001', '20170102', 3,33,30,91
  union all select '00000000-0000-0000-0000-000000000002', '20170102', 3,33,30,91
  union all select '00000000-0000-0000-0000-000000000003', '20170102', 3,34,30,90
  union all select '00000000-0000-0000-0000-000000000001', '20170103', 2,33,31,94
  union all select '00000000-0000-0000-0000-000000000002', '20170103', 2,33,31,94
  union all select '00000000-0000-0000-0000-000000000003', '20170103', 2,34,31,93

go
/* -------------------------------------------------------- */

select 
      AggregateUuid          = a.uuid
    , DiskType               = dt.TypeName
    , DataDate               = convert(varchar(10),ad.DataDate,121)
    , AggregateSizeAvailable = ad.SizeAvailable
    , AggregateSizeUsed      = ad.SizeUsed
    , AggregateSizeTotal     = ad.SizeTotal
    , VolumeSizeAvailable    = sum(vd.SizeAvailable)
    , VolumeSizeUsed         = sum(vd.SizeUsed)
    , VolumeSizeTotal        = sum(vd.SizeTotal)
  from [Aggregate] a
      inner join DiskType      dt  on dt.Id             = a.DiskTypeId
      inner join AggregateData ad  on ad.Aggregate_uuid = a.uuid
      left  join Volume         v  on  v.Aggregate_uuid = a.uuid 
      left  join VolumeData     vd on vd.Volume_uuid    = v.uuid
                                 and vd.DataDate       = ad.DataDate
  where a.uuid = 'C58D0098-D1A4-4ee9-A0E9-7de3eeb6275C'
  group by 
      a.uuid
    , dt.TypeName
    , ad.DataDate
    , ad.SizeAvailable
    , ad.SizeUsed
    , ad.SizeTotal
  order by a.uuid, ad.DataDate;

results in:

+--------------------------------------+----------+------------+------------------------+-------------------+--------------------+---------------------+----------------+-----------------+
|            AggregateUuid             | DiskType |  DataDate  | AggregateSizeAvailable | AggregateSizeUsed | AggregateSizeTotal | VolumeSizeAvailable | VolumeSizeUsed | VolumeSizeTotal |
+--------------------------------------+----------+------------+------------------------+-------------------+--------------------+---------------------+----------------+-----------------+
| c58d0098-d1a4-4ee9-a0e9-7de3eeb6275c | Type1    | 2017-01-01 |                     12 |                87 |                100 |                  12 |             87 |             100 |
| c58d0098-d1a4-4ee9-a0e9-7de3eeb6275c | Type1    | 2017-01-02 |                      9 |                90 |                100 |                   9 |             90 |             100 |
| c58d0098-d1a4-4ee9-a0e9-7de3eeb6275c | Type1    | 2017-01-03 |                      6 |                93 |                100 |                   6 |             93 |             100 |
+--------------------------------------+----------+------------+------------------------+-------------------+--------------------+---------------------+----------------+-----------------+

Upvotes: 0

Gordon Linoff
Gordon Linoff

Reputation: 1271171

You need to qualify all column names. I would recommend using table abbreviations. The problem is Aggregate_UUID = v.Aggregate_UUID. The first column is coming from v so this is (essentially) a no-op.

Presumably, you want this correlated with the outer query:

SELECT ad.DataDate, ad.SizeTotal AS AggregateSizeTotal,
       (SELECT SUM(vd.SizeTotal)
        FROM VolumeData vd LEFT JOIN
             Volume v
             ON vd.Volume_UUID = v.UUID
        WHERE ad.Aggregate_UUID = v.Aggregate_UUID AND
              ad.DataDate = vd.DataDate
       ) VolumeSizeTotal
FROM AggregateData ad
WHERE ad.Aggregate_UUID = 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C'
ORDER BY ad.DataDate

Upvotes: 1

Gurwinder Singh
Gurwinder Singh

Reputation: 39537

You can do this using JOIN instead of correlated subquery (O(n^2) performance) -

SELECT
  t1.DataDate,
  t1.SizeTotal AS AggregateSizeTotal,
  t2.total VolumeSizeTotal
FROM AggregateData t1 left join (SELECT
    DataDate, SUM(VolumeData.SizeTotal) total
  FROM VolumeData
  LEFT JOIN Volume
    ON VolumeData.Volume_UUID = Volume.UUID
  WHERE Aggregate_UUID = Volume.Aggregate_UUID
  group by DataDate) t2 on t1.datadate = t2.dataDate
WHERE t1.Aggregate_UUID = 'C58D0098-D1A4-4EE9-A0E9-7DE3EEB6275C';

Upvotes: 0

Related Questions