Hugues Stefanski
Hugues Stefanski

Reputation: 1182

Potential performance issue on SQL query

I feel like I am encountering a performance problem on the folowing SQL query:

SELECT g.Guid AS GroupGuid, Max(TimeStamp) AS LastProcessed
FROM Information 
INNER JOIN  GroupStepResults gsr ON Information.InformationId = gsr.InformationId
INNER JOIN TaskHistories th on th.TaskHistoryId=gsr.TaskHistoryId
INNER JOIN Groups g on g.GroupId=gsr.GroupId
WHERE th.Type=0
GROUP BY g.Guid;

This leads to the following execution plan: Execution plan

My queried tables are structured as follows (database contains other tables, that are not queried here):

DB Scheme

Here is the table creations scripts:

SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[Groups](
    [GroupId] [int] IDENTITY(1,1) NOT NULL,
    [Guid] [uniqueidentifier] NOT NULL,
 CONSTRAINT [PK_dbo.Groups] PRIMARY KEY CLUSTERED 
(
    [GroupId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object:  Table [dbo].[GroupStepResults]    Script Date: 17/09/2021 11:58:29 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[GroupStepResults](
    [InformationId] [int] NOT NULL,
    [TaskHistoryId] [int] NOT NULL,
    [GroupId] [int] NOT NULL,
    [ElapsedSeconds] [float] NOT NULL,
    [IsLastResult] [bit] NOT NULL,
    [ErrorId] [int] NULL,
    [StepId] [int] NOT NULL,
 CONSTRAINT [PK_dbo.GroupStepResults] PRIMARY KEY CLUSTERED 
(
    [InformationId] ASC,
    [GroupId] ASC,
    [TaskHistoryId] ASC,
    [StepId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object:  Table [dbo].[Information]    Script Date: 17/09/2021 11:58:29 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[Information](
    [InformationId] [int] IDENTITY(1,1) NOT NULL,
    [TimeStamp] [datetime] NOT NULL,
    [ModelVersion] [nvarchar](max) NULL,
    [WideVersion] [nvarchar](max) NULL,
 CONSTRAINT [PK_dbo.Information] PRIMARY KEY CLUSTERED 
(
    [InformationId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
/****** Object:  Table [dbo].[TaskHistories]    Script Date: 17/09/2021 11:58:29 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[TaskHistories](
    [TaskHistoryId] [int] IDENTITY(1,1) NOT NULL,
    [TaskGuid] [uniqueidentifier] NOT NULL,
    [Creator] [nvarchar](max) NULL,
    [CreationDate] [datetime] NOT NULL,
    [Type] [int] NOT NULL,
 CONSTRAINT [PK_dbo.TaskHistories] PRIMARY KEY CLUSTERED 
(
    [TaskHistoryId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
/****** Object:  Index [IX_Guid]    Script Date: 17/09/2021 11:58:29 ******/
CREATE UNIQUE NONCLUSTERED INDEX [IX_Guid] ON [dbo].[Groups]
(
    [Guid] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [IX_ErrorId]    Script Date: 17/09/2021 11:58:29 ******/
CREATE NONCLUSTERED INDEX [IX_ErrorId] ON [dbo].[GroupStepResults]
(
    [ErrorId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [IX_GroupId]    Script Date: 17/09/2021 11:58:29 ******/
CREATE NONCLUSTERED INDEX [IX_GroupId] ON [dbo].[GroupStepResults]
(
    [GroupId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [IX_InformationId]    Script Date: 17/09/2021 11:58:29 ******/
CREATE NONCLUSTERED INDEX [IX_InformationId] ON [dbo].[GroupStepResults]
(
    [InformationId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [IX_StepId]    Script Date: 17/09/2021 11:58:29 ******/
CREATE NONCLUSTERED INDEX [IX_StepId] ON [dbo].[GroupStepResults]
(
    [StepId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [IX_TaskHistoryId]    Script Date: 17/09/2021 11:58:29 ******/
CREATE NONCLUSTERED INDEX [IX_TaskHistoryId] ON [dbo].[GroupStepResults]
(
    [TaskHistoryId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [NonClusteredIndex-20210916-164346]    Script Date: 17/09/2021 11:58:29 ******/
CREATE NONCLUSTERED INDEX [NonClusteredIndex-20210916-164346] ON [dbo].[GroupStepResults]
(
    [InformationId] ASC,
    [GroupId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object:  Index [IX_Information_TimeStamp]    Script Date: 17/09/2021 11:58:29 ******/
CREATE UNIQUE NONCLUSTERED INDEX [IX_Information_TimeStamp] ON [dbo].[Information]
(
    [TimeStamp] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
ALTER TABLE [dbo].[GroupStepResults] ADD  DEFAULT ((0)) FOR [ElapsedSeconds]
GO
ALTER TABLE [dbo].[GroupStepResults] ADD  DEFAULT ((0)) FOR [IsLastResult]
GO
ALTER TABLE [dbo].[GroupStepResults]  WITH CHECK ADD  CONSTRAINT [FK_dbo.GroupStepResults_dbo.Errors_ErrorId] FOREIGN KEY([ErrorId])
REFERENCES [dbo].[Errors] ([ErrorId])
GO
ALTER TABLE [dbo].[GroupStepResults] CHECK CONSTRAINT [FK_dbo.GroupStepResults_dbo.Errors_ErrorId]
GO
ALTER TABLE [dbo].[GroupStepResults]  WITH CHECK ADD  CONSTRAINT [FK_dbo.GroupStepResults_dbo.Groups_GroupId] FOREIGN KEY([GroupId])
REFERENCES [dbo].[Groups] ([GroupId])
ON DELETE CASCADE
GO
ALTER TABLE [dbo].[GroupStepResults] CHECK CONSTRAINT [FK_dbo.GroupStepResults_dbo.Groups_GroupId]
GO
ALTER TABLE [dbo].[GroupStepResults]  WITH CHECK ADD  CONSTRAINT [FK_dbo.GroupStepResults_dbo.Information_InformationId] FOREIGN KEY([InformationId])
REFERENCES [dbo].[Information] ([InformationId])
ON DELETE CASCADE
GO
ALTER TABLE [dbo].[GroupStepResults] CHECK CONSTRAINT [FK_dbo.GroupStepResults_dbo.Information_InformationId]
GO
ALTER TABLE [dbo].[GroupStepResults]  WITH CHECK ADD  CONSTRAINT [FK_dbo.GroupStepResults_dbo.Steps_StepId] FOREIGN KEY([StepId])
REFERENCES [dbo].[Steps] ([StepId])
ON DELETE CASCADE
GO
ALTER TABLE [dbo].[GroupStepResults] CHECK CONSTRAINT [FK_dbo.GroupStepResults_dbo.Steps_StepId]
GO
ALTER TABLE [dbo].[GroupStepResults]  WITH CHECK ADD  CONSTRAINT [FK_dbo.GroupStepResults_dbo.TaskHistories_TaskHistoryId] FOREIGN KEY([TaskHistoryId])
REFERENCES [dbo].[TaskHistories] ([TaskHistoryId])
ON DELETE CASCADE
GO
ALTER TABLE [dbo].[GroupStepResults] CHECK CONSTRAINT [FK_dbo.GroupStepResults_dbo.TaskHistories_TaskHistoryId]
GO

GroupStepResults contains about 7M rows, TaskHistories about 200, Informations about 70k and Groups about 1k. The query takes about 5s, which seems quite a lot. Are my expectations too high?

Upvotes: 0

Views: 216

Answers (4)

Esat Erkec
Esat Erkec

Reputation: 1724

After analyzing your query plan, the following issues may cause a performance problem.

  • In the first Index Seek operator only one thread performs all read operations because the task histories table returns only one row enter image description here

You can check the SQL Server threads - one does all the work, the other MAXDOP - 1 (5..30) do nothing question for more information about this issue. Maybe you can try to force join type MERGE or HASH join.

  • Another point I have noticed is that the IX_HistoryId takes lots of modification and the sampling percent is very low.

enter image description here

I think this issue causes a difference between the estimated and actual number of rows.

enter image description here

Upvotes: 0

Charlieface
Charlieface

Reputation: 71144

Your indexing needs improvement. I would expect the foloowing indexes for this query

Information (InformationId) INLUDE (TimeStamp) -- clustered index already covers this
Groups (Guid, GroupId)
GroupStepResults (GroupId, TaskHistoryId) INCLUDE (InformationId)
-- alteratively
GroupStepResults (GroupId, InformationId) INCLUDE (TaskHistoryId)
TaskHistory ([Type], TaskHistoryId)

You can obviously add any other INCLUDE columns, but keep the key columns the same


Note that single-column indexes are generally useless, and should be removed. You ideally want an index that starts with the WHERE columns as keys, continues with the JOIN and GROUP BY columns, and has the SELECT columns in the INCLUDE.

For further help see Use The Index, Luke

Upvotes: 1

SQLpro
SQLpro

Reputation: 5103

Try to create those four indexes:

CREATE INDEX X1 ON TaskHistories (type, TaskHistoryId);
CREATE INDEX X2 ON Information   (InformationId, TimeStamp);
CREATE INDEX X3 ON GroupStepResults (InformationId, TaskHistoryId) INCLUDE (GroupId)
CREATE INDEX X4 ON Groups (GroupId, Guid);

Upvotes: 1

Rahul Biswas
Rahul Biswas

Reputation: 3457

Please execute this below two query. For first one use subquery for retrieving small data sets form large volume of data with type = 0.

SELECT g.Guid GroupGuid
     , Max(if.TimeStamp) LastProcessed
FROM (SELECT gsr.InformationId
           , gsr.GroupId
      FROM TaskHistories th
      INNER JOIN GroupStepResults gsr
              ON th.TaskHistoryId = gsr.TaskHistoryId 
             AND th.Type = 0) t
INNER JOIN Information if
        ON if.InformationId = t.InformationId
INNER JOIN Groups g
        ON g.GroupId = t.GroupId
GROUP BY g.Guid

Just table ordering change for 2nd one

SELECT g.Guid AS GroupGuid
     , Max(if.TimeStamp) AS LastProcessed
FROM TaskHistories th
INNER JOIN GroupStepResults gsr
        ON th.TaskHistoryId = gsr.TaskHistoryId 
       AND th.Type = 0
INNER JOIN Information if
        ON if.InformationId = gsr.InformationId
INNER JOIN Groups g
        ON g.GroupId = gsr.GroupId
GROUP BY g.Guid

Please create an index for type and also for gsr.TaskHistoryId because gsr table have composite primary key.

Upvotes: 0

Related Questions