Reputation: 719
I have a process where I check for SQL Server agent job execution status making sure all the jobs in question have their last run for the given date completed reporting either success or failure. Based on the outcome of the last run I either execute downstream jobs on success or abort on failure. I have this process which waits for a minute and then loops through all the specified jobs until all are complete.
The problem I run into is when I investigate the job activity monitor I see all the jobs complete either reporting success or failure. But, my query does not complete. Its stuck in this infinite loop.
Is there any better approach than what I'm already doing to address this.
SQL:
BEGIN
DECLARE @noofjob INT;
DECLARE @failedjob VARCHAR(MAX);
DECLARE @jobnames TABLE (name VARCHAR(255)); -- To store job names
DECLARE @jobname VARCHAR(MAX) = 'child_job_1,child_job_2,child_job_3,child_job_4,child_job_5';
-- Copy SQL Agent job history into a temp table
SELECT jh.[instance_id],
jh.[job_id],
j.[name],
j.[description],
jh.[step_id],
jh.[step_name],
jh.[sql_message_id],
jh.[sql_severity],
jh.[message],
jh.[run_status],
CASE WHEN jh.[run_status] = 1 THEN 'Succeeded'
WHEN jh.[run_status] = 0 THEN 'Failed'
WHEN jh.[run_status] = 2 THEN 'Canceled'
ELSE 'Unknown'
END AS [run_status_description],
jh.[run_date],
jh.[run_time],
msdb.dbo.agent_datetime(jh.run_date, jh.run_time) AS 'run_date_time',
jh.[run_duration],
jh.[operator_id_emailed],
jh.[operator_id_netsent],
jh.[operator_id_paged],
jh.[retries_attempted],
jh.[server]
INTO #jobhistory
FROM [msdb].[dbo].[sysjobhistory] jh
JOIN [msdb].[dbo].[sysjobs] j
ON j.[job_id] = jh.[job_id]
-- Insert job names into table variable
INSERT INTO @jobnames(name)
SELECT value FROM STRING_SPLIT(@jobname, ',');
-- Count number of distinct jobs
SELECT @noofjob = COUNT(*) FROM @jobnames;
WHILE 1 = 1
BEGIN
-- Drop temp table if it exists
DROP TABLE IF EXISTS #lastrun;
-- Get latest run information
SELECT name, MAX(run_date_time) AS latest_run
INTO #lastrun
FROM #jobhistory
WHERE run_date_time > CAST(GETDATE() AS DATE)
AND name IN (SELECT name FROM @jobnames)
GROUP BY name;
-- Check if all jobs have run in the last 6 hours
IF @noofjob != (
SELECT COUNT(DISTINCT name)
FROM #jobhistory
WHERE run_date_time > DATEADD(HOUR, -6, GETDATE())
AND name IN (SELECT name FROM @jobnames)
)
BEGIN
-- Wait for 1 minute before checking again
WAITFOR DELAY '00:01:00';
END
ELSE
BEGIN
-- Find failed jobs
SET @failedjob = (
SELECT STRING_AGG(name, ',')
FROM (
SELECT DISTINCT s.name
FROM #jobhistory S
JOIN #lastrun lr
ON S.name = lr.name
AND S.run_date_time = lr.latest_run
WHERE S.run_status != 1 -- '1' denotes success
) AS FailedJobs
);
-- Check if there are any failed jobs
IF LEN(@failedjob) > 0
BEGIN
RAISERROR ('Stored procedure execution stopped due to failed job: %s', 16, 1, @failedjob);
THROW 50010, 'Error', 1;
END
ELSE
BEGIN
-- Wait for 30 seconds before the next check
WAITFOR DELAY '00:00:30';
END
-- Exit loop once all conditions are met
BREAK;
END
END
END
Upvotes: 2
Views: 172