Is it better to block on an event in a CPU-bound multithreaded method than to make it async?

Question

I have a method that will spawn lots of CPU-bound workers with Task.Run(). Each worker may in turn spawn more workers, but I'm guaranteed that eventually, all workers will stop executing. My first thought was writing my method like this:

/// <summary>
/// Spawns one worker per work item, blocks the calling thread until the countdown
/// reaches zero, and then combines the queued results.
/// </summary>
/// <param name="workitems">The initial work items; each one is handed to <c>SpawnWorker</c>.</param>
/// <returns>The value produced by <c>ComputeTotalResult</c> over the collected results.</returns>
public Result OrchestrateWorkers(WorkItem[] workitems)
{
    // Start at 1 rather than 0: a CountdownEvent created with a count of 0 is
    // already set, and AddCount() on a set event throws InvalidOperationException.
    // The extra count belongs to this method and is released after the loop, so the
    // event cannot reach zero while the initial workers are still being spawned.
    this.countdown = new CountdownEvent(1);
    // NOTE(review): the queue's element type argument appears to be missing here —
    // confirm against the declaration of the results field.
    this.results = new ConcurrentQueue();
    foreach (var workItem in workitems)
    {
        SpawnWorker(workItem);
    }

    this.countdown.Signal(); // release the orchestrator's own count
    this.countdown.Wait();   // until all spawned workers have completed.
    return ComputeTotalResult(this.results);
}

The public SpawnWorker method is used to start a worker, and to keep track of when they complete by enqueueing the worker's result and decrementing the countdown.

/// <summary>
/// Adds one to the countdown and queues a task that runs a worker for the given
/// item, enqueues the worker's result, and signals the countdown.
/// </summary>
/// <param name="workItem">The item handed to the new <c>Worker</c>.</param>
public void SpawnWorker(WorkItem workItem)
{
    // The count is raised before the task is queued, not inside it.
    this.countdown.AddCount();

    Task.Run(() =>
    {
        // The worker receives this instance so it can call SpawnWorker itself.
        var result = new Worker(workItem, this).DoWork();
        this.results.Enqueue(result);
        this.countdown.Signal();
    });
}

Each worker can call SpawnWorker as much as they like, but they're guaranteed to terminate at some point.

In this design, the thread that calls OrchestrateWorkers will block until all the workers have completed. My thinking is that it's a shame that there's a blocked thread; it would be nice if it could be doing work as well.

Would it be better to rearchitect the solution to something like this?

/// <summary>
/// Spawns one worker per work item and returns a task that is completed with the
/// combined result when the outstanding-worker counter drops to zero.
/// </summary>
/// <param name="workitems">The initial work items; each one is handed to <c>SpawnWorker</c>.</param>
/// <returns>The task exposed by the completion source created for this run.</returns>
/// <exception cref="InvalidOperationException">Thrown when a completion source already exists.</exception>
public Task<Result> OrchestrateWorkersAsync(WorkItem[] workitems)
{
    if (this.tcs is not null)
    {
        throw new InvalidOperationException("Already running!");
    }

    // SpawnWorker passes a value to SetResult, so the completion source must be generic.
    this.tcs = new TaskCompletionSource<Result>(TaskCreationOptions.RunContinuationsAsynchronously);

    // Start at 1 instead of 0. The extra count is held by this method while it is
    // still spawning; otherwise a fast worker could decrement the counter to zero
    // (and complete the task) before the remaining items have been started.
    this.countdown = 1;
    // NOTE(review): the queue's element type argument appears to be missing here —
    // confirm against the declaration of the results field.
    this.results = new ConcurrentQueue();
    foreach (var workItem in workitems)
    {
        SpawnWorker(workItem);
    }

    // Release the orchestrator's own count. If every worker has already finished
    // (or there were no work items at all), this is the decrement that reaches zero.
    if (Interlocked.Decrement(ref this.countdown) == 0)
    {
        this.tcs.SetResult(this.ComputeTotalResult(this.results));
    }

    return this.tcs.Task;
}

/// <summary>
/// Increments the outstanding-worker counter and queues a task that runs a worker
/// for the given item, enqueues its result, and decrements the counter. The task
/// that brings the counter to zero sets the result on the completion source.
/// </summary>
/// <param name="workItem">The item handed to the new <c>Worker</c>.</param>
public void SpawnWorker(WorkItem workItem)
{
    Interlocked.Increment(ref this.countdown);

    Task.Run(() =>
    {
        var result = new Worker(workItem, this).DoWork();
        this.results.Enqueue(result);

        var remaining = Interlocked.Decrement(ref this.countdown);
        if (remaining != 0)
        {
            return;
        }

        // This was the last outstanding worker: publish the combined result.
        this.tcs.SetResult(this.ComputeTotalResult(this.results));
    });
}

EDIT: I've added a more fully fleshed-out sample below. It should be compilable and runnable. I'm seeing a ~10% performance improvement on my 8-core system, but I want to make sure this is the "canonical" way to orchestrate a swarm of spawning tasks.

using System.Collections.Concurrent;
using System.Diagnostics;
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using System.Linq;

/// <summary>
/// Benchmark harness that runs the same spawning workload through a blocking
/// orchestrator and a task-based orchestrator and prints the elapsed time of each.
/// </summary>
public class Program
{
    const int ITERATIONS = 2500000;
    const int WORKERS = 200;

    /// <summary>
    /// Runs the blocking orchestrator, then the task-based one, over the same range
    /// of work items and reports how many results each produced.
    /// </summary>
    public static async Task Main()
    {
        var o = new Orchestrator<int, int>();
        var oo = new OrchestratorAsync<int, int>();
        var array = Enumerable.Range(0, WORKERS);

        var result = Time(() => o.OrchestrateWorkers(array, DoWork));
        Console.Error.WriteLine("Sync spawned {0} workers", result.Count());
        var resultAsync = await TimeAsync(() => oo.OrchestrateWorkersAsync(array, DoWorkAsync));
        Console.Error.WriteLine("Async spawned {0} workers", resultAsync.Count());
    }

    /// <summary>Awaits <paramref name="work"/>, prints the elapsed milliseconds, and returns its result.</summary>
    static async Task<T> TimeAsync<T>(Func<Task<T>> work)
    {
        var sw = new Stopwatch();
        sw.Start();
        var result = await work();
        sw.Stop();
        Console.WriteLine("Total async time: {0}", sw.ElapsedMilliseconds);
        return result;
    }

    /// <summary>Invokes <paramref name="work"/>, prints the elapsed milliseconds, and returns its result.</summary>
    static T Time<T>(Func<T> work)
    {
        var sw = new Stopwatch();
        sw.Start();
        var result = work();
        sw.Stop();
        Console.WriteLine("Total time: {0}", sw.ElapsedMilliseconds);
        return result;
    }

    /// <summary>
    /// CPU-bound workload for the blocking orchestrator: sums <c>ITERATIONS</c> random
    /// numbers and, for non-negative <paramref name="x"/>, spawns two more workers
    /// with item -1 (which therefore spawn nothing further).
    /// </summary>
    static int DoWork(int x, Orchestrator<int, int> arg2)
    {
        var rnd = new Random();
        int n = 0;
        for (int i = 0; i < ITERATIONS; ++i)
        {
            n += rnd.Next();
        }
        if (x >= 0)
        {
            arg2.SpawnWorker(-1, DoWork);
            arg2.SpawnWorker(-1, DoWork);
        }
        return n;
    }

    /// <summary>
    /// Same workload as <see cref="DoWork"/>, but spawning through the task-based orchestrator.
    /// </summary>
    static int DoWorkAsync(int x, OrchestratorAsync<int, int> arg2)
    {
        var rnd = new Random();
        int n = 0;
        for (int i = 0; i < ITERATIONS; ++i)
        {
            n += rnd.Next();
        }
        if (x >= 0)
        {
            arg2.SpawnWorker(-1, DoWorkAsync);
            arg2.SpawnWorker(-1, DoWorkAsync);
        }
        return n;
    }

    /// <summary>
    /// Orchestrator that blocks the calling thread on a <see cref="CountdownEvent"/>
    /// until every spawned worker has signalled.
    /// </summary>
    /// <typeparam name="TWorkItem">Type of the items handed to workers.</typeparam>
    /// <typeparam name="TResult">Type of the value each worker returns.</typeparam>
    public class Orchestrator<TWorkItem, TResult>
    {
        private ConcurrentQueue<TResult> results;
        private CountdownEvent countdownEvent;

        public Orchestrator()
        {
            this.results = new();
            // The initial count of 1 is held by OrchestrateWorkers and released there,
            // so AddCount is never called on an event that is already set.
            this.countdownEvent = new(1);
        }

        /// <summary>
        /// Spawns a worker for every item, then blocks until the countdown reaches zero.
        /// </summary>
        /// <param name="workItems">Initial items to process.</param>
        /// <param name="worker">Delegate run for each item; it receives this orchestrator so it can spawn more workers.</param>
        /// <returns>The queue of collected worker results.</returns>
        public IEnumerable<TResult> OrchestrateWorkers(
            IEnumerable<TWorkItem> workItems,
            Func<TWorkItem, Orchestrator<TWorkItem, TResult>, TResult> worker)
        {
            foreach (var workItem in workItems)
            {
                SpawnWorker(workItem, worker);
            }
            countdownEvent.Signal(); // release the count taken in the constructor
            countdownEvent.Wait();
            return results;
        }

        /// <summary>
        /// Adds one to the countdown and queues a task that runs <paramref name="worker"/>,
        /// enqueues its result, and signals the countdown.
        /// </summary>
        public void SpawnWorker(
            TWorkItem workItem,
            Func<TWorkItem, Orchestrator<TWorkItem, TResult>, TResult> worker)
        {
            this.countdownEvent.AddCount(1);
            Task.Run(() =>
            {
                var result = worker(workItem, this);
                this.results.Enqueue(result);
                countdownEvent.Signal();
            });
        }
    }

    /// <summary>
    /// Orchestrator that exposes completion as a task instead of blocking: an
    /// interlocked counter tracks outstanding workers and the decrement that reaches
    /// zero completes the <see cref="TaskCompletionSource{TResult}"/>.
    /// </summary>
    /// <typeparam name="TWorkItem">Type of the items handed to workers.</typeparam>
    /// <typeparam name="TResult">Type of the value each worker returns.</typeparam>
    public class OrchestratorAsync<TWorkItem, TResult>
    {
        private ConcurrentQueue<TResult> results;
        private volatile int countdown;
        private TaskCompletionSource<IEnumerable<TResult>> tcs;

        public OrchestratorAsync()
        {
            this.results = new();
            this.countdown = 0;
            // Run awaiting continuations asynchronously so they do not execute inline
            // inside whichever worker happens to call SetResult.
            this.tcs = new TaskCompletionSource<IEnumerable<TResult>>(
                TaskCreationOptions.RunContinuationsAsynchronously);
        }

        /// <summary>
        /// Spawns a worker for every item and returns a task that completes with the
        /// collected results once the outstanding-worker counter reaches zero.
        /// </summary>
        /// <param name="workItems">Initial items to process.</param>
        /// <param name="worker">Delegate run for each item; it receives this orchestrator so it can spawn more workers.</param>
        public Task<IEnumerable<TResult>> OrchestrateWorkersAsync(
            IEnumerable<TWorkItem> workItems,
            Func<TWorkItem, OrchestratorAsync<TWorkItem, TResult>, TResult> worker)
        {
            // Hold one count for the duration of the spawn loop. Starting from 0 would
            // let an early worker decrement to zero and complete the task while later
            // items were still waiting to be spawned, and an empty input would never
            // complete the task at all.
            this.countdown = 1;
            foreach (var workItem in workItems)
            {
                SpawnWorker(workItem, worker);
            }
            ReleaseOne(); // give up the orchestrator's own count
            return tcs.Task;
        }

        /// <summary>
        /// Increments the counter and queues a task that runs <paramref name="worker"/>,
        /// enqueues its result, and then releases its count.
        /// </summary>
        public void SpawnWorker(TWorkItem workItem,
            Func<TWorkItem, OrchestratorAsync<TWorkItem, TResult>, TResult> worker)
        {
            Interlocked.Increment(ref this.countdown);
            Task.Run(() =>
            {
                var result = worker(workItem, this);
                this.results.Enqueue(result);
                ReleaseOne();
            });
        }

        // Decrements the counter; the caller that brings it to zero publishes the results.
        private void ReleaseOne()
        {
            if (Interlocked.Decrement(ref this.countdown) == 0)
            {
                this.tcs.SetResult(this.results);
            }
        }
    }
}

Is it better to block on an event in a CPU-bound multithreaded method than to make it async?

Answers (1)

Related Questions