Panos
Panos

Reputation: 627

Parallel code with Task.Factory slower than linear

I am playing with parallel programming and F#. I created a function that integrates a 1-variable function, and then I tried to make it parallel in two different ways:

module Quadrature = 

    let Integrate (f: double -> double) (x1: double) (x2: double) (samples: int64) =
        let step = (x2 - x1) / (double samples)
        let samplePoints = seq {x1 + step .. step .. x2 - step}
        let sum = samplePoints |> Seq.map (fun x -> f x) |> Seq.sum
        let sum = sum + ((f x1) + (f x2)) / 2.0
        step * sum

    let IntegrateWithStep (f: double -> double) (x1: double) (x2: double) (step: double) =
        let samples = (x2 - x1) / step |> round |> int64
        Integrate f x1 x2 samples

    let IntegrateWithTasks (f: double -> double) (x1: double) (x2: double) (samples: int64) (tasks: int) =
        let step = (x2 - x1) / (double samples)
        let samplesPerTask = ceil <| (double samples) / (double tasks)
        let interval = step * samplesPerTask
        let intervals = 
            seq { 
                for i in 0 .. (tasks - 1) do
                    let lowerBound = x1 + (double i) * interval
                    let upperBound = min (lowerBound + interval) x2  
                    yield (lowerBound, upperBound)
                }
        let tasks = intervals 
                    |> Seq.map (fun (a, b) -> Task.Factory.StartNew(fun () -> IntegrateWithStep f a b step))
        tasks |> Seq.map (fun t -> t.Result) |> Seq.sum

    let IntegrateParallel (f: double -> double) (x1: double) (x2: double) (samples: int64) (tasks: int) =
        let step = (x2 - x1) / (double samples)
        let samplesPerTask = ceil <| (double samples) / (double tasks)
        let interval = step * samplesPerTask
        let intervals = 
               [| for i in 0 .. (tasks - 1) do
                    let lowerBound = x1 + (double i) * interval
                    let upperBound = min (lowerBound + interval) x2  
                    yield (lowerBound, upperBound) |]
        intervals |> Array.Parallel.map (fun (a, b) -> IntegrateWithStep f a b step)
                  |> Array.sum

I run this code with the following input on a machine with 4 cores:

let f = (fun x -> - 1.0 + 2.0 * x - 3.0 * x * x + 4.0 * x * x * x ) 
let x1, x2 = 0.0, 1.0
let samples = 100000000L
let tasks = 100

However, the method with the task factory is always slightly slower than the linear one, while the one with the Parallel.map gives me a good speed up.

I have tried varying the number of tasks from thousands down to the number of cores but the implementation with the Task.Factory is always slower than the linear one. What am I doing wrong?

Upvotes: 1

Views: 201

Answers (1)

yamen
yamen

Reputation: 15618

Remember that sequences are lazily loaded. Here is the first time the task gets started:

tasks |> Seq.map (fun t -> t.Result) |> Seq.sum

And you are starting them sequentially and blocking each one for their result (when calling t.Result. You'll want to save the list of tasks as an array, and then call .WaitAll before collecting the results to ensure they all get started in parallel.

Try:

let tasks = intervals 
            |> Seq.map (fun (a, b) -> Task.Factory.StartNew(fun () -> IntegrateWithStep f a b step))
            |> Array.ofSeq

tasks.WaitAll()

Upvotes: 1

Related Questions