Reputation: 5630
I have a method which does a series of calculations which take quite a bit of time to complete. The objects that this method does computations on are generated at runtime and can range from a few to a few thousand. Obviously it would be better if I could run these computations across several threads concurrently, but when I try that, my program uses more CPU yet takes longer than running them one-by-one. Any ideas why?
let itemsPerThread = (dataArray.count / 4) + 1
for var i = 0; i < dataArray.count; i += itemsPerThread
{
let name = "ComputationQueue\(i)".bridgeToObjectiveC().cString()
let compQueue = dispatch_queue_create(name, DISPATCH_QUEUE_CONCURRENT)
dispatch_async(compQueue,
{
let itemCount = i + itemsPerThread < dataArray.count ? itemsPerThread : dataArray.count - i - 1
let subArray = dataArray.bridgeToObjectiveC().subarrayWithRange(NSMakeRange(i, dataCount)) as MyItem[]
self.reallyLongComputation(subArray, increment: increment, outputIndex: self.runningThreads-1)
})
NSThread.sleepForTimeInterval(1)
}
Alternatively:
If I run this same thing, but a single dispatch_async
call and on the whole dataArray
rather than the subarrays, it completes much faster while using less CPU.
Upvotes: 1
Views: 448
Reputation: 17534
what you (it is my guess) want to do should looks like
//
// main.swift
// test
//
// Created by user3441734 on 12/11/15.
// Copyright © 2015 user3441734. All rights reserved.
//
import Foundation
let computationGroup = dispatch_group_create()
var arr: Array<Int> = []
for i in 0..<48 {
arr.append(i)
}
print("arr \(arr)")
func job(inout arr: Array<Int>, workers: Int) {
let count = arr.count
let chunk = count / workers
guard chunk * workers == count else {
print("array.cout divided by workers must by integer !!!")
return
}
let compQueue = dispatch_queue_create("test", DISPATCH_QUEUE_CONCURRENT)
let syncQueue = dispatch_queue_create("aupdate", DISPATCH_QUEUE_SERIAL)
for var i = 0; i < count; i += chunk
{
let j = i
var tarr = arr[j..<j+chunk]
dispatch_group_enter(computationGroup)
dispatch_async(compQueue) { () -> Void in
for k in j..<j+chunk {
// long time computation
var z = 100000000
repeat {
z--
} while z > 0
// update with chunk
tarr[k] = j
}
dispatch_async(syncQueue, { () -> Void in
for k in j..<j+chunk {
arr[k] = tarr[k]
}
dispatch_group_leave(computationGroup)
})
}
}
dispatch_group_wait(computationGroup, DISPATCH_TIME_FOREVER)
}
var stamp: Double {
return NSDate.timeIntervalSinceReferenceDate()
}
print("running on dual core ...\n")
var start = stamp
job(&arr, workers: 1)
print("job done by 1 worker in \(stamp-start) seconds")
print("arr \(arr)\n")
start = stamp
job(&arr, workers: 2)
print("job done by 2 workers in \(stamp-start) seconds")
print("arr \(arr)\n")
start = stamp
job(&arr, workers: 4)
print("job done by 4 workers in \(stamp-start) seconds")
print("arr \(arr)\n")
start = stamp
job(&arr, workers: 6)
print("job done by 6 workers in \(stamp-start) seconds")
print("arr \(arr)\n")
with results
arr [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
running on dual core ...
job done by 1 worker in 5.16312199831009 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
job done by 2 workers in 2.49235796928406 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24]
job done by 4 workers in 3.18479603528976 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36]
job done by 6 workers in 2.51704299449921 seconds
arr [0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 40, 40, 40]
Program ended with exit code: 0
... you can use next pattern for distributing job between any number of workers (the number of workers which give you the best performance depends on worker definition and sources which are available in your environment). generally for any kind of long time calculation ( transformation ) you can expect some performance gain. in two core environment up to 50%. if your worker use highly optimized functions using more cores 'by default', the performance gain can be close to nothing :-)
// generic implementation
// 1) job distribute data between workers as fair, as possible
// 2) workers do their task in parallel
// 3) the order in resulting array reflect the input array
// 4) there is no requiremets of worker block, to return
// the same type as result of yor 'calculation'
func job<T,U>(arr: [T], workers: Int, worker: T->U)->[U] {
guard workers > 0 else { return [U]() }
var res: Dictionary<Int,[U]> = [:]
let workersQueue = dispatch_queue_create("workers", DISPATCH_QUEUE_CONCURRENT)
let syncQueue = dispatch_queue_create("sync", DISPATCH_QUEUE_SERIAL)
let group = dispatch_group_create()
var j = min(workers, arr.count)
var i = (0, 0, arr.count)
var chunk: ArraySlice<T> = []
repeat {
let a = (i.1, i.1 + i.2 / j, i.2 - i.2 / j)
i = a
chunk = arr[i.0..<i.1]
dispatch_group_async(group, workersQueue) { [i, chunk] in
let arrs = chunk.map{ worker($0) }
dispatch_sync(syncQueue) {[i,arrs] in
res[i.0] = arrs
}
}
j--
} while j != 0
dispatch_group_wait(group, DISPATCH_TIME_FOREVER)
let idx = res.keys.sort()
var results = [U]()
idx.forEach { (idx) -> () in
results.appendContentsOf(res[idx]!)
}
return results
}
Upvotes: 1
Reputation: 32622
You need to
Upvotes: 0