Reputation: 1
I'm trying a very simple algorithm that uses Metal GPU acceleration to calculate some values in an array. The shader throws an error under certain conditions, which I explain below. Error: Execution of the command buffer was aborted due to an error during execution. Ignored (for causing prior/excessive GPU errors) (IOAF code 4)
The shader only throws this error when adding a value to the existing value at an index of the array. Example:
This will not cause an error:
// Compute kernel that fills two per-pixel float buffers with each pixel's
// linear index (heightIndex * width + widthIndex).
//
// Buffers 0-2 (wPointsIntensity, wPointsXCoord, wPointsYCoord) are bound but
// not read by this version of the body. Buffers 3-4 (pixelSignalIntensity,
// pixelDistance) are the outputs. Buffers 5-7 (noOfPoints, width, height)
// carry the loop bounds.
//
// `id` (thread_position_in_grid) is never used in the body, so every
// dispatched thread runs the complete triple loop and writes the same
// elements as every other thread.
kernel void shader (device int *wPointsIntensity [[buffer(0)]],
const device uint *wPointsXCoord [[buffer(1)]],
const device uint *wPointsYCoord [[buffer(2)]],
device float *pixelSignalIntensity [[buffer(3)]],
device float *pixelDistance [[buffer(4)]],
const device uint& noOfPoints [[ buffer(5) ]],
const device uint& width [[ buffer(6) ]],
const device uint& height [[ buffer(7) ]],
uint id [[ thread_position_in_grid ]]) {
// This plain-assignment version does not trigger the error.
// pixelIndex does not depend on wpIndex, so the outer loop simply repeats
// identical stores noOfPoints times.
for (uint wpIndex = 0; wpIndex < noOfPoints; wpIndex++) {
for (uint heightIndex = 0; heightIndex < height; heightIndex++) {
for (uint widthIndex = 0; widthIndex < width; widthIndex++) {
// Row-major linear index of the pixel; ranges over 0 ..< width * height.
uint pixelIndex = heightIndex * width + widthIndex;
// NOTE(review): since all threads touch the same device-memory elements,
// switching `=` to `+=` here would make these unsynchronised
// read-modify-writes across threads — likely related to the reported
// abort; confirm.
pixelDistance[pixelIndex] = float(pixelIndex);
pixelSignalIntensity[pixelIndex] = float(pixelIndex);
}}}}
However, if you replace
pixelDistance[pixelIndex] = float(pixelIndex); with
pixelDistance[pixelIndex] += float(pixelIndex);
it will throw an error.
Here is the Swift code:
// Flatten wPoints into three parallel Int32 arrays (signal intensity, x, y),
// one element per point, so they can be handed to Metal buffers.
var wPointsValues = [Int32](repeating:0, count: wPoints.count)
var wPointsXLocations = [Int32](repeating:0, count: wPoints.count)
var wPointsYLocations = [Int32](repeating:0, count: wPoints.count)
for i in 0..<wPoints.count {
wPointsValues[i] = Int32(wPoints[i].signalIntensity)
wPointsXLocations[i] = Int32(wPoints[i].location.x)
wPointsYLocations[i] = Int32(wPoints[i].location.y)
}
// Scalars passed to the kernel with setBytes at indices 5-7 below.
// NOTE(review): the kernel declares these arguments as uint while the host
// sends Int32; both are 4 bytes, but confirm the values are never negative.
var numberOfWPoints:Int32 = Int32(wPoints.count)
var int32Width = Int32(width)
var int32Height = Int32(height)
// Output arrays: one zero-initialised Float32 per (point, pixel) pair,
// i.e. wPoints.count * width * height elements each.
let numberOfResults = wPoints.count * Int(width) * Int(height)
var wPointsSignalIntensity = [Float32](repeating:0.0, count: numberOfResults)
var wPointsDistance = [Float32](repeating:0.0, count: numberOfResults)
// Local variables. They are not referenced again in this snippet: the results
// at the end are stored through self.signalDistance / self.signalDensity.
var signalDensity:[Float32] = [Float32](repeating:0.0, count: numberOfResults)
var signalDistance:[Float32] = [Float32](repeating:0.0, count: numberOfResults)
// Create input buffers, each initialised from one of the Int32 arrays above.
let inWPointSignalValues = device.makeBuffer(bytes: wPointsValues, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
let inWPointXCoordBuffer = device.makeBuffer(bytes: wPointsXLocations, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
let inWPointYCoordBuffer = device.makeBuffer(bytes: wPointsYLocations, length: (MemoryLayout<Int32>.stride * wPoints.count), options: [])
// Create output buffers, initialised from the zero-filled Float32 arrays above.
let outPixelSignalIntensityBuffer = device.makeBuffer(bytes: wPointsSignalIntensity, length: (MemoryLayout<Float32>.stride * numberOfResults), options: [])
let outPixelDistanceBuffer = device.makeBuffer(bytes: wPointsDistance, length: (MemoryLayout<Float32>.stride * numberOfResults), options: [])
// Command buffer and compute encoder; both are force-unwrapped, so a nil
// queue, command buffer or encoder traps here.
let commandBuffer = (mtlCommmandQueue?.makeCommandBuffer())!
let computeCommandEncoder = (commandBuffer.makeComputeCommandEncoder())!
computeCommandEncoder.setComputePipelineState(mtlComputePipelineFilter!)
// Bind input buffers to kernel argument indices 0-2.
computeCommandEncoder.setBuffer(inWPointSignalValues, offset: 0, index: 0)
computeCommandEncoder.setBuffer(inWPointXCoordBuffer, offset: 0, index: 1)
computeCommandEncoder.setBuffer(inWPointYCoordBuffer, offset: 0, index: 2)
// Bind output buffers to kernel argument indices 3-4.
computeCommandEncoder.setBuffer(outPixelSignalIntensityBuffer, offset: 0, index: 3)
computeCommandEncoder.setBuffer(outPixelDistanceBuffer, offset: 0, index: 4)
// Pass the scalar constants (point count, width, height) at indices 5-7.
computeCommandEncoder.setBytes(&numberOfWPoints, length: MemoryLayout<Int32>.stride, index: 5)
computeCommandEncoder.setBytes(&int32Width, length: MemoryLayout<Int32>.stride, index: 6)
computeCommandEncoder.setBytes(&int32Height, length: MemoryLayout<Int32>.stride, index: 7)
// 2x2x2 threadgroups of 2x2x2 threads = 64 threads in total. The dispatch size
// is fixed and does not depend on width, height or wPoints.count.
let threadsPerGroup = MTLSize(width:2,height:2,depth:2)
let numThreadgroups = MTLSize(width:2, height:2, depth:2)
computeCommandEncoder.dispatchThreadgroups(numThreadgroups, threadsPerThreadgroup: threadsPerGroup)
// Taken before endEncoding()/commit(), so the printed interval covers buffer
// creation and encoding only, not GPU execution. `start` is defined outside
// this snippet.
// NOTE(review): mach_absolute_time() counts timebase ticks; dividing by
// NSEC_PER_SEC presumes 1 tick == 1 ns — confirm with mach_timebase_info.
let endBufferAllocation = mach_absolute_time()
print("time for creating and setting buffert: time: \(Double(endBufferAllocation - start) / Double(NSEC_PER_SEC))")
// Finish encoding, submit the work and block until the GPU is done.
computeCommandEncoder.endEncoding()
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
// Timestamp after completion; not used further within this snippet.
let allComplete = mach_absolute_time()
// NOTE(review): contents() gives a raw pointer into the buffer's storage
// rather than a [Float32]; confirm the declared type of self.signalDistance /
// self.signalDensity and that the buffers outlive any use of these pointers.
self.signalDistance = (outPixelDistanceBuffer?.contents())!
self.signalDensity = (outPixelSignalIntensityBuffer?.contents())!
Upvotes: 0
Views: 1986
Reputation: 31
I had this issue for ages, and my program crashed intermittently. It turned out that I was accessing memory in the kernel that had not been allocated for the buffer. In the kernel I was running a for loop over 0..<5 (i.e. outputting 5 values per thread) but had not divided num_threads by 5, so the threads wrote past the end of the buffer.
When it didn't crash, it gave the correct answer, and the only error ever reported was "Execution of the command buffer was aborted due to an error during execution. Caused GPU Hang Error (IOAF code 3)".
Upvotes: 1