Zemledelec
Zemledelec

Reputation: 94

GLSL optimization. What is faster?

I'm using OpenGL ES and have two ways of calculating the "dir" vector. Which code is faster?

attribute vec2 order;

code1:

  // Branching variant: when abs(sinA) is below 0.2, dir is replaced by
  // sNormalPrev or sNormalNext depending on order.x; otherwise dir is
  // scaled by order.x / sinA.
  if( abs(sinA) < 0.2 ) {
    if(order.x == 1.0){
        dir = sNormalPrev;   
    } else {
        // any order.x other than exactly 1.0 selects sNormalNext
        dir = sNormalNext;   
    }
  } else {
    // abs(sinA) >= 0.2 on this path, so sinA is never zero here
    dir *= order.x / sinA;
  }

code 2:

// Branch-free variant: k is 1.0 when abs(sinA) >= 0.2 and 0.0 otherwise.
float k = step(0.2, abs(sinA));
// With k == 1.0 only the dir * order.x / sinA term survives; with k == 0.0
// only the normal blend survives (-(k-1.0) is 1.0). In the blend sNormalPrev
// is weighted 1.0 when order.x >= 0.0 and sNormalNext when order.x <= 0.0,
// so both contribute when order.x == 0.0.
// NOTE(review): the division by sinA is evaluated for every input, including
// sinA == 0.0 — confirm whether that value can occur.
dir = k * dir * order.x / sinA - (k-1.0) * (step(1.0, order.x + 1.0) * sNormalPrev + step(1.0, -order.x + 1.0) * sNormalNext);

Upvotes: 2

Views: 2247

Answers (2)

fsasm
fsasm

Reputation: 540

GPU cores are mostly wide SIMD units and they handle if-statements via masking. Depending on the GPU architecture the shader compiler converts control statements to masking operations pretty much the same way you did with your code.

On PCs the GPU driver has enough processing power to properly optimize shaders, so your optimization makes no difference. According to this blog post from 2010, your optimization would make sense on mobile platforms. I assume that this is no longer the case with today's modern smartphones, as they have enough processing power to properly optimize shaders and the drivers have also matured over time.

You can also try out the tool GLSL optimizer that is also mentioned in the blog post from earlier. Also some GPU vendors provide tools for profiling shaders.

Upvotes: 3

user128511
user128511

Reputation:

Writing a test, I don't see much of a difference:

// Number of draw calls issued for each timing sample.
var iterationsPerTiming = 40;

// A 1x1 WebGL canvas; this script never adds it to the page.
var gl = document.createElement("canvas").getContext("webgl");
gl.canvas.width = 1;
gl.canvas.height = 1;
// Two programs that differ only in the vertex shader ("vs1" vs "vs2").
var programInfo1 = twgl.createProgramInfo(gl, ["vs1", "fs"]);
var programInfo2 = twgl.createProgramInfo(gl, ["vs2", "fs"]);

// One float per vertex, cycling through the values 0, 0.5 and 1.
var count = new Float32Array(1000000);
for (var i = 0; i < count.length; ++i) {
  count[i] = i % 3 / 2;
}

var arrays = {
  vertexId: {
    data: count, numComponents: 1,
  },
};
var bufferInfo = twgl.createBufferInfoFromArrays(gl, arrays);

// Run each program once without reporting, then run and report each one.
iterateTest(programInfo1, 10)  // prime this path
  .then(function() { return iterateTest(programInfo2, 10); })  // prime this path
  .then(function() { return iterateTest(programInfo1, 20); })
  .then(log)
  .then(function() { return iterateTest(programInfo2, 20); })
  .then(log)
  .catch(function(err) {
    // Show failures on the page instead of leaving an unhandled rejection.
    return log("benchmark failed: " + err);
  });

/**
 * Collects `times` timing samples for a program, yielding to the event loop
 * between samples, and formats them as a text report.
 *
 * Each sample is the value returned by `test(programInfo, iterationsPerTiming)`.
 *
 * @param {Object} programInfo - program description passed through to `test`.
 * @param {number} times - number of samples to take; values <= 0 take none.
 * @returns {Promise<string>} resolves to one "index: time" line per sample
 *     followed by an "average timing: ..." line, joined with newlines.
 *     Rejects if `test` throws during any sample.
 */
function iterateTest(programInfo, times) {
  return new Promise(function(resolve, reject) {
    var timings = [];
    var remaining = times;

    function runNextIteration() {
      // This also runs from a timer callback, where a thrown error would
      // otherwise escape the promise and leave it pending forever.
      try {
        if (remaining > 0) {
          --remaining;
          timings.push(test(programInfo, iterationsPerTiming));
          setTimeout(runNextIteration, 1);
        } else {
          var totalTime = 0;
          var msgs = timings.map(function(timing, ndx) {
            totalTime += timing;
            return "" + ndx + ": " + timing.toFixed(3);
          });
          msgs.push("average timing: " + (totalTime / timings.length).toFixed(3));
          resolve(msgs.join("\n"));
        }
      } catch (err) {
        reject(err);
      }
    }
    runNextIteration();
  });
}

/**
 * Issues `iterations` draw calls with the given program and returns the
 * elapsed wall-clock time in milliseconds, as measured by performance.now().
 *
 * @param {Object} programInfo - object whose `program` is made current.
 * @param {number} iterations - how many draw calls to issue.
 * @returns {number} elapsed milliseconds, including the final pixel read-back.
 */
function test(programInfo, iterations) {
  gl.useProgram(programInfo.program);
  twgl.setBuffersAndAttributes(gl, programInfo, bufferInfo);

  var begin = performance.now();
  var drawn = 0;
  while (drawn < iterations) {
    twgl.drawBufferInfo(gl, gl.TRIANGLES, bufferInfo, count.length);
    drawn += 1;
  }

  // Reading a pixel back effectively acts as a gl.finish. That stalls the
  // pipeline, so the number is not a realistic timing, but the stall is
  // part of every measurement and the results remain comparable.
  var pixel = new Uint8Array(4);
  gl.readPixels(0, 0, 1, 1, gl.RGBA, gl.UNSIGNED_BYTE, pixel);

  var elapsed = performance.now() - begin;
  return elapsed;
}

/**
 * Appends `msg` to the end of the document body inside a new <pre> element.
 *
 * @param {string} msg - text to display.
 * @returns {Promise<undefined>} an already-resolved promise, so the call can
 *     be used as a step in a promise chain.
 */
function log(msg) {
  var pre = document.createElement("pre");
  var text = document.createTextNode(msg);
  pre.appendChild(text);
  document.body.appendChild(pre);
  return Promise.resolve();
}
/* Use a monospace font for the whole page. */
html, body { font-family: monospace; }
<script src="https://twgljs.org/dist/twgl.min.js"></script>

  <script id="vs1" type="notjs">
// Vertex shader for the branching variant of the "dir" calculation.
attribute float vertexId;
void main() {
  // All test inputs are derived from the single vertexId attribute.
  vec2 order = vec2(vertexId, 0);
  float sinA = vertexId;
  vec3 dir = vec3(0);
  vec3 sNormalPrev = vec3(1);
  vec3 sNormalNext = vec3(-1);
  if( abs(sinA) < 0.2 ) {
    if(order.x == 1.0){
        dir = sNormalPrev;   
    } else {
        dir = sNormalNext;   
    }
  } else {
    // dir is still vec3(0) here, so the scaled result is also zero
    dir *= order.x / sinA;
  }
  gl_Position = vec4(dir, 1.0); // have to use dir
  gl_PointSize = 1.0;
}
  </script>
  <script id="vs2" type="notjs">
// Vertex shader for the branch-free (step-based) variant of the "dir" calculation.
attribute float vertexId;

void main() {
  // All test inputs are derived from the single vertexId attribute.
  vec2 order = vec2(vertexId, 0);
  float sinA = vertexId;
  vec3 dir = vec3(0);
  vec3 sNormalPrev = vec3(1);
  vec3 sNormalNext = vec3(-1);
  
  // k is 1.0 when abs(sinA) >= 0.2 and 0.0 otherwise.
  float k = step(0.2, abs(sinA));
  // k selects between the scaled dir term and the blend of the two normals.
  // NOTE(review): the division by sinA is evaluated even when vertexId is 0.0,
  // and for order.x == 0.0 both step() weights are 1.0, so this may not match
  // the branching shader for that input — confirm.
  dir = k * dir * order.x / sinA - (k-1.0) * (step(1.0, order.x + 1.0) * sNormalPrev + step(1.0,   -order.x + 1.0) * sNormalNext);
  
  gl_Position = vec4(dir, 1.0); // have to use dir
  gl_PointSize = 1.0;
}
  </script>
  <script id="fs" type="notjs">
// Fragment shader used with both vertex shaders: writes a constant color.
precision mediump float;
void main() {
  gl_FragColor = vec4(1);
}
  </script>

Maybe my test is bad. Tested on an early 2015 MacBook Pro and an iPhone 6s Plus.

Upvotes: 2

Related Questions