Reputation: 231
I'm working on the performance differences between Android Java and Android NDK applications. As an example from 3D graphics, I performed a Matrix4D-Vector4D transformation on more than 90,000 vertices.
It seems that the Java version is nearly 100 times slower than the C version. Did I do something wrong? Does anyone have similar experiences?
My Java code for the transformation:
// Benchmark: transform the first vCount vertices by the 4x4 matrix and log
// the elapsed wall-clock time in milliseconds.
long t1 = System.nanoTime();
for ( int i = 0; i < vCount; i++)
{
    // A new output vector is allocated for every vertex and replaces the
    // input vector in the array at the end of the iteration.
    Vector4 vOut = new Vector4();
    Vector4 v = vertices[i];
    // vOut[j] = sum over k of v[k] * matrix[k][j], accumulated one k at a time.
    vOut.v_[0] = v.v_[0] * matrix[0].v_[0];
    vOut.v_[1] = v.v_[0] * matrix[0].v_[1];
    vOut.v_[2] = v.v_[0] * matrix[0].v_[2];
    vOut.v_[3] = v.v_[0] * matrix[0].v_[3];
    vOut.v_[0] += v.v_[1] * matrix[1].v_[0];
    vOut.v_[1] += v.v_[1] * matrix[1].v_[1];
    vOut.v_[2] += v.v_[1] * matrix[1].v_[2];
    vOut.v_[3] += v.v_[1] * matrix[1].v_[3];
    vOut.v_[0] += v.v_[2] * matrix[2].v_[0];
    vOut.v_[1] += v.v_[2] * matrix[2].v_[1];
    vOut.v_[2] += v.v_[2] * matrix[2].v_[2];
    vOut.v_[3] += v.v_[2] * matrix[2].v_[3];
    vOut.v_[0] += v.v_[3] * matrix[3].v_[0];
    vOut.v_[1] += v.v_[3] * matrix[3].v_[1];
    vOut.v_[2] += v.v_[3] * matrix[3].v_[2];
    vOut.v_[3] += v.v_[3] * matrix[3].v_[3];
    vertices[i] = vOut;
}
long t2 = System.nanoTime();
long diff = t2 - t1;
// Divide by a double literal: dividing by a float literal would first round
// the nanosecond count to single precision before the result is widened.
double ms = diff / 1000000.0;
Log.w("GL2JNIView", String.format("ms %.2f ", ms));
Performance (Transform > 90 000 Vertices | Android 4.0.4 SGS II): (Median-value of 200 runs)
JAVA-Version: 2 FPS
C-Version: 190 FPS
Upvotes: 0
Views: 1128
Reputation: 2542
You also should change your loop. In addition to the answer by @toopok4k3 you should try these things:
I am assuming the values are doubles in the version below.
// Transforms every vertex in place: v'[j] = sum over k of v[k] * matrix[k][j].
// The component type is assumed to be double, as stated for this version.
// No object is allocated inside the loop.

// Read the sixteen matrix entries once, outside the loop.
final double m0v0 = matrix[0].v_[0];
final double m0v1 = matrix[0].v_[1];
final double m0v2 = matrix[0].v_[2];
final double m0v3 = matrix[0].v_[3];
final double m1v0 = matrix[1].v_[0];
final double m1v1 = matrix[1].v_[1];
final double m1v2 = matrix[1].v_[2];
final double m1v3 = matrix[1].v_[3];
final double m2v0 = matrix[2].v_[0];
final double m2v1 = matrix[2].v_[1];
final double m2v2 = matrix[2].v_[2];
final double m2v3 = matrix[2].v_[3];
final double m3v0 = matrix[3].v_[0];
final double m3v1 = matrix[3].v_[1];
final double m3v2 = matrix[3].v_[2];
final double m3v3 = matrix[3].v_[3];

// An explicit bound replaces the "run until ArrayIndexOutOfBoundsException"
// loop, so a genuine indexing error is no longer silently treated as the
// normal end of the loop.
final int n = vertices.length;
for (int i = 0; i < n; i++)
{
    final double[] c = vertices[i].v_;

    // Copy all four inputs before writing anything: the result is stored
    // back into the same array the inputs are read from.
    final double x = c[0];
    final double y = c[1];
    final double z = c[2];
    final double w = c[3];

    c[0] = x * m0v0 + y * m1v0 + z * m2v0 + w * m3v0;
    c[1] = x * m0v1 + y * m1v1 + z * m2v1 + w * m3v1;
    c[2] = x * m0v2 + y * m1v2 + z * m2v2 + w * m3v2;
    c[3] = x * m0v3 + y * m1v3 + z * m2v3 + w * m3v3;
}
Upvotes: 0
Reputation: 66
You create a new Vector4 in each iteration. In my own experience, using new inside loops can cause unexpected performance problems on Android.
Upvotes: 5
Reputation: 1
AFAIK, Android's Java implementation runs on a virtual machine called Dalvik, which has a different instruction set than the JVM and does not use any just-in-time compilation techniques to dynamically translate some bytecodes to machine code, but just interprets them. So Dalvik is obviously slower than C on CPU-bound tasks.
This might change in very recent Android systems.
Upvotes: 0