Reputation: 7572
I have an app that places a distortion on a bitmap. i'm trying to optimize the image procesing class further. Basically my app passes a bitmap to a method called barrel() in the filters class. Barrel calls sampleImage() which in turn calls getArgb(). it has been suggested to me that getArgb() can be threaded and can run concurrently whilst manipulating the pixel data. I don't see how this can be done as i can only get the pixel data as fast as the loop in the barrel method iterates. getARGB() calls Bitmap.getPixel, which means there are about 20000 method calls in the bitmap passed in to the barrel method. is there a way i can thread this class further? thanks in advance Matt.
.
class Filters{
private float xscale;
private float yscale;
private float xshift;
private float yshift;
private int [] s;
private int [] scalar;
private int [] s1;
private int [] s2;
private int [] s3;
private int [] s4;
private String TAG = "Filters";
public Filters(){
s = new int[4];
scalar = new int[4];
s1 = new int[4];
s2 = new int[4];
s3 = new int[4];
s4 = new int[4];
}
public Bitmap barrel (Bitmap input, float k){
float centerX=input.getWidth()/2; //center of distortion
float centerY=input.getHeight()/2;
int width = input.getWidth(); //image bounds
int height = input.getHeight();
xshift = calc_shift(0,centerX-1,centerX,k);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,k);
yshift = calc_shift(0,centerY-1,centerY,k);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,k);
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
int origPixel = 0;
int []arr = new int[input.getWidth()*input.getHeight()];
int color = 0;
int p = 0;
int i = 0;
long startLoop = System.currentTimeMillis();
for(int j=0;j<input.getHeight();j++){
for( i=0;i<input.getWidth();i++,p++){
origPixel= input.getPixel(i,j);
float x = getRadialX((float)j,(float)i,centerX,centerY,k);
float y = getRadialY((float)j,(float)i,centerX,centerY,k);
sampleImage(input,x,y);
color = ((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
if(((i-centerX)*(i-centerX) + (j-centerY)*(j-centerY)) <= 5500){
arr[p]=color;
}else{
arr[p]=origPixel;
}
}
}
Bitmap dst2 = Bitmap.createBitmap(arr,width,height,input.getConfig());
return dst2;
}// end of barrel()
void sampleImage(Bitmap arr, float idx0, float idx1)
{
if(idx0<0 || idx1<0 || idx0>(arr.getHeight()-1) || idx1>(arr.getWidth()-1)){
s[0]=0;
s[1]=0;
s[2]=0;
s[3]=0;
return;
}
float idx0_fl=(float) Math.floor(idx0);
float idx0_cl=(float) Math.ceil(idx0);
float idx1_fl=(float) Math.floor(idx1);
float idx1_cl=(float) Math.ceil(idx1);
s1 = getARGB(arr,(int)idx0_fl,(int)idx1_fl);
s2 = getARGB(arr,(int)idx0_fl,(int)idx1_cl);
s3 = getARGB(arr,(int)idx0_cl,(int)idx1_cl);
s4 = getARGB(arr,(int)idx0_cl,(int)idx1_fl);
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
// s[0]= (int) (s1[0]*(1-x)*(1-y) + s2[0]*(1-x)*y + s3[0]*x*y + s4[0]*x*(1-y));
s[1]= (int) (s1[1]*(1-x)*(1-y) + s2[1]*(1-x)*y + s3[1]*x*y + s4[1]*x*(1-y));
s[2]= (int) (s1[2]*(1-x)*(1-y) + s2[2]*(1-x)*y + s3[2]*x*y + s4[2]*x*(1-y));
s[3]= (int) (s1[3]*(1-x)*(1-y) + s2[3]*(1-x)*y + s3[3]*x*y + s4[3]*x*(1-y));
}
int [] getARGB(Bitmap buf,int x, int y){
int rgb = buf.getPixel(y, x); // Returns by default ARGB.
// scalar[0] = (rgb >>> 24) & 0xFF;
scalar[1] = (rgb >>> 16) & 0xFF;
scalar[2] = (rgb >>> 8) & 0xFF;
scalar[3] = (rgb >>> 0) & 0xFF;
return scalar;
}
float getRadialX(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float getRadialY(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float thresh = 1;
float calc_shift(float x1,float x2,float cx,float k){
float x3 = (float)(x1+(x2-x1)*0.5);
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
if(res1>-thresh && res1 < thresh)
return x1;
if(res3<0){
return calc_shift(x3,x2,cx,k);
}
else{
return calc_shift(x1,x3,cx,k);
}
}
}// end of filters class
.[update]
Am i along the correct lines here. i seem to be getting a NPE at line 31 which is where task1 returns a partialResult. I don't think the partialProcessing.call method is returning a partialResult correctly. I just get 2 empty black bitmaps where the distortion should be, so the array may not be populated. have you any ideas? thanks.
public class MultiProcessorFilter {
public Bitmap barrel (Bitmap input, float k){
int []arr = new int[input.getWidth()*input.getHeight()];
// replace the j, i for loops:
int jMax = input.getHeight();
int jMid = jMax / 2;
int iMax = input.getWidth();
int iMid = iMax / 2;
ExecutorService threadPool = Executors.newFixedThreadPool(2);
FutureTask<PartialResult> task1 = (FutureTask<PartialResult>) threadPool.submit(new PartialProcessing(0, jMid - 1, input, k, iMid, iMax));
FutureTask<PartialResult> task2 = (FutureTask<PartialResult>) threadPool.submit(new PartialProcessing(jMid, jMax - 1,input, k, iMid, iMax));
try{
PartialResult result1 = task1.get(); // blocks until the thread returns the result
result1.fill(arr);
PartialResult result2 = task2.get(); // blocks until the thread returns the result
result2.fill(arr);
}catch(Exception e){
e.printStackTrace();
}
Bitmap dst2 = Bitmap.createBitmap(arr,input.getWidth(),input.getHeight(),input.getConfig());
return dst2;
}
public class PartialResult {
int startP;
int endP;
int[] storedValues;
public PartialResult(int startp, int endp){
this.startP = startp;
this.endP = endp;
}
public void addValue(int p, int result) {
storedValues[p] = result;
}
public void fill(int[] arr) {
for (int p = startP; p < endP; p++)
arr[p] = storedValues[p];
}
}
public class PartialProcessing implements Callable<PartialResult> {
int startJ;
int endJ;
// ... other members needed for the computation such as
private int[] scalar;
private float xscale;
private float yscale;
private float xshift;
private float yshift;
private float thresh = 1;
private int [] s1;
private int [] s2;
private int [] s3;
private int [] s4;
private int [] s;
private Bitmap input;
private float k;
private int startI;
private int endI;
public PartialProcessing(int startj, int endj, Bitmap input, float k, int starti, int endi) {
this.startJ = startj;
this.endJ = endj;
this.input = input;
this.k = k;
this.startI = starti;
this.endI = endi;
}
int [] getARGB(Bitmap buf,int x, int y){
int rgb = buf.getPixel(y, x); // Returns by default ARGB.
// int [] scalar = new int[4];
// scalar[0] = (rgb >>> 24) & 0xFF;
scalar[1] = (rgb >>> 16) & 0xFF;
scalar[2] = (rgb >>> 8) & 0xFF;
scalar[3] = (rgb >>> 0) & 0xFF;
return scalar;
}
//... add other methods needed for the computation that where in class Filters
float getRadialX(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float getRadialY(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float calc_shift(float x1,float x2,float cx,float k){
float x3 = (float)(x1+(x2-x1)*0.5);
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
if(res1>-thresh && res1 < thresh)
return x1;
if(res3<0){
return calc_shift(x3,x2,cx,k);
}
else{
return calc_shift(x1,x3,cx,k);
}
}
void sampleImage(Bitmap arr, float idx0, float idx1)
{
// s = new int [4];
if(idx0<0 || idx1<0 || idx0>(arr.getHeight()-1) || idx1>(arr.getWidth()-1)){
s[0]=0;
s[1]=0;
s[2]=0;
s[3]=0;
return;
}
float idx0_fl=(float) Math.floor(idx0);
float idx0_cl=(float) Math.ceil(idx0);
float idx1_fl=(float) Math.floor(idx1);
float idx1_cl=(float) Math.ceil(idx1);
s1 = getARGB(arr,(int)idx0_fl,(int)idx1_fl);
s2 = getARGB(arr,(int)idx0_fl,(int)idx1_cl);
s3 = getARGB(arr,(int)idx0_cl,(int)idx1_cl);
s4 = getARGB(arr,(int)idx0_cl,(int)idx1_fl);
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
// s[0]= (int) (s1[0]*(1-x)*(1-y) + s2[0]*(1-x)*y + s3[0]*x*y + s4[0]*x*(1-y));
s[1]= (int) (s1[1]*(1-x)*(1-y) + s2[1]*(1-x)*y + s3[1]*x*y + s4[1]*x*(1-y));
s[2]= (int) (s1[2]*(1-x)*(1-y) + s2[2]*(1-x)*y + s3[2]*x*y + s4[2]*x*(1-y));
s[3]= (int) (s1[3]*(1-x)*(1-y) + s2[3]*(1-x)*y + s3[3]*x*y + s4[3]*x*(1-y));
}
// this will be called on some new thread
@Override public PartialResult call() {
PartialResult partialResult = new PartialResult(startJ, endJ);
float centerX=input.getWidth()/2; //center of distortion
float centerY=input.getHeight()/2;
int width = input.getWidth(); //image bounds
int height = input.getHeight();
xshift = calc_shift(0,centerX-1,centerX,k);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,k);
yshift = calc_shift(0,centerY-1,centerY,k);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,k);
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
int p = startI; // not 0! at the start since we don't start at j = 0
int origPixel = 0;
int color = 0;
for (int j = startJ; j < endJ; j++){
for (int i = startI; i < endI; i++, p++){
//... copy the rest of the code
origPixel= input.getPixel(i,j);
float x = getRadialX((float)j,(float)i,centerX,centerY,k);
float y = getRadialY((float)j,(float)i,centerX,centerY,k);
sampleImage(input,x,y);
color = ((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
if(((i-centerX)*(i-centerX) + (j-centerY)*(j-centerY)) <= 5500){
//arr[p]=color;
partialResult.addValue(p, color);
}else{
//arr[p]=origPixel;
partialResult.addValue(p, origPixel);
}
}
// partialResult.addValue(p, color);
}
return partialResult;
}
}
}//end of MultiProcesorFilter
.
07-25 16:35:20.552: WARN/System.err(10253): java.util.concurrent.ExecutionException: java.lang.NullPointerException
07-25 16:35:20.552: WARN/System.err(10253): at java.util.concurrent.FutureTask$Sync.innerGet(FutureTask.java:223)
07-25 16:35:20.552: WARN/System.err(10253): at java.util.concurrent.FutureTask.get(FutureTask.java:82)
07-25 16:35:20.557: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter.barrel(MultiProcessorFilter.java:31)
07-25 16:35:20.557: WARN/System.err(10253): at com.tecmark.TouchView$2.run(TouchView.java:147)
07-25 16:35:20.557: WARN/System.err(10253): at java.lang.Thread.run(Thread.java:1096)
07-25 16:35:20.557: WARN/System.err(10253): Caused by: java.lang.NullPointerException
07-25 16:35:20.562: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter$PartialProcessing.sampleImage(MultiProcessorFilter.java:160)
07-25 16:35:20.562: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter$PartialProcessing.call(MultiProcessorFilter.java:235)
07-25 16:35:20.562: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter$PartialProcessing.call(MultiProcessorFilter.java:1)
07-25 16:35:20.562: WARN/System.err(10253): at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:305)
07-25 16:35:20.567: WARN/System.err(10253): at java.util.concurrent.FutureTask.run(FutureTask.java:137)
07-25 16:35:20.567: WARN/System.err(10253): at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1068)
07-25 16:35:20.567: WARN/System.err(10253): at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:561)
07-25 16:35:20.567: WARN/System.err(10253): ... 1 more
07-25 16:35:20.572: WARN/System.err(10253): java.util.concurrent.ExecutionException: java.lang.NullPointerException
07-25 16:35:20.577: WARN/System.err(10253): at java.util.concurrent.FutureTask$Sync.innerGet(FutureTask.java:223)
07-25 16:35:20.577: WARN/System.err(10253): at java.util.concurrent.FutureTask.get(FutureTask.java:82)
07-25 16:35:20.577: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter.barrel(MultiProcessorFilter.java:31)
07-25 16:35:20.577: WARN/System.err(10253): at com.tecmark.TouchView$2.run(TouchView.java:148)
07-25 16:35:20.577: WARN/System.err(10253): at java.lang.Thread.run(Thread.java:1096)
07-25 16:35:20.582: WARN/System.err(10253): Caused by: java.lang.NullPointerException
07-25 16:35:20.582: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter$PartialProcessing.sampleImage(MultiProcessorFilter.java:160)
07-25 16:35:20.582: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter$PartialProcessing.call(MultiProcessorFilter.java:235)
07-25 16:35:20.582: WARN/System.err(10253): at com.tecmark.MultiProcessorFilter$PartialProcessing.call(MultiProcessorFilter.java:1)
07-25 16:35:20.587: WARN/System.err(10253): at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:305)
07-25 16:35:20.587: WARN/System.err(10253): at java.util.concurrent.FutureTask.run(FutureTask.java:137)
07-25 16:35:20.587: WARN/System.err(10253): at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1068)
07-25 16:35:20.587: WARN/System.err(10253): at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:561)
07-25 16:35:20.587: WARN/System.err(10253): ... 1 more
Upvotes: 0
Views: 752
Reputation: 5326
You have to look for for
-loops when you want to parallelize your code:
for(int j=0;j < input.getHeight();j++){
for( i=0;i < input.getWidth();i++,p++){
You should divide some ranges of j
between various threads.
I haven't read carefully what you are doing, but this will not
work if the computation done on some j
is needed for the computation
of higher j
's.
EDIT:
some partial code:
public Bitmap barrel (Bitmap input, float k){
...
// replace the j, i for loops:
int jMax = input.getHeight();
ExecutorService threadPool = Executors.newFixedThreadPool(2);
int jMid = jMax / 2;
FutureTask<PartialResult> task1 = treadPool.submit(new PartialProcessing(0, jMid - 1, ...));
FutureTask<PartialResult> task2 = treadPool.submit(new PartialProcessing(jMid, jMax - 1(?) ...));
PartialResult result1 = task1.get(); // blocks until the thread returns the result
results1.fill(arr);
PartialResult result2 = task2.get(); // blocks until the thread returns the result
results2.fill(arr);
}
where
public class PartialResult {
int startP;
int endP;
int[] storedValues;
... constructor
public addValue(int p, int result) {
storedValues[p +/- some offset] = result;
}
public void fill(int[] arr) {
for (int p = startP; p < endP; p++)
arr[p] = storedValues[p +/- some offset];
}
}
}
and
public class PartialProcessing implements Callable<PartialResult> {
int startJ;
int endJ;
... other members needed for the computation such as
int[] scalar;
...
public PartialProcessing(int startJ, int endJ, ... others needed) {
...
}
int [] getARGB(Bitmap buf,int x, int y){
...
}
... add other methods needed for the computation that where in class Filters
// this will be called on some new thread
@Override public PartialResult call() {
PartialResult partialResult = new PartialResult(startJ, ...);
p = ...; // not 0! at the start since we don't start at j = 0
for (int j = startJ; j < (???<=) endJ; j++)
for (int i... p++)
... copy the rest of the code
partialResult.addResult(p, result);
}
}
By the way, you cannot fill directly arr[p]
in the call
method because of synchronization issues: the copy of the array arr
on one core does not necessarily have the same values as the copy on another core.
EDIT: since most processors have 2 cores, it would make more sense to use a multiple of 2 for the number of threads instead of 3. I would go for 4 since many upcoming ARM processors have 4 cores.
Upvotes: 1