Reputation: 51
So I have a task to calculate Euler's number using multiple threads, using this formula: sum( ((3k)^2 + 1) / ((3k)!) ), for k = 0...infinity.
import java.math.BigDecimal;
import java.math.BigInteger;
import java.io.FileWriter;
import java.io.IOException;
import java.math.RoundingMode;
class ECalculator {
private BigDecimal sum;
private BigDecimal[] series;
private int length;
public ECalculator(int threadCount) {
this.length = threadCount;
this.sum = new BigDecimal(0);
this.series = new BigDecimal[threadCount];
for (int i = 0; i < this.length; i++) {
this.series[i] = BigDecimal.ZERO;
}
}
public synchronized void addToSum(BigDecimal element) {
this.sum = this.sum.add(element);
}
public void addToSeries(int id, BigDecimal element) {
if (id - 1 < length) {
this.series[id - 1] = this.series[id - 1].add(element);
}
}
public synchronized BigDecimal getSum() {
return this.sum;
}
public BigDecimal getSeriesSum() {
BigDecimal result = BigDecimal.ZERO;
for (int i = 0; i < this.length; i++) {
result = result.add(this.series[i]);
}
return result;
}
}
class ERunnable implements Runnable {
private final int id;
private final int threadCount;
private final int threadRemainder;
private final int elements;
private final boolean quietFlag;
private ECalculator eCalc;
public ERunnable(int threadCount, int threadRemainder, int id, int elements, boolean quietFlag, ECalculator eCalc) {
this.id = id;
this.threadCount = threadCount;
this.threadRemainder = threadRemainder;
this.elements = elements;
this.quietFlag = quietFlag;
this.eCalc = eCalc;
}
@Override
public void run() {
if (!quietFlag) {
System.out.println(String.format("Thread-%d started.", this.id));
}
long start = System.currentTimeMillis();
int k = this.threadRemainder;
int iteration = 0;
BigInteger currentFactorial = BigInteger.valueOf(intFactorial(3 * k));
while (iteration < this.elements) {
if (iteration != 0) {
for (int i = 3 * (k - threadCount) + 1; i <= 3 * k; i++) {
currentFactorial = currentFactorial.multiply(BigInteger.valueOf(i));
}
}
this.eCalc.addToSeries(this.id, new BigDecimal(Math.pow(3 * k, 2) + 1).divide(new BigDecimal(currentFactorial), 100, RoundingMode.HALF_UP));
iteration += 1;
k += this.threadCount;
}
long stop = System.currentTimeMillis();
if (!quietFlag) {
System.out.println(String.format("Thread-%d stopped.", this.id));
System.out.println(String.format("Thread %d execution time: %d milliseconds", this.id, stop - start));
}
}
public int intFactorial(int n) {
int result = 1;
for (int i = 1; i <= n; i++) {
result *= i;
}
return result;
}
}
public class TaskRunner {
public static final String DEFAULT_FILE_NAME = "result.txt";
public static void main(String[] args) throws InterruptedException {
int threadCount = 2;
int precision = 10000;
int elementsPerTask = precision / threadCount;
int remainingElements = precision % threadCount;
boolean quietFlag = false;
calculate(threadCount, elementsPerTask, remainingElements, quietFlag, DEFAULT_FILE_NAME);
}
public static void writeResult(String filename, String result) {
try {
FileWriter writer = new FileWriter(filename);
writer.write(result);
writer.close();
} catch (IOException e) {
System.out.println("An error occurred.");
e.printStackTrace();
}
}
public static void calculate(int threadCount, int elementsPerTask, int remainingElements, boolean quietFlag, String outputFile) throws InterruptedException {
long start = System.currentTimeMillis();
Thread[] threads = new Thread[threadCount];
ECalculator eCalc = new ECalculator(threadCount);
for (int i = 0; i < threadCount; i++) {
if (i == 0) {
threads[i] = new Thread(new ERunnable(threadCount, i, i + 1, elementsPerTask + remainingElements, quietFlag, eCalc));
} else {
threads[i] = new Thread(new ERunnable(threadCount, i, i + 1, elementsPerTask, quietFlag, eCalc));
}
threads[i].start();
}
for (int i = 0; i < threadCount; i++) {
threads[i].join();
}
String result = eCalc.getSeriesSum().toString();
if (!quietFlag) {
System.out.println("E = " + result);
}
writeResult(outputFile, result);
long stop = System.currentTimeMillis();
System.out.println("Calculated in: " + (stop - start) + " milliseconds" );
}
}
I stripped out the prints, etc. in the code that have no effect. My problem is that the more threads I use the slower it gets. Currently the fastest run I have is for 1 thread. I am sure the factorial calculation is causing some issues. I tried using a thread pool but still got the same times.
EDIT I updated the code block to be in 1 file only and runnable without external libs.
EDIT 2 I found out that the factorial code messes up with the time. If I let the threads ramp up to some high precision without calculating factorials the time goes down with increasing threads. Yet I cannot implement the factorial calculating in any way while keeping the time decreasing.
EDIT 3 Adjusting code to address answers.
private static BigDecimal partialCalculator(int start, int threadCount, int id) {
BigDecimal nBD = BigDecimal.valueOf(start);
BigDecimal result = nBD.multiply(nBD).multiply(BigDecimal.valueOf(9)).add(BigDecimal.valueOf(1));
for (int i = start; i > 0; i -= threadCount) {
BigDecimal iBD = BigDecimal.valueOf(i);
BigDecimal iBD1 = BigDecimal.valueOf(i - 1);
BigDecimal iBD3 = BigDecimal.valueOf(3).multiply(iBD);
BigDecimal prevNumerator = iBD1.multiply(iBD1).multiply(BigDecimal.valueOf(9)).add(BigDecimal.valueOf(1));
// 3 * i * (3 * i - 1) * (3 * i - 2);
BigDecimal divisor = iBD3.multiply(iBD3.subtract(BigDecimal.valueOf(1))).multiply(iBD3.subtract(BigDecimal.valueOf(2)));
result = result.divide(divisor, 10000, RoundingMode.HALF_EVEN)
.add(prevNumerator);
}
return result;
}
public static void main(String[] args) {
int threadCount = 3;
int precision = 6;
ExecutorService executorService = Executors.newFixedThreadPool(threadCount);
ArrayList<Future<BigDecimal> > futures = new ArrayList<Future<BigDecimal> >();
for (int i = 0; i < threadCount; i++) {
int start = precision - i;
System.out.println(start);
final int id = i + 1;
futures.add(executorService.submit(() -> partialCalculator(start, threadCount, id)));
}
BigDecimal result = BigDecimal.ZERO;
try {
for (int i = 0; i < threadCount; i++) {
result = result.add(futures.get(i).get());
}
} catch (Exception e) {
e.printStackTrace();
}
executorService.shutdown();
System.out.println(result);
}
Seems to be working properly for 1 thread but messes up the calculation for multiple.
Upvotes: 0
Views: 753
Reputation: 111309
After a review of the updated code, I've made the following observations:
First of all, the program runs for a fraction of a second. That means that this is a micro benchmark. Several key features in Java make micro benchmarks difficult to implement reliably. See How do I write a correct micro-benchmark in Java? For example, if the program doesn't run enough repetitions, the "just in time" compiler doesn't have time to kick in to compile it to native code, and you end up benchmarking the intepreter. It seems possible that in your case the JIT compiler takes longer to kick in when there are multiple threads,
As an example, to make your program do more work, I changed the BigDecimal
precision from 100 to 10,000 and added a loop around the main method. The execution times were measured as follows:
1 thread:
Calculated in: 2803 milliseconds
Calculated in: 1116 milliseconds
Calculated in: 1040 milliseconds
Calculated in: 1066 milliseconds
Calculated in: 1036 milliseconds
2 threads:
Calculated in: 2354 milliseconds
Calculated in: 856 milliseconds
Calculated in: 624 milliseconds
Calculated in: 659 milliseconds
Calculated in: 664 milliseconds
4 threads:
Calculated in: 1961 milliseconds
Calculated in: 797 milliseconds
Calculated in: 623 milliseconds
Calculated in: 536 milliseconds
Calculated in: 497 milliseconds
The second observation is that there is a significant part of the workload that does not benefit from multiple threads: every thread is computing every factorial. This means the speed-up cannot be linear - as described by Amdahl's law.
So how can we get the result without computing factorials? One way is with Horner's method. As an example, consider the simpler series sum(1/k!)
which also conveges to e
but a little slower than yours.
Let's say you want to compute sum(1/k!)
up to k = 100. With Horner's method you start from the end and extract common factors:
sum(1/k!, k=0..n) = 1/100! + 1/99! + 1/98! + ... + 1/1! + 1/0!
= ((... (((1/100 + 1)/99 + 1)/98 + ...)/2 + 1)/1 + 1
See how you start with 1, divide by 100 and add 1, divide by 99 and add 1, divide by 98 and add 1, and so on? That makes a very simple program:
private static BigDecimal serialHornerMethod() {
BigDecimal accumulator = BigDecimal.ONE;
for (int k = 10000; k > 0; k--) {
BigDecimal divisor = new BigDecimal(k);
accumulator = accumulator.divide(divisor, 10000, RoundingMode.HALF_EVEN)
.add(BigDecimal.ONE);
}
return accumulator;
}
Ok that's a serial method, how do you make it use parallel? Here's an example for two threads: First split the series into even and odd terms:
1/100! + 1/99! + 1/98! + 1/97! + ... + 1/1! + 1/0! =
(1/100! + 1/98! + ... + 1/0!) + (1/99! + 1/97! + ... + 1/1!)
Then apply Horner's method to both the even and odd terms:
1/100! + 1/98! + 1/96! + ... + 1/2! + 1/0! =
((((1/(100*99) + 1)/(98*97) + 1)/(96*95) + ...)/(2*1) + 1
and:
1/99! + 1/97! + 1/95! + ... + 1/3! + 1/1! =
((((1/(99*98) + 1)/(97*96) + 1)/(95*94) + ...)/(3*2) + 1
This is just as easy to implement as the serial method, and you get pretty close to linear speedup going from 1 to 2 threads:
private static BigDecimal partialHornerMethod(int start) {
BigDecimal accumulator = BigDecimal.ONE;
for (int i = start; i > 0; i -= 2) {
int f = i * (i + 1);
BigDecimal divisor = new BigDecimal(f);
accumulator = accumulator.divide(divisor, 10000, RoundingMode.HALF_EVEN)
.add(BigDecimal.ONE);
}
return accumulator;
}
// Usage:
ExecutorService executorService = Executors.newFixedThreadPool(2);
Future<BigDecimal> submit = executorService.submit(() -> partialHornerMethod(10000));
Future<BigDecimal> submit1 = executorService.submit(() -> partialHornerMethod(9999));
BigDecimal result = submit1.get().add(submit.get());
Upvotes: 1
Reputation: 111309
There is a lot of contention between the threads: they all compete to get a lock on the ECalculator
object after every little bit of computation, because of this method:
public synchronized void addToSum(BigDecimal element) {
this.sum = this.sum.add(element);
}
In general having threads compete for frequent access to a common resource leads to bad performance, because you're asking for the operating system to intervene and tell the program which thread can continue. I haven't tested your code to confirm that this is the issue because it's not self-contained.
To fix this, have the threads accumulate their results separately, and merge results after the threads have finished. That is, create a sum
variable in ERunnable
, and then change the methods:
// ERunnable.run:
this.sum = this.sum.add(new BigDecimal(Math.pow(3 * k, 2) + 1).divide(new BigDecimal(factorial(3 * k)), 100, RoundingMode.HALF_UP));
// TaskRunner.calculate:
for (int i = 0; i < threadCount; i++) {
threads[i].join();
eCalc.addToSum(/* recover the sum computed by thread */);
}
By the way would be easier if you used the higher level java.util.concurrent API instead of creating thread objects yourself. You could wrap the computation in a Callable
which can return a result.
Q2 How would one go about calculating this big factorials?
Usually you don't. Instead, you reformulate the problem so that it does not involve the direct computation of factorials. One technique is Horner's method.
Q3 The precision parameter that is passed is the amount of elements in the sum that are used. Can I set the BigDecimal scale to be somehow dependent on that precision so I don't hard code it?
Sure why not. You can work out the error bound from the number of elements (it's proportional to the last term in the series) and set the BigDecimal scale to that.
Upvotes: 1