Reputation: 170
What I'm trying to do is send the integer value 0 to the function and use it as an index into my array. But instead of writing to patients[0], it writes to patients[1]. Any idea why? I am simply looping from 0 to 1, just to see if the value 0 is passed correctly: I pass i (0) to the function and expect it to assign myArr[0], but it assigns to myArr[1] instead.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>

typedef struct patient_info {
    pthread_t thread;
    char treatment;
    char department[20];
} patient;

patient patients[1000];

void* registration(void* arg)
{
    int p_num = *((int*)arg); // my array index that supposed to be 0

    if (rand() % 2 == 0)
    {
        patients[p_num].treatment = 'M';
    }
    else
    {
        patients[p_num].treatment = 'S';
    }

    return NULL;
}

int main(void)
{
    srand(time(NULL));

    for (size_t i = 0; i < 1; i++) // simple for loop to create my thread
    {
        if (pthread_create(&patients[i].thread, NULL, &registration, (void*)&i) != 0)
        {
            perror("There has been an error with pthread_create().");
            return 1;
        }
    }

    for (size_t j = 0; j < 1; j++)
    {
        if (pthread_join(patients[j].thread, NULL) != 0)
        {
            perror("There has been an error with the pthread_join().");
            return 2;
        }
    }

    for (size_t i = 0; i < 1000; i++) // make this loop to see where it is writing.
    {
        if (patients[i].treatment == 'M' || patients[i].treatment == 'S')
        {
            printf("Treatment is: %c %d\n", patients[i].treatment, i);
        }
    }

    return 0;
}
Upvotes: 1
Views: 375
Reputation: 33601
You are passing a pointer to i, so each thread points to the same i variable. Thus, the threads race to get their value: (e.g.) threadA wants 0 and threadB wants 1, but if the main task is fast enough, both might see either 0 or 1. Thus, a conflict.

Note that the thread also races against the loop in main itself: main usually increments i to its loop-exit value of 1 before the thread gets around to dereferencing the pointer (and, since i is scoped to the loop, the pointer is dangling by then). That is exactly why you see patients[1] being written.
Also, in main, i is a size_t, but in registration it's accessed through an int pointer. They are [probably] different sizes.
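To see what that mismatch means in practice, here's a minimal standalone sketch (mine, not part of the original program):

#include <stdio.h>

int main(void)
{
    size_t i = 1;           // 8 bytes on a typical 64-bit system
    int *p = (int *) &i;    // a 4-byte view of only part of that object

    // Formally this is undefined behavior. On a little-endian machine *p
    // happens to read the low half (1), but on a big-endian machine it
    // would read the high half (0).
    printf("i=%zu *p=%d\n", i, *p);
    return 0;
}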
The solution is to pass i by value:

pthread_create(&patients[i].thread, NULL, &registration, (void *) i)
And, in registration, we accept by value:

void *
registration(void *arg)
{
    size_t p_num = (size_t) arg;
    // ...
    return (void *) 0;
}
Here's the corrected code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>

typedef struct patient_info {
    pthread_t thread;
    char treatment;
    char department[20];
} patient;

patient patients[1000];

void *
registration(void *arg)
{
    // my array index that supposed to be 0
    // NOTE/BUG: this uses the wrong size pointer and to prevent the race condition
    // we want to accept by value
#if 0
    int p_num = *((int *) arg);
#else
    size_t p_num = (size_t) arg;
#endif

    if (rand() % 2 == 0) {
        patients[p_num].treatment = 'M';
    }
    else {
        patients[p_num].treatment = 'S';
    }

    return NULL;
}

int
main(void)
{
    srand(time(NULL));

    // simple for loop to create my thread
    for (size_t i = 0; i < 1; i++) {
        if (pthread_create(&patients[i].thread, NULL, &registration,
#if 0
                (void *) &i) != 0) {
#else
                (void *) i) != 0) {
#endif
            perror("There has been an error with pthread_create().");
            return 1;
        }
    }

    for (size_t j = 0; j < 1; j++) {
        if (pthread_join(patients[j].thread, NULL) != 0) {
            perror("There has been an error with the pthread_join().");
            return 2;
        }
    }

    // make this loop to see where it is writing.
    for (size_t i = 0; i < 1000; i++) {
        if (patients[i].treatment == 'M' || patients[i].treatment == 'S') {
            printf("Treatment is: %c %zu\n", patients[i].treatment, i);
        }
    }

    return 0;
}
Since you've gone to the trouble of creating a patient struct, we can clean up the code a bit by using and passing around some pointers to that struct:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>

typedef struct patient_info {
    pthread_t thread;
    char treatment;
    char department[20];
} patient;

patient patients[1000];

void *
registration(void *arg)
{
    patient *pt = arg;

    if (rand() % 2 == 0) {
        pt->treatment = 'M';
    }
    else {
        pt->treatment = 'S';
    }

    return NULL;
}

int
main(void)
{
    srand(time(NULL));
    patient *pt;

    // simple for loop to create my thread
    for (size_t i = 0; i < 1; i++) {
        pt = &patients[i];
        if (pthread_create(&pt->thread, NULL, &registration, pt) != 0) {
            perror("There has been an error with pthread_create().");
            return 1;
        }
    }

    for (size_t j = 0; j < 1; j++) {
        pt = &patients[j];
        if (pthread_join(pt->thread, NULL) != 0) {
            perror("There has been an error with the pthread_join().");
            return 2;
        }
    }

    // make this loop to see where it is writing.
    for (size_t i = 0; i < 1000; i++) {
        pt = &patients[i];
        if (pt->treatment == 'M' || pt->treatment == 'S') {
            printf("Treatment is: %c %zu\n", pt->treatment, i);
        }
    }

    return 0;
}
Note that we define the patient array to have 1000 elements.
At present, we are only creating one thread.
Presumably, we want to process all 1000 records.
But, creating 1000 threads is problematic and doesn't scale too well. If we had 100,000 patients, we [probably] could not create 100,000 threads in parallel.
And, even if we could, the system would spend most of its time switching between threads and the system would slow to a crawl.
Better to have a "pool" of "worker" threads and feed them a few records at a time.
If we do that, there's no reason to put the pthread_t into the patient record. We can have two separate arrays: one for patients and another [smaller] array for "active" threads.
There are many ways to do this. Ideally, we monitor thread completion and add new threads dynamically. But, that's a bit complicated for a first try.
Here's a version that splits things up into limited chunks. It's the "good enough for now" solution:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>

typedef struct patient_info {
    char treatment;
    char department[20];
} patient;

#define NPATIENT 1000
patient patients[NPATIENT];

#define NWORKER 10
pthread_t threads[NWORKER];

void *
registration(void *arg)
{
    patient *pt = arg;

    if (rand() % 2 == 0) {
        pt->treatment = 'M';
    }
    else {
        pt->treatment = 'S';
    }

    return NULL;
}

int
main(void)
{
    srand(time(NULL));
    patient *pt;

    for (size_t patlo = 0; patlo < NPATIENT; patlo += NWORKER) {
        size_t pathi = patlo + NWORKER;
        if (pathi > NPATIENT)
            pathi = NPATIENT;

        size_t itsk;

        // simple for loop to create my thread
        itsk = 0;
        for (size_t ipat = patlo; ipat < pathi; ipat++, itsk++) {
            pt = &patients[ipat];
            if (pthread_create(&threads[itsk], NULL, &registration, pt) != 0) {
                perror("There has been an error with pthread_create().");
                return 1;
            }
        }

        // join this chunk of threads
        itsk = 0;
        for (size_t ipat = patlo; ipat < pathi; ipat++, itsk++) {
            pt = &patients[ipat];
            if (pthread_join(threads[itsk], NULL) != 0) {
                perror("There has been an error with the pthread_join().");
                return 2;
            }
        }
    }

    // make this loop to see where it is writing.
    for (size_t ipat = 0; ipat < NPATIENT; ipat++) {
        pt = &patients[ipat];
        if (pt->treatment == 'M' || pt->treatment == 'S') {
            printf("Treatment is: %c %zu\n", pt->treatment, ipat);
        }
    }

    return 0;
}
UPDATE:
But why is it necessary to use a pointer to the struct in the below example you gave?
It's not strictly necessary, but it's a cleaner, more extensible option. And, without optimization by the compiler, it generates faster code.
Doing pt->whatever is simpler than patients[i].whatever everywhere.
And how can 2 threads race for 0 or 1 when I only loop once (create only 1 thread)? – covenant
With just one thread, there is no thread-vs-thread race. But, if we switched to a larger number (e.g.) 2, they would race. And, as noted above, even the single thread races against main's own increment of i.
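To make that concrete, here is a minimal, deliberately buggy standalone sketch (mine, not from the question) that isolates the race by using a plain int for i:

#include <pthread.h>
#include <stdio.h>

// Each thread just reports the value it saw through the shared pointer.
static void *report(void *arg)
{
    int seen = *(int *) arg;        // races with main's i++
    printf("thread saw %d\n", seen);
    return NULL;
}

int main(void)
{
    pthread_t tid[2];

    for (int i = 0; i < 2; i++)
        pthread_create(&tid[i], NULL, report, &i);  // same &i both times!

    // By now i has already reached its loop-exit value of 2 (and is out of
    // scope), so the threads may print 0, 1, or 2, depending on scheduling.
    for (int i = 0; i < 2; i++)
        pthread_join(tid[i], NULL);
    return 0;
}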
Remember, we fixed two problems:

1. The race condition: every thread was handed a pointer to the same i variable.
2. The size mismatch: i in main was a size_t, which is 8 bytes, but p_num in the thread function was an int, where the size was 4.

UPDATE #2:
Thank you so much again. Can you please expand the names of patlo, pathi, ipat and itsk?
Well, itsk is the easiest. If I didn't know this code and had to analyze it, I would look at all the places it was used. It is only used as an index into the threads array.
"tsk" is a "signature" style for me (Think: "task"). I often use three char abbreviations/acronyms. The threads
array is just a pthread_t
. But, if we needed more per-task (i.e. per-thread) information, I'd create a per-task struct (e.g.):
typedef struct {
    pthread_t tsk_pthr;             // the thread ID
    int tsk_patdone;                // number of patients processed
    long long tsk_elap;             // elapsed time of task
} tsk_t;
And, the pointer to the struct would be (e.g.): tsk_t *tskcur;
As to ipat, it is the index into the patients array. When we split up the patients array into chunks of NWORKER, then patlo is the first index of the current chunk and pathi is the index that is one beyond the end of the current chunk. So, with NWORKER at 10, patlo,pathi would be: 0,10 10,20 20,30 ...
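If it helps, here is a tiny standalone loop (mine, not from the program) that prints just the chunk boundaries that patlo/pathi walk through:

#include <stdio.h>

int main(void)
{
    // same bounds arithmetic as the chunked version, NWORKER=10, NPATIENT=1000
    for (size_t patlo = 0; patlo < 1000; patlo += 10) {
        size_t pathi = patlo + 10;
        if (pathi > 1000)
            pathi = 1000;
        printf("patlo=%zu pathi=%zu\n", patlo, pathi);  // 0,10  10,20  ...
    }
    return 0;
}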
And yes, what I wanted was working with 1000 threads at once, but as you said above it is problematic and I only have 4 CPUs. Is it a better idea to change NWORKER to 4? – covenant
Generally, using the number of CPUs is a good starting point. I've had luck with up to 2x the number of CPUs. This is a tuning parameter. You have to try it, measure it, adjust it. The "best" number can depend on the type of work being done.
Can this be done by semaphores or mutex_locks? Let's say I can only let 10 threads inside of my registration function. – covenant
A more advanced implementation (vs. the "good enough for now" implementation that I did above) would start NWORKER threads at the start, then just feed new work to the various threads. The threads would only be joined at the end [i.e. not after each chunk].
To get this more dynamic, semaphores could help, if one is careful; otherwise, the threads "serialize" waiting on the semaphores. Logically correct, but parallelism is reduced.
Or, condition variables might help, (e.g.) pthread_cond_signal et al.
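For illustration, here is a minimal sketch (mine, not one of the solutions above) of that idea: the workers sleep on a condition variable and main feeds them indexes a chunk at a time. The names avail, taken, and done are made up for this example.

#include <pthread.h>
#include <stdlib.h>

#define NPATIENT 1000
#define NWORKER  10

typedef struct patient_info {
    char treatment;
    char department[20];
} patient;

patient patients[NPATIENT];
pthread_t threads[NWORKER];

pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t more = PTHREAD_COND_INITIALIZER;
int avail = 0;                          // indexes released by main so far
int taken = 0;                          // indexes claimed by workers so far
int done = 0;                           // main will release no more work

void *
worker(void *arg)
{
    unsigned int seed = (unsigned int) (size_t) arg;    // per-thread seed

    for (;;) {
        pthread_mutex_lock(&lock);
        while (taken == avail && !done)
            pthread_cond_wait(&more, &lock);            // sleep until fed
        if (taken == avail) {                           // done and drained
            pthread_mutex_unlock(&lock);
            break;
        }
        int ipat = taken++;                             // claim next index
        pthread_mutex_unlock(&lock);

        // do the actual work outside the lock
        patients[ipat].treatment = (rand_r(&seed) % 2 == 0) ? 'M' : 'S';
    }

    return NULL;
}

int
main(void)
{
    for (size_t i = 0; i < NWORKER; i++)
        pthread_create(&threads[i], NULL, worker, (void *) (i + 1));

    // feed the workers a chunk of indexes at a time
    for (int lo = 0; lo < NPATIENT; lo += 100) {
        pthread_mutex_lock(&lock);
        avail = (lo + 100 > NPATIENT) ? NPATIENT : lo + 100;
        pthread_cond_broadcast(&more);                  // wake the pool
        pthread_mutex_unlock(&lock);
    }

    pthread_mutex_lock(&lock);
    done = 1;
    pthread_cond_broadcast(&more);                      // final wakeup
    pthread_mutex_unlock(&lock);

    for (size_t i = 0; i < NWORKER; i++)
        pthread_join(threads[i], NULL);

    return 0;
}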
Or, we could use atomic operations (from stdatomic.h). Each thread runs independently and "atomically" grabs the "next" index into the patient array. For the simple use case here, this is [probably] the most performant.
Here is a version that does that:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>
#include <stdatomic.h>

typedef struct patient_info {
    char treatment;
    char department[20];
} patient;

#define NPATIENT 1000
patient patients[NPATIENT];

size_t patidx = 0;

#define NWORKER 10
pthread_t threads[NWORKER];

void *
registration(void *arg)
{
    size_t ipat;
    patient *pt;

    while (1) {
        // _atomically_ grab the next index to use
        // NOTE: these next two lines are functionally equivalent, but ...
#if 0
        // ordinary code -- has race condition
        ipat = patidx++;
#else
        // atomic code -- works correctly
        ipat = atomic_fetch_add(&patidx,1);
#endif

        // stop if we are done
        if (ipat >= NPATIENT)
            break;

        pt = &patients[ipat];
        if (rand() % 2 == 0) {
            pt->treatment = 'M';
        }
        else {
            pt->treatment = 'S';
        }
    }

    return NULL;
}

int
main(void)
{
    srand(time(NULL));
    patient *pt;

    // start all threads
    for (size_t itsk = 0; itsk < NWORKER; ++itsk) {
        if (pthread_create(&threads[itsk], NULL, &registration, (void *) itsk)
            != 0) {
            perror("There has been an error with pthread_create().");
            return 1;
        }
    }

    // wait for all threads to complete
    for (size_t itsk = 0; itsk < NWORKER; ++itsk) {
        pthread_join(threads[itsk], NULL);
    }

    // make this loop to see where it is writing.
    for (size_t ipat = 0; ipat < NPATIENT; ipat++) {
        pt = &patients[ipat];
        if (pt->treatment == 'M' || pt->treatment == 'S') {
            printf("Treatment is: %c %zu\n", pt->treatment, ipat);
        }
    }

    return 0;
}
UPDATE #3:
In the above code examples, I missed the fact that rand is not thread safe and that rand_r should be used instead.
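The pattern is simply to give each thread its own seed and hand that to rand_r. A minimal function-level sketch (mine; thread_fn is a hypothetical name):

// Thread-safe: all of the PRNG state lives in the per-thread seed variable,
// so no hidden global state is shared between threads.
void *
thread_fn(void *arg)
{
    unsigned int seed = *(unsigned int *) arg;  // per-thread copy of the seed

    int coin = rand_r(&seed) % 2;               // like rand() % 2, but safe
    return (void *) (size_t) coin;
}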
Also, I [briefly] talked about performance and how it should be measured in order to tune the app.
So, I've created a [hopefully final :-)] version that combines the original chunked ("good enough for now") version with the "atomic" version, and adds the tsk_t struct, additional use of pointers, macros, and performance measurement.

I had to move everything into subfunctions. This is a good example of something that good programmers have to do.
Anyway, here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>
#include <stdatomic.h>

unsigned int seed;                      // random seed

typedef struct patient_info {
    char treatment;
    char department[20];
} patient;

#ifndef NPATIENT
#define NPATIENT 100000
#endif
int npatient = 1000;
patient patients[NPATIENT];

size_t patidx;                          // current patient index

typedef struct {
    int tsk_active;                     // 1=task active/joinable
    pthread_t tsk_pthr;                 // the thread ID
    unsigned int tsk_seed;              // random seed
    patient *tsk_pat;                   // pointer to patient record
    void *tsk_ret;                      // thread return value
} tsk_t;

#ifndef NWORKER
#define NWORKER 100
#endif
int nworker;
tsk_t threads[NWORKER];

#define TSKFORCUR(_tsk) \
    tsk_t *_tsk = &threads[0]; _tsk < &threads[nworker]; ++_tsk
#define TSKFORALL(_tsk) \
    tsk_t *_tsk = &threads[0]; _tsk < &threads[NWORKER]; ++_tsk

typedef struct {
    int (*fnc_ptr)(void);               // pointer to function
    const char *fnc_who;                // name of function
    double fnc_tscbest;                 // best time
    int fnc_nworker;                    // best number of workers
} fnc_t;

int
joinall(void)
{
    for (TSKFORCUR(tsk)) {
        if (! tsk->tsk_active)
            continue;
        if (pthread_join(tsk->tsk_pthr, &tsk->tsk_ret) != 0) {
            perror("There has been an error with the pthread_join().");
            return 2;
        }
    }

    return 0;
}

// registration_chunked -- "chunked" thread function
void *
registration_chunked(void *arg)
{
    tsk_t *tsk = arg;
    patient *pt = tsk->tsk_pat;

    if (rand_r(&tsk->tsk_seed) % 2 == 0)
        pt->treatment = 'M';
    else
        pt->treatment = 'S';

    return NULL;
}

// perform_chunked -- do separate create/join on threads
int
perform_chunked(void)
{
    int code = 0;

    for (size_t patlo = 0; patlo < npatient; patlo += nworker) {
        for (TSKFORALL(tsk))
            tsk->tsk_active = 0;

        size_t pathi = patlo + nworker;
        if (pathi > npatient)
            pathi = npatient;

        // simple for loop to create my thread
        tsk_t *tsk = &threads[0];
        for (size_t ipat = patlo; ipat < pathi; ++ipat, ++tsk) {
            tsk->tsk_active = 1;
            tsk->tsk_pat = &patients[ipat];
            if (pthread_create(&tsk->tsk_pthr, NULL, registration_chunked,
                tsk) != 0) {
                perror("There has been an error with pthread_create().");
                return 1;
            }
        }

        // join this chunk of threads
        code = joinall();
        if (code)
            break;
    }

    return code;
}

// registration_atomic -- atomic thread function
void *
registration_atomic(void *arg)
{
    tsk_t *tsk = arg;
    size_t ipat;
    patient *pt;

    while (1) {
        // _atomically_ grab the next index to use
        // NOTE: these next two lines are functionally equivalent, but ...
#if 0
        // ordinary code -- has race condition
        ipat = patidx++;
#else
        // atomic code -- works correctly
        ipat = atomic_fetch_add(&patidx,1);
#endif

        // stop if we are done
        if (ipat >= npatient)
            break;

        pt = &patients[ipat];
        if (rand_r(&tsk->tsk_seed) % 2 == 0) {
            pt->treatment = 'M';
        }
        else {
            pt->treatment = 'S';
        }
    }

    return NULL;
}

// perform_atomic -- do all work with atomic primitives
int
perform_atomic(void)
{
    atomic_store(&patidx,0);

    // start all threads
    for (TSKFORCUR(tsk)) {
        tsk->tsk_active = 1;
        if (pthread_create(&tsk->tsk_pthr, NULL, &registration_atomic, tsk)
            != 0) {
            perror("There has been an error with pthread_create().");
            return 1;
        }
    }

    // wait for all threads to complete
    int code = joinall();

    return code;
}

// patshow -- show patient data
void
patshow(void)
{
    const patient *pt;

    for (size_t ipat = 0; ipat < npatient; ipat++) {
        pt = &patients[ipat];
        if (pt->treatment == 'M' || pt->treatment == 'S') {
            printf("Treatment is: %c %zu\n", pt->treatment, ipat);
        }
    }
}

// tscgetf -- get hires timestamp
double
tscgetf(void)
{
    struct timespec ts;
    double sec;

    clock_gettime(CLOCK_MONOTONIC,&ts);
    sec = ts.tv_nsec;
    sec /= 1e9;
    sec += ts.tv_sec;

    return sec;
}

// NOTE: this uses "designated initializers"
fnc_t fnclist[] = {
    { .fnc_ptr = perform_chunked, .fnc_who = "chunked" },
    { .fnc_ptr = perform_atomic, .fnc_who = "atomic" },
    { .fnc_ptr = NULL }
};

// dofnc -- benchmark a given method
double
dofnc(fnc_t *fnc,double tsclast)
{
    double tscbeg;
    double tscdif;
    double tscbest = 1e6;
    patient *pt;

    // do multiple trials and take the fastest (best) one
    for (int iter = 1; iter <= 5; ++iter) {
        // reset the random seed
        for (TSKFORALL(tsk))
            tsk->tsk_seed = seed;

        // reset records and heat up the cache
        for (size_t ipat = 0; ipat < npatient; ipat++) {
            pt = &patients[ipat];
            pt->treatment = 0;
        }

        tscbeg = tscgetf();
        fnc->fnc_ptr();
        tscdif = tscgetf();

        // get elapsed time
        tscdif -= tscbeg;

        // take the best time to account for system delays and timeslicing
        if (tscdif < tscbest)
            tscbest = tscdif;
    }

    printf(" ELAPSED=(%.9f) RATE=(%.3f p/s) -- %s",
        tscbest,(double) npatient / tscbest,fnc->fnc_who);

    do {
        if (tsclast == 0)
            break;

        printf(" --");

        double ratio;
        if (tsclast > tscbest) {
            ratio = tsclast / tscbest;
            printf(" %.3fx faster",ratio);
        }
        else {
            ratio = tscbest / tsclast;
            printf(" %.3fx slower",ratio);
        }
    } while (0);

    printf("\n");

    if ((fnc->fnc_nworker <= 0) || (tscbest < fnc->fnc_tscbest)) {
        fnc->fnc_nworker = nworker;
        fnc->fnc_tscbest = tscbest;
    }

    // remember this so we can take a ratio
    return tscbest;
}

void
dosize(int nwork,size_t npat)
{
    static int sep = 0;

    if (sep)
        printf("\n");
    sep = 1;

    if (nwork < 1)
        nwork = 1;
    if (nwork > NWORKER)
        nwork = NWORKER;
    nworker = nwork;

    if (npat < 1)
        npat = 1;
    if (npat > NPATIENT)
        npat = NPATIENT;
    npatient = npat;

    printf("NWORKER=%d NPATIENT=%d\n",nworker,npatient);

    double tscnow = 0;
    for (fnc_t *fnc = fnclist; fnc->fnc_ptr != NULL; ++fnc)
        tscnow = dofnc(fnc,tscnow);
}

int
main(void)
{
    seed = time(NULL);

    for (size_t nwork = 1; nwork < 40; ++nwork)
        dosize(nwork,1000);

    // show the best number of workers to use
    printf("\n");
    printf("best nworkers:\n");
    for (fnc_t *fnc = fnclist; fnc->fnc_ptr != NULL; ++fnc)
        printf("fnc_nworker=%d fnc_tscbest=%.9f -- %s\n",
            fnc->fnc_nworker,fnc->fnc_tscbest,fnc->fnc_who);

    return 0;
}
Here is the program output. Look at the stats. The best number of workers to use is at the bottom. Surprises await!
NWORKER=1 NPATIENT=1000
ELAPSED=(0.032663233) RATE=(30615.463 p/s) -- chunked
ELAPSED=(0.000046097) RATE=(21693397.459 p/s) -- atomic -- 708.576x faster
NWORKER=2 NPATIENT=1000
ELAPSED=(0.021753732) RATE=(45969.124 p/s) -- chunked
ELAPSED=(0.000059036) RATE=(16938829.638 p/s) -- atomic -- 368.483x faster
NWORKER=3 NPATIENT=1000
ELAPSED=(0.021092976) RATE=(47409.147 p/s) -- chunked
ELAPSED=(0.000083985) RATE=(11906898.974 p/s) -- atomic -- 251.152x faster
NWORKER=4 NPATIENT=1000
ELAPSED=(0.024977652) RATE=(40035.789 p/s) -- chunked
ELAPSED=(0.000083009) RATE=(12046901.359 p/s) -- atomic -- 300.903x faster
NWORKER=5 NPATIENT=1000
ELAPSED=(0.038758768) RATE=(25800.614 p/s) -- chunked
ELAPSED=(0.000139154) RATE=(7186281.370 p/s) -- atomic -- 278.531x faster
NWORKER=6 NPATIENT=1000
ELAPSED=(0.029736476) RATE=(33628.733 p/s) -- chunked
ELAPSED=(0.000191748) RATE=(5215177.552 p/s) -- atomic -- 155.081x faster
NWORKER=7 NPATIENT=1000
ELAPSED=(0.026535172) RATE=(37685.831 p/s) -- chunked
ELAPSED=(0.000234081) RATE=(4272024.389 p/s) -- atomic -- 113.359x faster
NWORKER=8 NPATIENT=1000
ELAPSED=(0.025485060) RATE=(39238.676 p/s) -- chunked
ELAPSED=(0.000285933) RATE=(3497322.469 p/s) -- atomic -- 89.129x faster
NWORKER=9 NPATIENT=1000
ELAPSED=(0.026013032) RATE=(38442.270 p/s) -- chunked
ELAPSED=(0.000263240) RATE=(3798813.732 p/s) -- atomic -- 98.819x faster
NWORKER=10 NPATIENT=1000
ELAPSED=(0.029725359) RATE=(33641.309 p/s) -- chunked
ELAPSED=(0.000261056) RATE=(3830595.674 p/s) -- atomic -- 113.866x faster
NWORKER=11 NPATIENT=1000
ELAPSED=(0.026881332) RATE=(37200.538 p/s) -- chunked
ELAPSED=(0.000271164) RATE=(3687805.203 p/s) -- atomic -- 99.133x faster
NWORKER=12 NPATIENT=1000
ELAPSED=(0.030074292) RATE=(33250.991 p/s) -- chunked
ELAPSED=(0.000394198) RATE=(2536796.256 p/s) -- atomic -- 76.292x faster
NWORKER=13 NPATIENT=1000
ELAPSED=(0.030961288) RATE=(32298.398 p/s) -- chunked
ELAPSED=(0.000345326) RATE=(2895815.125 p/s) -- atomic -- 89.658x faster
NWORKER=14 NPATIENT=1000
ELAPSED=(0.027436778) RATE=(36447.428 p/s) -- chunked
ELAPSED=(0.000587254) RATE=(1702840.830 p/s) -- atomic -- 46.720x faster
NWORKER=15 NPATIENT=1000
ELAPSED=(0.032111215) RATE=(31141.768 p/s) -- chunked
ELAPSED=(0.000391190) RATE=(2556302.194 p/s) -- atomic -- 82.086x faster
NWORKER=16 NPATIENT=1000
ELAPSED=(0.027765346) RATE=(36016.119 p/s) -- chunked
ELAPSED=(0.000475762) RATE=(2101891.519 p/s) -- atomic -- 58.360x faster
NWORKER=17 NPATIENT=1000
ELAPSED=(0.026204446) RATE=(38161.463 p/s) -- chunked
ELAPSED=(0.000951203) RATE=(1051300.372 p/s) -- atomic -- 27.549x faster
NWORKER=18 NPATIENT=1000
ELAPSED=(0.030340088) RATE=(32959.694 p/s) -- chunked
ELAPSED=(0.000467318) RATE=(2139870.524 p/s) -- atomic -- 64.924x faster
NWORKER=19 NPATIENT=1000
ELAPSED=(0.028912229) RATE=(34587.440 p/s) -- chunked
ELAPSED=(0.000553825) RATE=(1805624.340 p/s) -- atomic -- 52.205x faster
NWORKER=20 NPATIENT=1000
ELAPSED=(0.029094981) RATE=(34370.189 p/s) -- chunked
ELAPSED=(0.000505824) RATE=(1976972.262 p/s) -- atomic -- 57.520x faster
NWORKER=21 NPATIENT=1000
ELAPSED=(0.031570002) RATE=(31675.639 p/s) -- chunked
ELAPSED=(0.000901482) RATE=(1109284.549 p/s) -- atomic -- 35.020x faster
NWORKER=22 NPATIENT=1000
ELAPSED=(0.033848829) RATE=(29543.120 p/s) -- chunked
ELAPSED=(0.000575106) RATE=(1738809.862 p/s) -- atomic -- 58.857x faster
NWORKER=23 NPATIENT=1000
ELAPSED=(0.029385494) RATE=(34030.396 p/s) -- chunked
ELAPSED=(0.000793229) RATE=(1260669.853 p/s) -- atomic -- 37.045x faster
NWORKER=24 NPATIENT=1000
ELAPSED=(0.031210263) RATE=(32040.742 p/s) -- chunked
ELAPSED=(0.000643074) RATE=(1555030.879 p/s) -- atomic -- 48.533x faster
NWORKER=25 NPATIENT=1000
ELAPSED=(0.029140703) RATE=(34316.262 p/s) -- chunked
ELAPSED=(0.000715511) RATE=(1397602.482 p/s) -- atomic -- 40.727x faster
NWORKER=26 NPATIENT=1000
ELAPSED=(0.032022561) RATE=(31227.983 p/s) -- chunked
ELAPSED=(0.000705709) RATE=(1417014.463 p/s) -- atomic -- 45.376x faster
NWORKER=27 NPATIENT=1000
ELAPSED=(0.029134086) RATE=(34324.056 p/s) -- chunked
ELAPSED=(0.000724864) RATE=(1379569.210 p/s) -- atomic -- 40.192x faster
NWORKER=28 NPATIENT=1000
ELAPSED=(0.035466630) RATE=(28195.518 p/s) -- chunked
ELAPSED=(0.000987683) RATE=(1012470.644 p/s) -- atomic -- 35.909x faster
NWORKER=29 NPATIENT=1000
ELAPSED=(0.035837240) RATE=(27903.935 p/s) -- chunked
ELAPSED=(0.001032722) RATE=(968314.850 p/s) -- atomic -- 34.702x faster
NWORKER=30 NPATIENT=1000
ELAPSED=(0.036233530) RATE=(27598.746 p/s) -- chunked
ELAPSED=(0.001048557) RATE=(953691.602 p/s) -- atomic -- 34.556x faster
NWORKER=31 NPATIENT=1000
ELAPSED=(0.034758216) RATE=(28770.176 p/s) -- chunked
ELAPSED=(0.000810737) RATE=(1233445.583 p/s) -- atomic -- 42.872x faster
NWORKER=32 NPATIENT=1000
ELAPSED=(0.032050096) RATE=(31201.155 p/s) -- chunked
ELAPSED=(0.001110657) RATE=(900368.073 p/s) -- atomic -- 28.857x faster
NWORKER=33 NPATIENT=1000
ELAPSED=(0.028196867) RATE=(35464.933 p/s) -- chunked
ELAPSED=(0.000948129) RATE=(1054708.812 p/s) -- atomic -- 29.739x faster
NWORKER=34 NPATIENT=1000
ELAPSED=(0.036432115) RATE=(27448.310 p/s) -- chunked
ELAPSED=(0.000938635) RATE=(1065376.884 p/s) -- atomic -- 38.814x faster
NWORKER=35 NPATIENT=1000
ELAPSED=(0.029211664) RATE=(34232.901 p/s) -- chunked
ELAPSED=(0.001254896) RATE=(796878.827 p/s) -- atomic -- 23.278x faster
NWORKER=36 NPATIENT=1000
ELAPSED=(0.035125977) RATE=(28468.959 p/s) -- chunked
ELAPSED=(0.001015229) RATE=(984999.410 p/s) -- atomic -- 34.599x faster
NWORKER=37 NPATIENT=1000
ELAPSED=(0.027013535) RATE=(37018.480 p/s) -- chunked
ELAPSED=(0.000971639) RATE=(1029188.881 p/s) -- atomic -- 27.802x faster
NWORKER=38 NPATIENT=1000
ELAPSED=(0.027284315) RATE=(36651.094 p/s) -- chunked
ELAPSED=(0.001343600) RATE=(744269.135 p/s) -- atomic -- 20.307x faster
NWORKER=39 NPATIENT=1000
ELAPSED=(0.026986172) RATE=(37056.015 p/s) -- chunked
ELAPSED=(0.001386600) RATE=(721188.537 p/s) -- atomic -- 19.462x faster
best nworkers:
fnc_nworker=3 fnc_tscbest=0.021092976 -- chunked
fnc_nworker=1 fnc_tscbest=0.000046097 -- atomic
Upvotes: 2