AshR
AshR

Reputation: 32

program intermittently stuck with main reporting a different thread id as opposed to the thread itself

I am trying to figure out how multi-threading works, this is my code :

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <pthread.h>

static pthread_cond_t threadDied = PTHREAD_COND_INITIALIZER ; // cond var initialization
static pthread_mutex_t threadMutex = PTHREAD_MUTEX_INITIALIZER ; // mutex initialization
// this mutex will protect all of the below global vars

static int totThreads = 0 ; // total number of threads created
static int numLive = 0 ;   // Total no. of threads still alive .. or terminated but not joined
static int numUnjoined = 0 ; // no. of threads that have not yet been joined

enum tstate { // enumeration of thread states
    TS_ALIVE, // thread is alive
    TS_TERMINATED, // thread terminated, not yet joined
    TS_JOINED  // thread terminated and joined
};

static struct {  // info about each thread
    pthread_t tid ; // thread ID
    enum tstate state; // Thread state as per the above enum
    int sleepTime ;  // no. of seconds to live before terminating
} *thread ; // name of the struct .. well a pointer

static void *threadFunc (void *arg) { // default start function for each thread
    int idx = *(int *)arg  ; // since arg is of type void , we typecast it to * of type int and deref it
    int s ; // for ret val

    sleep(thread[idx].sleepTime) ;  // pretending as though thread is doing some work :/

    s = pthread_mutex_lock(&threadMutex);
    if (s!=0) {
        printf("whoops, couldn't acquire mutex\n") ;
        fflush(stdout);
        exit (-1) ;
    }

    numUnjoined ++ ;
    thread[idx].state = TS_TERMINATED ;

    s = pthread_mutex_unlock(&threadMutex) ;
    if ( s!=0 ) {
        printf("whoops, couldn't release mutex\n") ;
        fflush(stdout);
        exit (-2) ;
    }

    s = pthread_cond_signal(&threadDied) ; // signalling any listening thread to wake up !!
    if (s != 0) {
        printf("whoops, couldn't signal the main thread to reap\n");
        fflush(stdout);
        exit (-3) ;
    }
    printf("Thread %d has worked hard and is now terminating\n", idx);
    fflush(stdout);

    return NULL ;
}

int main(int argc, char *argv[]) {
    int s, idx ;

    if (argc < 2 || strcmp(argv[1], "--help") == 0) {
        printf("Usage : %s nsecs...\n", argv[0]);
        fflush(stdout);
        exit(-4) ;
    }
    thread = calloc(argc -1, sizeof(*thread) );
    if (thread == NULL) {
        printf("whoops, couldn't allocate memory of size %lu\n", (argc -1) * sizeof(*thread) );
        fflush(stdout);
        exit(-5);
    }

    // Let's create all the threads now !!

    for (idx =0 ; idx < argc -1 ; idx++ ) {
        thread[idx].sleepTime = atoi(argv[idx + 1 ]) ; // thread sleeps for the duration entered in the cmd line
        thread[idx].state = TS_ALIVE ;
        s = pthread_create(&thread[idx].tid, NULL, threadFunc, &idx);
        printf("Main created thread %d with tid : %lu \n", ( * (int *)&idx ), (unsigned long)thread[idx].tid);
        fflush(stdout);
        if (s != 0 ){
            printf("whoops couldn't create thread %lu\n",(unsigned long) (&thread[idx].tid) );
            fflush(stdout);
            exit(-6) ;
        }
        //sleep(1); // << -- if I don't add this sleep, then it just deadlocks
    }

    totThreads = argc -1 ;
    numLive = totThreads ;

    // Join terminated threads

    while (numLive > 0 ) {
        s = pthread_mutex_lock(&threadMutex) ;
        if (s!=0){
            printf("whoops, couldn't lock mutex for joining\n") ;
            fflush(stdout);
            exit(-7) ;
        }
        while (numUnjoined == 0) {
            s = pthread_cond_wait(&threadDied, &threadMutex) ;
            if (s!=0) {
                printf("whoops, couldn't wait for thread join\n") ;
                fflush(stdout);
                exit(-8) ;
            }
        }

        for (idx = 0 ; idx < totThreads ; idx++ ) {
            if (thread[idx].state == TS_TERMINATED) {
                s = pthread_join(thread[idx].tid, NULL) ;
                if (s!=0) {
                    printf("Failed thread join\n");
                    fflush(stdout);
                    exit(-9) ;
                }

                thread[idx].state = TS_JOINED ;
                numLive-- ;
                numUnjoined-- ;
                printf("Reaped thread %d (numLive=%d)\n", idx, numLive);
                fflush(stdout);
            }
        }

        s = pthread_mutex_unlock(&threadMutex) ;
        if (s!=0){
            printf("whopps, couldn't unlock mutex after joining\n");
            fflush(stdout);
            exit(-10) ;
        }
    }
    exit(EXIT_SUCCESS);

}

For a thread count of 1, this code works sometimes, at other times it just hangs :(

WORKING :

#./thread_multijoin 1

Main created thread 0 with tid : 139835063281408

Thread 0 has worked hard and is now terminating

Reaped thread 0 (numLive=0)

HANG :

#./thread_multijoin 1

Main created thread 0 with tid : 140301613573888

Thread 1 has worked hard and is now terminating

^C

NOTICE here that Main says "Thread 0 was created" ; whereas the thread itself says "Thread 1" ... why is there a mismatch ??

It definitely gets stuck when I have multiple threads :

#./thread_multijoin 1 2 2 1

Main created thread 0 with tid : 140259455936256

Main created thread 1 with tid : 140259447543552

Main created thread 2 with tid : 140259439150848

Main created thread 3 with tid : 140259430758144

Thread 4 has worked hard and is now terminating

Thread 0 has worked hard and is now terminating

Reaped thread 0 (numLive=3)

Reaped thread 3 (numLive=2)

Thread 3 has worked hard and is now terminating

Reaped thread 2 (numLive=1)

Thread 2 has worked hard and is now terminating

^C

the only thing I am understanding from this is that the thread ID's reported by main and the thread itself are different, so I am guessing due to parallel scheduling there is something going on with the thread counter ... can you guys help me narrow this down please?

Thanks in advance.

========================================

Thanks @mevets and @user3386109 for the answer :)

I tried doing what @mevets suggested : i,e

pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);

and

int idx = (int)arg ;

but got this error when compiling :

thread_multijoin.c: In function ‘threadFunc’:

thread_multijoin.c:32:15: error: cast from pointer to integer of different 
size [-Werror=pointer-to-int-cast]

int idx = (int)arg  ; // since arg is of type void , we typecast it to * of type int and deref it


thread_multijoin.c: In function ‘main’:

thread_multijoin.c:90:64: error: cast to pointer from integer of different 
size [-Werror=int-to-pointer-cast]

s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx );

Upon researching further, found this thread : cast to pointer from integer of different size, pthread code

which suggested the use of intptr_t :

s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)(intptr_t)idx );

and

int idx = (intptr_t)arg

That worked perfectly fine without errors . Thanks once again for your time, really appreciate it :)

PS : to use intptr_t , you need to use _GNU_SOURCE :

#define _GNU_SOURCE

Upvotes: 0

Views: 107

Answers (1)

mevets
mevets

Reputation: 10445

[ the thread id ]: You pass the address of idx into each thread, then dereference it to index the table. So each thread gets the same pointer argument. You probably wanted to:

        s = pthread_create(&thread[idx].tid, NULL, threadFunc, (void *)idx);

and int idx = (int)arg ; // since arg is of type void , we typecast it to * of type int and deref it

ie; not deref it, just pass it in a “void *” container.

Upvotes: 2

Related Questions