Invalid read of size 4 when separating thread

Question

I'm helping my friend to optimize his memory for this program:

#define _GNU_SOURCE
#include "myutils.h"
#include 

void * mySPMDMain(void *);
pthread_barrier_t mybarrier;

int main (int argc, char ** argv) {
    /* Initialize global data here */
    /* Start threads*/

    int i;

    if (argc != 3)
    {
        fprintf(stderr, "usage: ./psrs  
");
        exit(EXIT_FAILURE);
    }

    struct timeval start;
    struct timeval end;

    int N = atoi(argv[1]);
    int NUM_THREADS = atoi(argv[2]);
    int allChunkSize = N/NUM_THREADS;
    pthread_t ThreadID[NUM_THREADS];
    pthread_barrier_init(&mybarrier, NULL, NUM_THREADS);

    long int * originArray = (long int *) malloc(sizeof(long int)*N);

    for (i = 0; i pid;

    /* Parallel array to TCB */
    qsort(localTCB -> Chunk, localTCB -> ChunkSize, sizeof(long int), cmpfunc);
    BARRIER;

    // Timing

    /* Phase 1 */
    for (int i = 0; i< localTCB -> num_threads; ++i)
    {
        long int sample = localTCB->Chunk[i*localTCB -> offSet];
        localTCB->samples[i]=sample;
    }
    BARRIER;


    /* Phase 2 */
    MASTER {
        for (int i = 0; i < localTCB -> num_threads; ++i)
        {
            memcpy(&(localTCB->pivotArray[i*localTCB -> num_threads]),localTCB[i].samples, localTCB -> num_threads*sizeof(long int));
        }
        qsort(localTCB -> pivotArray, localTCB -> num_threads*localTCB -> num_threads, sizeof(long int), cmpfunc);
        for (int i = 0; i<(localTCB -> num_threads)-1; ++i)
        {   
            localTCB-> selectedPivot[i] = localTCB-> pivotArray[(i+1)*localTCB -> num_threads];
        }
        for (int i = 1; i num_threads; ++i)
        {
            memcpy(localTCB[i].selectedPivot,localTCB[0].selectedPivot,((localTCB -> num_threads)-1)*sizeof(long int));
        }
    }
    BARRIER;


    /* Phase 3 */
    for (int i = 0; i<(localTCB -> num_threads)-1; ++i)
    {
        localTCB -> sampleIndex[i] = binarySearch(localTCB->Chunk,0,localTCB->ChunkSize -1, localTCB->selectedPivot[i]);
    }
    for (int i = 0; i num_threads; ++i)
    {
        if(i == 0) localTCB -> eachStartIndex[i] = 0;
        else localTCB -> eachStartIndex[i] = localTCB -> sampleIndex[i-1];
    }
    for (int i = 0; i num_threads; ++i)
    {
        if (i == 0) localTCB -> passLength[i] = localTCB -> sampleIndex[i];
        localTCB -> passLength[i] = localTCB -> sampleIndex[i] - localTCB -> sampleIndex[i-1];
        if (i == (localTCB -> num_threads)-1) localTCB -> passLength[i] = localTCB -> ChunkSize - localTCB -> sampleIndex[i-1];
    }
    BARRIER;


    /* Phase 4 */
    MASTER {
        for (int i = 0; i num_threads; ++i)
        {
            for (int j = 0; j num_threads; ++j)
            {
                // each temp merge space  = localTCB[i].tmpMergeSpace[j]
                // each cpy start = localTCB[j].Chunk[localTCB[j].eachStartIndex[i]]
                // each cpy length = localTCB[j].passLength[i] * sizeof(long int)
                localTCB[i].tmpMergeSpace[j] = (long int*) malloc(sizeof(long int)*localTCB[j].passLength[i]);
                localTCB[i].mergeLength[j] = localTCB[j].passLength[i];
                memcpy(localTCB[i].tmpMergeSpace[j], &(localTCB[j].Chunk[localTCB[j].eachStartIndex[i]]), (localTCB[j].passLength[i]) * sizeof(long int));
            }
            localTCB[i].resultArr = (long int *) malloc(sizeof(long int)*sumTotal(localTCB[i].mergeLength, localTCB -> num_threads));
        }
    }
    BARRIER;
    multimerge(localTCB->tmpMergeSpace,localTCB->mergeLength,localTCB -> num_threads,localTCB->resultArr);
    BARRIER;
    //Timing


    return NULL;

} /* mySPMDMain*/

and the TCB struct is this:

typedef struct ThreadControlBlock {
    long int *Chunk;
    int *passLength;
    int *sampleIndex;
    int *eachStartIndex;
    long int **tmpMergeSpace;
    long int *pivotArray;
    long int *selectedPivot;
    long int *samples;
    int *mergeLength;
    long int * resultArr;
    int pid;
    int N;
    int num_threads;
    int ChunkSize;
    int offSet;
} TCB;

I used valgrind to check if there's any memory leak. valgrind returns that this program freed memory after execution but has a invalid read of size 4 when executing thread function on the main thread (with pid==0):

==6150== Memcheck, a memory error detector
==6150== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==6150== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==6150== Command: ./psrs 8000000 8
==6150== 
==6150== Thread 8:
==6150== Invalid read of size 4
==6150==    at 0x400DC2: mySPMDMain (psrs.c:169)
==6150==    by 0x4E3ADC4: start_thread (in /usr/lib64/libpthread-2.17.so)
==6150==    by 0x514673C: clone (in /usr/lib64/libc-2.17.so)
==6150==  Address 0x911cd5c is 4 bytes before a block of size 28 alloc'd
==6150==    at 0x4C29975: calloc (vg_replace_malloc.c:711)
==6150==    by 0x401188: main (psrs.c:48)
==6150== 
==6150== Invalid read of size 4
==6150==    at 0x400DBE: mySPMDMain (psrs.c:169)
==6150==    by 0x4E3ADC4: start_thread (in /usr/lib64/libpthread-2.17.so)
==6150==    by 0x514673C: clone (in /usr/lib64/libc-2.17.so)
==6150==  Address 0x911cd7c is 0 bytes after a block of size 28 alloc'd
==6150==    at 0x4C29975: calloc (vg_replace_malloc.c:711)
==6150==    by 0x401188: main (psrs.c:48)
==6150== 
==6150== Thread 1:
==6150== Invalid read of size 4
==6150==    at 0x400DC2: mySPMDMain (psrs.c:169)
==6150==    by 0x401266: main (psrs.c:64)
==6150==  Address 0x911a89c is 4 bytes before a block of size 28 alloc'd
==6150==    at 0x4C29975: calloc (vg_replace_malloc.c:711)
==6150==    by 0x401188: main (psrs.c:48)
==6150== 
==6150== Invalid read of size 4
==6150==    at 0x400DBE: mySPMDMain (psrs.c:169)
==6150==    by 0x401266: main (psrs.c:64)
==6150==  Address 0x911a8bc is 0 bytes after a block of size 28 alloc'd
==6150==    at 0x4C29975: calloc (vg_replace_malloc.c:711)
==6150==    by 0x401188: main (psrs.c:48)
==6150== 
==6150== 
==6150== HEAP SUMMARY:
==6150==     in use at exit: 0 bytes in 0 blocks
==6150==   total heap usage: 174 allocs, 174 frees, 320,014,664 bytes allocated
==6150== 
==6150== All heap blocks were freed -- no leaks are possible
==6150== 
==6150== For counts of detected and suppressed errors, rerun with: -v
==6150== ERROR SUMMARY: 16 errors from 4 contexts (suppressed: 0 from 0)

line 169: localTCB -> passLength[i] = localTCB -> sampleIndex[i] - localTCB -> sampleIndex[i-1];

line 63: myTCB[0].pid = 0;

line 48: myTCB[i].sampleIndex = calloc((NUM_THREADS-1), sizeof(int));

In this test run, NUM_THREADS is 8. If read correctly, myTCB[i].sampleIndex = calloc((NUM_THREADS-1), sizeof(int)); should read a memory of 4 * (8-1) = 28 bytes, I don't quite get where should this 4 bytes invalid read before this 28 bytes block.

Honza Remeš · Accepted Answer

Your line 169 is inside a for loop iterating with i from 0 up, but you are accessing element sampleIndex[i-1]. In the first iteration, this access is out of bounds.

Invalid read of size 4 when separating thread

Answers (2)

Related Questions