How to use clflush?

Question

I want to measure the time difference between accessing a table entry and accessing another entry after a clflush. Below is my attempt; I see almost no penalty between the two operations. The table has 256 entries of 8 bits each. I suspect my clflush is not working properly. I am compiling with gcc using the -O3 flag.

            #include 
            #include 
            #include 
            /*
             * Number of elements in a true array. Only valid on array objects:
             * applied to a pointer (including an array function parameter) it
             * silently yields the wrong count. The argument is parenthesized so
             * that arbitrary array-valued expressions expand correctly.
             */
            #define ARRAYSIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

            #define REPEAT 10000

            /* Table of 256 one-byte entries; this is the data whose access time is being measured. */
            unsigned char table[256]={103,198,105,115,81,255,74,236,41,205,186,171,242,251,227,70,124,194,84,248,27,232,231,141,118,90,46,99,51,159,201,154,102,50,13,183,49,88,163,90,37,93,5,23,88,233,94,212,171,178,205,198,155,180,84,17,14,130,116,65,33,61,220,135,112,233,62,161,65,225,252,103,62,1,126,151,234,220,107,150,143,56,92,42,236,176,59,251,50,175,60,84,236,24,219,92,2,26,254,67,251,250,170,58,251,41,209,230,5,60,124,148,117,216,190,97,137,249,92,187,168,153,15,149,177,235,241,179,5,239,247,0,233,161,58,229,202,11,203,208,72,71,100,189,31,35,30,168,28,123,100,197,20,115,90,197,94,75,121,99,59,112,100,36,17,158,9,220,170,212,172,242,27,16,175,59,51,205,227,80,72,71,21,92,187,111,34,25,186,155,125,245,11,225,26,28,127,35,248,41,248,164,27,19,181,202,78,232,152,50,56,224,121,77,61,52,188,95,78,119,250,203,108,5,172,134,33,43,170,26,85,162,190,112,181,115,59,4,92,211,54,148,179,175,226,240,228,158,79,50,21,73,253,130,78,169};



            /*
             * Flush the cache line containing address p (x86 CLFLUSH).
             *
             * p: any address inside the line to evict; the pointed-to data is
             *    left unchanged.
             *
             * `static inline` is used because a plain C99/C11 `inline` definition
             * emits no out-of-line copy, so the program fails to link whenever the
             * compiler chooses not to inline the call (e.g. at -O0).
             *
             * The "+m" operand tells the compiler that the addressed memory is
             * both read and written by the instruction, so it cannot cache the
             * value in a register or move accesses to it across the flush.
             * `__asm__` is spelled out so the code also builds with -std=c99/c11.
             */
            static inline void clflush(volatile void *p)
            {
                __asm__ __volatile__ ("clflush %0" : "+m" (*(volatile char *)p));
            }

            /*
             * Read the CPU time-stamp counter, serialized by a preceding CPUID.
             *
             * Returns the 64-bit TSC value (EDX:EAX combined).
             *
             * EAX is zeroed before CPUID so the same leaf (0) is executed on
             * every call; leaving EAX uninitialized makes CPUID run an arbitrary
             * leaf, adding noise to the measurement. CPUID overwrites EBX and ECX
             * (declared as clobbers); EAX and EDX are then overwritten by RDTSC.
             *
             * The "memory" clobber stops the compiler from moving the timed
             * memory accesses across the timestamp read. `static inline` avoids
             * the C99/C11 plain-`inline` link failure when the call is not
             * inlined, and `__asm__` keeps the code valid under -std=c99/c11.
             */
            static inline uint64_t rdtsc(void)
            {
                uint32_t lo, hi;

                __asm__ __volatile__ ("xorl %%eax, %%eax\n\t"
                                      "cpuid\n\t"
                                      "rdtsc"
                                      : "=a" (lo), "=d" (hi)
                                      :
                                      : "ebx", "ecx", "memory");
                return lo | ((uint64_t)hi << 32);
            }

            inline int func(int *a) { 
                int i;
                for(i=0;i



Update: I understand there might be some problem due to the table access. Here is another piece of code that evicts a single variable instead of the whole table. This one shows a significant increase in clock cycles when using clflush(). Does that mean clflush() is working properly and the increased time is due to accessing the variable from memory?

            #include 
            #include 
            #define REPEAT 100000
            /*
             * Flush the cache line containing address p (x86 CLFLUSH).
             *
             * p: any address inside the line to evict; the pointed-to data is
             *    left unchanged.
             *
             * `static inline` is used because a plain C99/C11 `inline` definition
             * emits no out-of-line copy, so the program fails to link whenever the
             * compiler chooses not to inline the call (e.g. at -O0).
             *
             * The "+m" operand tells the compiler that the addressed memory is
             * both read and written by the instruction, so it cannot cache the
             * value in a register or move accesses to it across the flush.
             * `__asm__` is spelled out so the code also builds with -std=c99/c11.
             */
            static inline void clflush(volatile void *p)
            {
                __asm__ __volatile__ ("clflush %0" : "+m" (*(volatile char *)p));
            }

            /*
             * Read the CPU time-stamp counter with a bare RDTSC.
             *
             * Returns the 64-bit TSC value (EDX:EAX combined).
             *
             * NOTE: RDTSC is not a serializing instruction, so the CPU may
             * execute it before earlier instructions have completed; callers
             * that need a precise boundary must add their own fence/serializing
             * instruction around the call.
             *
             * The "memory" clobber only constrains the compiler: it prevents the
             * timed memory accesses from being moved across the timestamp read.
             * `static inline` avoids the C99/C11 plain-`inline` link failure when
             * the call is not inlined, and `__asm__` keeps the code valid under
             * -std=c99/c11.
             */
            static inline uint64_t rdtsc(void)
            {
                uint32_t lo, hi;

                __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi) : : "memory");
                return lo | ((uint64_t)hi << 32);
            }

            volatile int i;

            inline void test()
            {
                uint64_t start, end,clock;
                volatile int j;
                long int rep;
                int k;

                clock=0;
                for(rep=0;rep

How to use clflush?

Answers (1)

Related Questions