Reputation: 181
So there's this code, cache.c:
#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>
/* Benchmark parameters. Sizes and strides are counted in words, i.e. elements of x (ints). */
#define CACHE_MIN (1024) /* smallest working-set size tested (in words) */
#define CACHE_MAX (1024*1024) /* largest working-set size tested (in words); also the length of x */
#define STRIDE_MIN 1 /* smallest stride between accesses (in words) */
#define STRIDE_MAX 128 /* largest stride between accesses (in words) */
#define SAMPLE 10 /* repetition multiplier, to get a larger time sample */
#define CLK_TCK 60 /* assumed clock ticks per second. NOTE(review): hard-coded; the real times() tick rate is system-dependent -- confirm against sysconf(_SC_CLK_TCK) */
int x[CACHE_MAX]; /* the array that the benchmark strides through */
/*
 * Return the user-mode CPU time consumed so far by this process, in seconds.
 *
 * times() reports CPU time in clock ticks. The number of ticks per second
 * is system-dependent, so it is queried at run time with
 * sysconf(_SC_CLK_TCK) rather than assumed to be a fixed constant.
 *
 * Returns a negative value (-1.0) if either the tick rate or the process
 * times cannot be obtained.
 */
double get_seconds () {
    struct tms rusage;
    long ticks_per_sec = sysconf (_SC_CLK_TCK);

    if (ticks_per_sec <= 0) {
        return -1.0;
    }
    if (times (&rusage) == (clock_t) -1) {
        return -1.0;
    }
    return (double) rusage.tms_utime / (double) ticks_per_sec;
}
/*
 * Memory-access timing benchmark.
 *
 * For every working-set size from CACHE_MIN to CACHE_MAX words (doubling)
 * and every stride from STRIDE_MIN to STRIDE_MAX words (doubling):
 *   1. repeatedly sweep x[] with a read+write per visited element until at
 *      least one second of CPU time has been collected;
 *   2. run the same loop nest the same number of times with a dummy body
 *      and subtract its time, to remove loop overhead;
 *   3. print the average time per access in nanoseconds.
 *
 * Returns 0 on success, 1 if the CPU timer reports an invalid (negative)
 * time.
 */
int main () {
    register int i, index, stride, limit;
    /* Unsigned and initialised: the dummy accumulator is expected to wrap,
       which is only well-defined for unsigned arithmetic. */
    register unsigned int temp = 0;
    int steps, tsteps, csize;
    double sec0, sec;   /* timing variables */
    double accesses;    /* total number of array accesses that were timed */

    /* A negative reading cannot be a valid CPU time; without a working
       timer the "collect one second" loop below would never terminate. */
    if (get_seconds () < 0.0) {
        fprintf (stderr, "cache: CPU timer unavailable\n");
        return 1;
    }

    for (csize = CACHE_MIN; csize <= CACHE_MAX; csize = csize * 2) {
        for (stride = STRIDE_MIN; stride <= STRIDE_MAX; stride = stride * 2) {
            sec = 0;                        /* initialize timer */
            limit = csize - stride + 1;     /* cache size this loop */
            steps = 0;

            do {                            /* repeat until collect 1 second */
                sec0 = get_seconds ();      /* start timer */
                for (i = SAMPLE * stride; i != 0; i = i - 1) {  /* larger sample */
                    for (index = 0; index < limit; index = index + stride) {
                        x[index] = x[index] + 1;    /* cache access */
                    }
                }
                steps = steps + 1;          /* count while loop iterations */
                sec = sec + (get_seconds () - sec0);    /* end timer */
            } while (sec < 1.0);

            /* Repeat the same loops with a dummy body to subtract loop
               overhead. NOTE: temp is never read afterwards, so an
               optimizing compiler may drop this loop entirely. */
            tsteps = 0;     /* used to match number of while iterations */
            do {
                sec0 = get_seconds ();      /* start timer */
                for (i = SAMPLE * stride; i != 0; i = i - 1) {  /* larger sample */
                    for (index = 0; index < limit; index = index + stride) {
                        temp = temp + (unsigned int) index;     /* dummy code */
                    }
                }
                tsteps = tsteps + 1;        /* count while iterations */
                sec = sec - (get_seconds () - sec0);    /* - overhead */
            } while (tsteps < steps);       /* until equal to number of iterations */

            if (stride == STRIDE_MIN) {
                printf ("\n");  /* extra line to separate array sizes */
            }

            /* Accumulate the access count in double: the product easily
               exceeds INT_MAX when a second's worth of accesses is counted
               on a fast machine. */
            accesses = (double) steps * SAMPLE * stride
                       * ((limit - 1) / stride + 1);

            /* The byte counts are size_t values, hence %zu. */
            printf ("Size(bytes): %7zu Stride(bytes): %4zu read+write: %4.0f ns\n",
                    csize * sizeof (int), stride * sizeof (int),
                    sec * 1e9 / accesses);
        }
    }   /* end of both outer for loops */

    return 0;
}
When run, I get this output:
Size(bytes): 4096 Stride(bytes): 4 read+write: 1 ns
Size(bytes): 4096 Stride(bytes): 8 read+write: 0 ns
Size(bytes): 4096 Stride(bytes): 16 read+write: 0 ns
Size(bytes): 4096 Stride(bytes): 32 read+write: 0 ns
Size(bytes): 4096 Stride(bytes): 64 read+write: 0 ns
Size(bytes): 4096 Stride(bytes): 128 read+write: 0 ns
Size(bytes): 4096 Stride(bytes): 256 read+write: 0 ns
Size(bytes): 4096 Stride(bytes): 512 read+write: 0 ns
Size(bytes): 8192 Stride(bytes): 4 read+write: 0 ns
Size(bytes): 8192 Stride(bytes): 8 read+write: 1 ns
Size(bytes): 8192 Stride(bytes): 16 read+write: 0 ns
Size(bytes): 8192 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 8192 Stride(bytes): 64 read+write: 0 ns
Size(bytes): 8192 Stride(bytes): 128 read+write: 0 ns
Size(bytes): 8192 Stride(bytes): 256 read+write: 1 ns
Size(bytes): 8192 Stride(bytes): 512 read+write: 0 ns
Size(bytes): 16384 Stride(bytes): 4 read+write: 1 ns
Size(bytes): 16384 Stride(bytes): 8 read+write: 1 ns
Size(bytes): 16384 Stride(bytes): 16 read+write: 1 ns
Size(bytes): 16384 Stride(bytes): 32 read+write: 0 ns
Size(bytes): 16384 Stride(bytes): 64 read+write: 1 ns
Size(bytes): 16384 Stride(bytes): 128 read+write: 0 ns
Size(bytes): 16384 Stride(bytes): 256 read+write: 0 ns
Size(bytes): 16384 Stride(bytes): 512 read+write: 0 ns
Size(bytes): 32768 Stride(bytes): 4 read+write: 1 ns
Size(bytes): 32768 Stride(bytes): 8 read+write: 1 ns
Size(bytes): 32768 Stride(bytes): 16 read+write: 0 ns
Size(bytes): 32768 Stride(bytes): 32 read+write: 0 ns
Size(bytes): 32768 Stride(bytes): 64 read+write: 1 ns
Size(bytes): 32768 Stride(bytes): 128 read+write: 0 ns
Size(bytes): 32768 Stride(bytes): 256 read+write: 0 ns
Size(bytes): 32768 Stride(bytes): 512 read+write: 0 ns
Size(bytes): 65536 Stride(bytes): 4 read+write: 0 ns
Size(bytes): 65536 Stride(bytes): 8 read+write: 0 ns
Size(bytes): 65536 Stride(bytes): 16 read+write: 1 ns
Size(bytes): 65536 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 65536 Stride(bytes): 64 read+write: 2 ns
Size(bytes): 65536 Stride(bytes): 128 read+write: 2 ns
Size(bytes): 65536 Stride(bytes): 256 read+write: 1 ns
Size(bytes): 65536 Stride(bytes): 512 read+write: 1 ns
Size(bytes): 131072 Stride(bytes): 4 read+write: 0 ns
Size(bytes): 131072 Stride(bytes): 8 read+write: 0 ns
Size(bytes): 131072 Stride(bytes): 16 read+write: 0 ns
Size(bytes): 131072 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 131072 Stride(bytes): 64 read+write: 2 ns
Size(bytes): 131072 Stride(bytes): 128 read+write: 2 ns
Size(bytes): 131072 Stride(bytes): 256 read+write: 2 ns
Size(bytes): 131072 Stride(bytes): 512 read+write: 1 ns
Size(bytes): 262144 Stride(bytes): 4 read+write: 0 ns
Size(bytes): 262144 Stride(bytes): 8 read+write: 0 ns
Size(bytes): 262144 Stride(bytes): 16 read+write: 0 ns
Size(bytes): 262144 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 262144 Stride(bytes): 64 read+write: 2 ns
Size(bytes): 262144 Stride(bytes): 128 read+write: 2 ns
Size(bytes): 262144 Stride(bytes): 256 read+write: 2 ns
Size(bytes): 262144 Stride(bytes): 512 read+write: 1 ns
Size(bytes): 524288 Stride(bytes): 4 read+write: 0 ns
Size(bytes): 524288 Stride(bytes): 8 read+write: 0 ns
Size(bytes): 524288 Stride(bytes): 16 read+write: 1 ns
Size(bytes): 524288 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 524288 Stride(bytes): 64 read+write: 3 ns
Size(bytes): 524288 Stride(bytes): 128 read+write: 3 ns
Size(bytes): 524288 Stride(bytes): 256 read+write: 3 ns
Size(bytes): 524288 Stride(bytes): 512 read+write: 3 ns
Size(bytes): 1048576 Stride(bytes): 4 read+write: 1 ns
Size(bytes): 1048576 Stride(bytes): 8 read+write: 1 ns
Size(bytes): 1048576 Stride(bytes): 16 read+write: 1 ns
Size(bytes): 1048576 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 1048576 Stride(bytes): 64 read+write: 3 ns
Size(bytes): 1048576 Stride(bytes): 128 read+write: 3 ns
Size(bytes): 1048576 Stride(bytes): 256 read+write: 3 ns
Size(bytes): 1048576 Stride(bytes): 512 read+write: 3 ns
Size(bytes): 2097152 Stride(bytes): 4 read+write: 1 ns
Size(bytes): 2097152 Stride(bytes): 8 read+write: 1 ns
Size(bytes): 2097152 Stride(bytes): 16 read+write: 1 ns
Size(bytes): 2097152 Stride(bytes): 32 read+write: 1 ns
Size(bytes): 2097152 Stride(bytes): 64 read+write: 3 ns
Size(bytes): 2097152 Stride(bytes): 128 read+write: 3 ns
Size(bytes): 2097152 Stride(bytes): 256 read+write: 3 ns
Size(bytes): 2097152 Stride(bytes): 512 read+write: 3 ns
Size(bytes): 4194304 Stride(bytes): 4 read+write: 1 ns
Size(bytes): 4194304 Stride(bytes): 8 read+write: 1 ns
Size(bytes): 4194304 Stride(bytes): 16 read+write: 1 ns
Size(bytes): 4194304 Stride(bytes): 32 read+write: 2 ns
Size(bytes): 4194304 Stride(bytes): 64 read+write: 3 ns
Size(bytes): 4194304 Stride(bytes): 128 read+write: 3 ns
Size(bytes): 4194304 Stride(bytes): 256 read+write: 3 ns
Size(bytes): 4194304 Stride(bytes): 512 read+write: 3 ns
Now I'm trying to find how fast a cache hit and a cache miss are, and what the size and the block size of the first-level cache are.
Aren't the first-level cache size and block size both just 4 KB? I'm not sure how to find the speed, though. Any ideas?
Upvotes: 1
Views: 298
Reputation: 3640
The sizes of CPU caches and cache lines depend heavily on the particular CPU model and can vary significantly. Check the documentation for the CPU that you use.
Upvotes: 1