Reputation: 5059
Need to write an application in C/C++ on Linux that receives a stream of bytes from a socket and process them. The total bytes could be close to 1TB. If I have unlimited amount memory, I will just put it all in the memory, so my application can easily process data. It's much easy to do many things on flat memory space, such as memmem(), memcmp() ... On a circular buffer, the application has to be extra smart to be aware of the circular buffer.
I have about 8G of memory, but luckily due to locality, my application never needs to go back by more than 1GB from the latest data it received. Is there a way to have a 1TB buffer, with only the latest 1GB data mapped to physical memory? If so, how to do it?
Any ideas? Thanks.
Upvotes: 1
Views: 582
Reputation: 39308
Here's an example. It sets up a full terabyte mapping, but initially inaccessible (PROT_NONE
). You, the programmer, maintain a window that can only extend and move upwards in memory. The example program uses a one and a half gigabyte window, advancing it in steps of 1,023,739,137 bytes (the mapping_use()
makes sure the available pages cover at least the desired region), and does actually modify every page in every window, just to be sure.
#define _GNU_SOURCE
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
typedef struct mapping mapping;
struct mapping {
unsigned char *head; /* Start of currently accessible region */
unsigned char *tail; /* End of currently accessible region */
unsigned char *ends; /* End of region */
size_t page; /* Page size of this mapping */
};
/* Discard mapping.
*/
void mapping_free(mapping *const m)
{
if (m && m->ends > m->head) {
munmap(m->head, (size_t)(m->ends - m->head));
m->head = NULL;
m->tail = NULL;
m->ends = NULL;
m->page = 0;
}
}
/* Move the accessible part up in memory, to [from..to).
*/
int mapping_use(mapping *const m, void *const from, void *const to)
{
if (m && m->ends > m->head) {
unsigned char *const head = ((unsigned char *)from <= m->head) ? m->head :
((unsigned char *)from >= m->ends) ? m->ends :
m->head + m->page * (size_t)(((size_t)((unsigned char *)from - m->head)) / m->page);
unsigned char *const tail = ((unsigned char *)to <= head) ? head :
((unsigned char *)to >= m->ends) ? m->ends :
m->head + m->page * (size_t)(((size_t)((unsigned char *)to - m->head) + m->page - 1) / m->page);
if (head > m->head) {
munmap(m->head, (size_t)(head - m->head));
m->head = head;
}
if (tail > m->tail) {
#ifdef USE_MPROTECT
mprotect(m->tail, (size_t)(tail - m->tail), PROT_READ | PROT_WRITE);
#else
void *result;
do {
result = mmap(m->tail, (size_t)(tail - m->tail), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE | MAP_NORESERVE, -1, (off_t)0);
} while (result == MAP_FAILED && errno == EINTR);
if (result == MAP_FAILED)
return errno = ENOMEM;
#endif
m->tail = tail;
}
return 0;
}
return errno = EINVAL;
}
/* Initialize a mapping.
*/
int mapping_create(mapping *const m, const size_t size)
{
void *base;
size_t page, truesize;
if (!m || size < (size_t)1)
return errno = EINVAL;
m->head = NULL;
m->tail = NULL;
m->ends = NULL;
m->page = 0;
/* Obtain default page size. */
{
long value = sysconf(_SC_PAGESIZE);
page = (size_t)value;
if (value < 1L || (long)page != value)
return errno = ENOTSUP;
}
/* Round size up to next multiple of page. */
if (size % page)
truesize = size + page - (size % page);
else
truesize = size;
/* Create mapping. */
do {
errno = ENOTSUP;
base = mmap(NULL, truesize, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, (off_t)0);
} while (base == MAP_FAILED && errno == EINTR);
if (base == MAP_FAILED)
return errno;
/* Success. */
m->head = base;
m->tail = base;
m->ends = (unsigned char *)base + truesize;
m->page = page;
errno = 0;
return 0;
}
static void memtouch(void *const ptr, const size_t size)
{
if (ptr && size > 0) {
unsigned char *mem = (unsigned char *)ptr;
const size_t step = 2048;
size_t n = size / (size_t)step - 1;
mem[0]++;
mem[size-1]++;
while (n-->0) {
mem += step;
mem[0]++;
}
}
}
int main(void)
{
const size_t size = (size_t)1024 * (size_t)1024 * (size_t)1024 * (size_t)1024;
const size_t need = (size_t)1500000000UL;
const size_t step = (size_t)1023739137UL;
unsigned char *base;
mapping map;
size_t i;
if (mapping_create(&map, size)) {
fprintf(stderr, "Cannot create a %zu-byte mapping: %m.\n", size);
return EXIT_FAILURE;
}
printf("Have a %zu-byte mapping at %p to %p.\n", size, (void *)map.head, (void *)map.ends);
fflush(stdout);
base = map.head;
for (i = 0; i <= size - need; i += step) {
printf("Requesting %p to %p .. ", (void *)(base + i), (void *)(base + i + need));
fflush(stdout);
if (mapping_use(&map, base + i, base + i + need)) {
printf("Failed (%m).\n");
fflush(stdout);
return EXIT_FAILURE;
}
printf("received %p to %p.\n", (void *)map.head, (void *)map.tail);
fflush(stdout);
memtouch(base + i, need);
}
mapping_free(&map);
return EXIT_SUCCESS;
}
The approach is twofold. First, an inaccessible (PROT_NONE
) mapping is created to reserve the necessary virtual contiguous address space. If we omit this step, it would make it possible for a malloc()
call or similar to acquire pages within this range, which would defeat the entire purpose; a single terabyte-long mapping.
Second, when the accessible window extends into the region, either mprotect()
(if USE_MPROTECT
is defined), or mmap()
is used to make the required pages accessible. Pages no longer needed are completely unmapped.
Compile and run using
gcc -Wall -Wextra -std=c99 example.c -o example
time ./example
or, to use mmap()
only once and mprotect()
to move the window,
gcc -DUSE_MPROTECT=1 -Wall -Wextra -std=c99 example.c -o example
time ./example
Note that you probably don't want to run the test if you don't have at least 4GB of physical RAM.
On this particular machine (i5-4200U laptop with 4GB of RAM, 3.13.0-62-generic kernel on Ubuntu x86_64), quick testing didn't show any kind of performance difference between mprotect()
and mmap()
, in execution speed or resident set size.
If anyone bothers to compile and run the above, and finds that one of them has a repeatable benefit/drawback (resident set size or time used), I'd very much like to know about it. Please also define your kernel and CPU used.
I'm not sure which details I should expand on, since this is pretty straightforward, really, and the Linux man pages project man 2 mmap
and man 2 mprotect
pages are quite descriptive. If you have any questions on this approach or program, I'd be happy to try and elaborate.
Upvotes: 1