Reputation: 385
Suppose I need to allocate a large vector in a 64-bit Linux C++ application. I do the following:
// Demo from the question: create a 2000-element vector and print the
// address of its first element so its alignment can be inspected.
int main()
{
    std::vector<int> v( 2000 );
    std::cout << v.data() << std::endl;
}
This prints 0x7ffc2a177450 on my laptop, which is word-aligned. However, the vector size is 2000 * 4 B = 8 kB, which fits in exactly 2 pages on my Ubuntu system with 4 kB pages.
Question: How can I place my vector at the beginning of a page, so that the allocation occupies exactly 2 pages in physical memory? Ideally, both pages should also be contiguous in physical memory. Thank you!
Upvotes: 2
Views: 2229
Reputation: 3101
This is a full-fledged, C++23-compliant page-aligned allocator for Windows and POSIX systems such as Linux:
#pragma once
#if defined(_WIN32)
#include <Windows.h>
#elif defined(__unix__)
#include <unistd.h>
#include <sys/mman.h>
#else
#error platform not supported
#endif
#include <atomic>
#include <bit>
#include <cassert>
#include <cstddef>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>
/// Stateless allocator whose storage is requested from the operating system
/// in whole pages, so every block it returns starts on a page boundary.
///
/// The type has no data members; every instance is interchangeable with
/// every other one (including instances for a different value type), which
/// is what the equality operator below reports.
template <typename T>
struct virtual_allocator
{
    using allocator_type = virtual_allocator;
    using value_type = T;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;
    using propagate_on_container_move_assignment = std::false_type;
    // No state means memory from one instance can always be released by another.
    using is_always_equal = std::true_type;

    constexpr virtual_allocator() noexcept = default;
    constexpr virtual_allocator( virtual_allocator const & ) noexcept = default;
    /// Rebinding conversion from an allocator for another value type.
    template<typename T2>
    constexpr virtual_allocator( virtual_allocator<T2> const & ) noexcept {}

    /// Allocators must be equality comparable; stateless allocators always
    /// compare equal. operator != is synthesized from this in C++20.
    template<typename T2>
    friend constexpr bool operator ==( virtual_allocator const &, virtual_allocator<T2> const & ) noexcept
    {
        return true;
    }

    /// Allocates page-aligned storage for at least n objects.
    static value_type *allocate( std::size_t n );
    /// Releases storage obtained from allocate() / allocate_at_least().
    static void deallocate( value_type *p, std::size_t n ) noexcept;
    /// Marks the n bytes starting at p as discardable; p and n must be
    /// multiples of the page size.
    static void reset( void *p, std::size_t n )
        requires (sizeof(T) == std::bit_floor( sizeof(T) )) && std::is_trivial_v<T>;
    /// Returns the system page size in bytes.
    static std::size_t get_page_size();
#if defined(__cpp_lib_allocate_at_least)
    /// Like allocate(), but also reports how many objects fit into the block.
    static std::allocation_result<value_type *> allocate_at_least( std::size_t n );
#endif
private:
    // (pointer to block, number of objects that fit into it)
    using alloc_ret_t = std::pair<value_type *, std::size_t>;
    static alloc_ret_t allocateAtLeast( std::size_t n );
    static std::size_t getPageSize();
};
/// Allocates storage for at least n objects and returns only the pointer;
/// the capacity reported by the internal helper is dropped.
template <typename T>
auto virtual_allocator<T>::allocate( std::size_t n ) -> value_type *
{
    alloc_ret_t const block = allocateAtLeast( n );
    return block.first;
}
/// Releases a block previously obtained from allocate() / allocate_at_least().
///
/// @param p  Start of the block.
/// @param n  Number of objects the block was allocated for. It is converted
///           to bytes and rounded up to whole pages, mirroring the size
///           computation done when the block was allocated.
template <typename T>
void virtual_allocator<T>::deallocate( value_type *p, std::size_t n ) noexcept
{
    std::size_t const pageSize = getPageSize();
    // n counts objects, so the byte size is n * sizeof(value_type). Scaling
    // by the page size instead would ask the OS to unmap far more than was
    // ever mapped.
    assert(n * sizeof(value_type) / sizeof(value_type) == n);
    [[maybe_unused]] std::size_t const bytes =
        (n * sizeof(value_type) + pageSize - 1) & ~(pageSize - 1);
#if defined(_WIN32)
    // The whole region is released at once; no size is passed here.
    [[maybe_unused]] bool const succ = VirtualFree( p, 0, MEM_RELEASE ) != 0;
#elif defined(__unix__)
    [[maybe_unused]] bool const succ = munmap( p, bytes ) == 0;
#endif
    // Failure cannot be reported from a noexcept function; catch it in debug builds.
    assert(succ);
}
#if defined(__cpp_lib_allocate_at_least)
/// Allocates storage for at least n objects and reports both the pointer
/// and the number of objects that fit into the block actually obtained.
template <typename T>
auto virtual_allocator<T>::allocate_at_least( std::size_t n ) -> std::allocation_result<value_type *>
{
    auto const [ptr, count] = allocateAtLeast( n );
    return { ptr, count };
}
#endif
/// Obtains a page-aligned block large enough for n objects.
///
/// The byte size is rounded up to a multiple of the page size, so the block
/// may hold more than n objects.
///
/// @param n  Number of objects requested.
/// @return   Pair of (pointer to the block, number of objects that fit).
/// @throws std::bad_alloc if the rounded byte size does not fit into
///         std::size_t or the operating system refuses the request. A
///         request for zero objects is forwarded as a zero-length request,
///         which is expected to be refused as well.
template <typename T>
typename virtual_allocator<T>::alloc_ret_t virtual_allocator<T>::allocateAtLeast( std::size_t n )
{
    std::size_t const pageMask = getPageSize() - 1;
    // Guard both the multiplication and the round-up addition against wrap-around.
    if( n > (static_cast<std::size_t>( -1 ) - pageMask) / sizeof(value_type) )
        throw std::bad_alloc();
    std::size_t const bytes = (n * sizeof(value_type) + pageMask) & ~pageMask;
#if defined(_WIN32)
    void *const raw = VirtualAlloc( nullptr, bytes, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE );
    if( !raw )
        throw std::bad_alloc();
#elif defined(__unix__)
    // NOTE(review): MAP_SHARED keeps the pages shared with child processes
    // after fork(); confirm whether MAP_PRIVATE was intended.
    void *const raw = mmap( nullptr, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0 );
    // mmap reports failure through MAP_FAILED, not through a null pointer.
    if( raw == MAP_FAILED )
        throw std::bad_alloc();
#endif
    return alloc_ret_t( static_cast<value_type *>( raw ), bytes / sizeof(value_type) );
}
/// Tells the operating system that the contents of a page range are no
/// longer needed (MEM_RESET on Windows, MADV_DONTNEED on Unix).
///
/// @param p  Start of the range; must be a multiple of the page size.
/// @param n  Length of the range as passed to the OS call; must be a
///           multiple of the page size.
/// @throws std::bad_alloc if p or n is not page-aligned, or if
///         n * sizeof(value_type) wraps around.
template <typename T>
void virtual_allocator<T>::reset( void *p, std::size_t n )
    requires (sizeof(T) == std::bit_floor( sizeof(T) )) && std::is_trivial_v<T>
{
    std::size_t const pageMask = getPageSize() - 1;
    bool const misaligned = ((reinterpret_cast<std::size_t>( p ) | n) & pageMask) != 0;
    bool const wraps = n * sizeof(value_type) / sizeof(value_type) != n;
    if( misaligned || wraps )
        throw std::bad_alloc();
#if defined(_WIN32)
    [[maybe_unused]] bool const ok = VirtualAlloc( p, n, MEM_RESET, PAGE_READWRITE ) != nullptr;
#elif defined(__unix__)
    [[maybe_unused]] bool const ok = madvise( p, n, MADV_DONTNEED ) == 0;
#endif
    // A failing OS call is only detected in debug builds.
    assert(ok);
}
/// Public accessor for the system page size in bytes; forwards to the
/// private, caching helper.
template <typename T>
inline
auto virtual_allocator<T>::get_page_size() -> std::size_t
{
    std::size_t const pageSize = getPageSize();
    return pageSize;
}
/// Returns the system page size in bytes.
///
/// The value is queried from the OS on first use and kept in a
/// function-local atomic. Concurrent first calls may each query the OS,
/// but they all store the same value, so relaxed ordering is sufficient.
template <typename T>
inline
std::size_t virtual_allocator<T>::getPageSize()
{
    static std::atomic<std::size_t> cached( 0 );
    if( std::size_t const known = cached.load( std::memory_order_relaxed ); known ) [[likely]]
        return known;
#if defined(_WIN32)
    SYSTEM_INFO info;
    GetSystemInfo( &info );
    std::size_t const queried = info.dwPageSize;
#elif defined(__unix__)
    std::size_t const queried = sysconf( _SC_PAGESIZE );
#endif
    // The rounding arithmetic elsewhere relies on a non-zero power of two.
    assert(queried && queried == std::bit_floor( queried ));
    cached.store( queried, std::memory_order_relaxed );
    return queried;
}
I had the personal requirement to reset certain pages, i.e. to mark them as discardable by the kernel. Therefore I have an additional reset() method, which takes a bare void pointer rather than a value_type pointer, since I leave the burden of calculating the page alignment to the caller. For that purpose there is also an additional get_page_size() method. reset() can only be called if value_type has a power-of-two size and is a trivial data type.
One nice feature of my allocator is that it supports C++23's allocate_at_least(). This method allocates n items just like allocate(), but instead of returning only a bare pointer it also returns a count that reports how many items actually fit into the whole block that was allocated. This prevents the space left over from rounding up to whole pages from being wasted.
Upvotes: 2
Reputation: 18051
You could define a page allocator:
#include <sys/user.h>
#include <sys/mman.h>
#include <cstddef>
#include <new>
/// Minimal stateless allocator that maps whole pages with mmap, so every
/// block it returns starts on a page boundary.
///
/// Relies on the PAGE_SIZE / PAGE_MASK macros being in scope.
template<class T>
struct page_allocator{
    using value_type = T;

    constexpr page_allocator() noexcept = default;
    /// Rebinding conversion; containers may need an allocator for another type.
    template<class U>
    constexpr page_allocator(const page_allocator<U>&) noexcept {}

    /// Byte size of n objects, rounded up to a multiple of PAGE_SIZE.
    /// The caller must ensure that n * sizeof(T) does not wrap around.
    static std::size_t mem_size_for(std::size_t n) noexcept {
        const std::size_t bytes = n * sizeof(T);
        return (bytes & PAGE_MASK) + ((bytes & ~PAGE_MASK) ? PAGE_SIZE : 0);
    }

    /// Maps enough anonymous pages for n objects.
    /// @return pointer to the mapping, or nullptr when n is 0.
    /// @throws std::bad_alloc if the size would overflow or mmap fails.
    T* allocate(std::size_t n){
        // A zero-length mmap would fail, so hand back an empty result instead.
        if (n == 0) return nullptr;
        // Largest page-aligned byte count; staying below it keeps both the
        // multiplication and the round-up in mem_size_for from wrapping.
        const std::size_t max_bytes = static_cast<std::size_t>(-1) & PAGE_MASK;
        if (n > max_bytes / sizeof(T)) throw std::bad_alloc{};
        void* const p = mmap(nullptr, mem_size_for(n),
                             PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) throw std::bad_alloc{};
        return static_cast<T*>(p);
    }

    /// Unmaps a block obtained from allocate(n); a null pointer or a zero
    /// count is ignored.
    void deallocate(T* p, std::size_t n) noexcept {
        if (p == nullptr || n == 0) return;
        // The result is ignored: deallocate has no channel to report failure.
        munmap(p, mem_size_for(n));
    }

    // All instances are interchangeable, for any value type.
    template<class U>
    constexpr bool operator==(const page_allocator<U>&) const noexcept { return true; }
    template<class U>
    constexpr bool operator!=(const page_allocator<U>&) const noexcept { return false; }
};
And use it this way:
int main()
{
std::vector<int,page_allocator<int>> v;
v.resize(PAGE_SIZE/sizeof(int));
std::cout << &v[0] << std::endl;
return 0;
}
Another option is to use posix_memalign:
#include <cstddef>
#include <new>
#include <stdlib.h>
/// Minimal stateless allocator that obtains page-aligned blocks from
/// posix_memalign and releases them with free.
///
/// Relies on the PAGE_SIZE macro being in scope.
template<class T>
struct memalign_allocator{
    using value_type = T;

    constexpr memalign_allocator() noexcept = default;
    /// Rebinding conversion; containers may need an allocator for another type.
    template<class U>
    constexpr memalign_allocator(const memalign_allocator<U>&) noexcept {}

    /// Allocates storage for n objects, aligned to PAGE_SIZE.
    /// @throws std::bad_alloc if n * sizeof(T) would overflow or
    ///         posix_memalign reports an error.
    T* allocate(std::size_t n){
        // Without this check a wrapped product would silently under-allocate.
        if (n > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc{};
        void* p = nullptr;
        if (posix_memalign(&p, PAGE_SIZE, n * sizeof(T)) != 0) throw std::bad_alloc{};
        return static_cast<T*>(p);
    }

    /// Releases a block obtained from allocate(); the count is not needed.
    void deallocate(T* p, std::size_t /*n*/) noexcept {
        free(p);
    }

    // All instances are interchangeable, for any value type.
    template<class U>
    constexpr bool operator==(const memalign_allocator<U>&) const noexcept { return true; }
    template<class U>
    constexpr bool operator!=(const memalign_allocator<U>&) const noexcept { return false; }
};
Using aligned_alloc would perform better.
Upvotes: 5