Tampler
Tampler

Reputation: 385

Page aligned memory allocation in Linux C++

Suppose I need to allocate a large vector in a 64-bit Linux C++ application. I do the following:

int main()
{
  // Build the vector with its 2000 value-initialized ints in a single step.
  std::vector<int> v(2000);

  // Show where the first element lives so its alignment can be inspected.
  int *first = v.data();
  std::cout << first << std::endl;

  return 0;
}

This prints 0x7ffc2a177450 on my laptop, which is word-aligned. However, the vector size is 2000 * 4 B = 8000 B, which is just under 2 pages (2 * 4096 B = 8192 B) on my Ubuntu system with 4 kB pages.

Question: How can I place my vector at the beginning of a page, so that the allocation occupies exactly 2 pages in physical memory? Ideally, both pages should also be contiguous in physical memory. Thank you!

Upvotes: 2

Views: 2229

Answers (2)

Bonita Montero
Bonita Montero

Reputation: 3101

This is a full-fledged, C++23-compliant, page-aligned allocator for Windows and Unix-like systems such as Linux:

#pragma once
#if defined(_WIN32)
    #include <Windows.h>
#elif defined(__unix__)
    #include <unistd.h>
    #include <sys/mman.h>
#else
    #error platform not supported
#endif
#include <atomic>
// <bit> is needed unconditionally: std::bit_floor appears in the
// requires-clauses of reset(), not only inside assert().
#include <bit>
#include <cassert>
#include <cstddef>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>

// Stateless allocator that obtains whole pages directly from the operating
// system (VirtualAlloc on Windows, mmap on Unix) instead of going through the
// heap. All member functions are static; the object itself carries no data.
template <typename T>
struct virtual_allocator
{
    using allocator_type = virtual_allocator;
    using value_type = T;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;
    using propagate_on_container_move_assignment = std::false_type;
    // No state, so memory from one instance can be released through any other.
    using is_always_equal = std::true_type;

    constexpr virtual_allocator() noexcept {}
    constexpr virtual_allocator( virtual_allocator const & ) noexcept {}
    // Rebinding conversion: an allocator for T2 converts to one for T.
    template<typename T2>
    constexpr virtual_allocator( virtual_allocator<T2> const & ) noexcept {}

    // Returns storage for at least n objects of value_type; may throw std::bad_alloc.
    static value_type *allocate( std::size_t n );
    // Releases storage previously obtained for n objects starting at p.
    static void deallocate( value_type *p, std::size_t n ) noexcept;
    // Tells the kernel that the contents of the n bytes at p are no longer needed.
    // Both p and n must be multiples of the page size, otherwise std::bad_alloc
    // is thrown. Only available when sizeof(T) is a power of two and T is trivial.
    static void reset( void *p, std::size_t n )
        requires (sizeof(T) == std::bit_floor( sizeof(T) )) && std::is_trivial_v<T>;
    // Returns the system page size in bytes.
    static std::size_t get_page_size();
#if defined(__cpp_lib_allocate_at_least)
    // Like allocate(), but also reports how many objects fit into the block
    // that was actually obtained.
    static std::allocation_result<value_type *> allocate_at_least( std::size_t n );
#endif
private:
    // (pointer, number of value_type objects that fit into the block)
    using alloc_ret_t = std::pair<value_type *, std::size_t>;
    // Shared implementation behind allocate() and allocate_at_least().
    static alloc_ret_t allocateAtLeast( std::size_t n );
    // Queries the page size from the OS once and caches it.
    static std::size_t getPageSize();
};

// The Allocator requirements demand that allocators be equality comparable,
// including across rebound value types. Every virtual_allocator is
// interchangeable, so the comparison is always true. In C++20 and later,
// operator != is derived from this operator automatically.
template <typename T, typename U>
constexpr bool operator ==( virtual_allocator<T> const &, virtual_allocator<U> const & ) noexcept
{
    return true;
}

// Allocates storage for at least n objects and hands back only the pointer;
// the capacity that allocateAtLeast() reports alongside it is discarded.
template <typename T>
auto virtual_allocator<T>::allocate( std::size_t n ) -> value_type *
{
    alloc_ret_t block = allocateAtLeast( n );
    return block.first;
}

// Returns a block to the operating system.
//   p  start of the block, as returned by allocate() / allocate_at_least()
//   n  number of value_type objects the block was requested (or reported) for
// Failure of the underlying system call is only diagnosed through assert().
template <typename T>
void virtual_allocator<T>::deallocate( value_type *p, std::size_t n ) noexcept
{
    std::size_t pageSize = getPageSize();
    // n counts objects, so the mapping length is n * sizeof(value_type) rounded
    // up to whole pages. Scaling n by the page size instead would make munmap()
    // release address space far beyond the block, including unrelated mappings.
    assert(n * sizeof(value_type) / sizeof(value_type) == n);
    [[maybe_unused]] std::size_t bytes = (n * sizeof(value_type) + pageSize - 1) & ~(pageSize - 1);
#if defined(_WIN32)
    // MEM_RELEASE frees the whole reservation; the size argument must be 0.
    [[maybe_unused]] bool succ = (bool)VirtualFree( p, 0, MEM_RELEASE );
#elif defined(__unix__)
    [[maybe_unused]] bool succ = !munmap( p, bytes );
#endif
    assert(succ);
}

#if defined(__cpp_lib_allocate_at_least)
// Standard-library entry point that exposes both the pointer and the object
// capacity computed by allocateAtLeast().
template <typename T>
auto virtual_allocator<T>::allocate_at_least( std::size_t n ) -> std::allocation_result<value_type *>
{
    auto [ptr, count] = allocateAtLeast( n );
    return std::allocation_result<value_type *>( ptr, count );
}
#endif

// Maps enough whole pages to hold n objects of value_type.
//   n        number of objects requested
//   returns  (start of the block, number of objects that fit into the block)
//   throws   std::bad_alloc if the byte count overflows or the OS refuses the
//            request (a zero-sized request is refused by the OS as well)
template <typename T>
typename virtual_allocator<T>::alloc_ret_t virtual_allocator<T>::allocateAtLeast( std::size_t n )
{
    // Reject element counts whose byte size does not fit into size_t.
    if( n * sizeof(value_type) / sizeof(value_type) != n )
        throw std::bad_alloc();
    std::size_t pageSize = getPageSize();
    std::size_t bytes = n * sizeof(value_type);
    // Round up to a multiple of the page size (a power of two). If the addition
    // wraps around, the masked result ends up smaller than the request.
    std::size_t rounded = (bytes + pageSize - 1) & ~(pageSize - 1);
    if( rounded < bytes )
        throw std::bad_alloc();
#if defined(_WIN32)
    void *raw = VirtualAlloc( nullptr, rounded, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE );
    if( !raw )
        throw std::bad_alloc();
#elif defined(__unix__)
    // NOTE(review): MAP_SHARED keeps the pages shared with forked children and
    // may change how MADV_DONTNEED treats them; MAP_PRIVATE might be what is
    // intended here - confirm before changing.
    void *raw = mmap( nullptr, rounded, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0 );
    // mmap() signals failure with MAP_FAILED, never with a null pointer.
    if( raw == MAP_FAILED )
        throw std::bad_alloc();
#endif
    return alloc_ret_t( static_cast<value_type *>( raw ), rounded / sizeof(value_type) );
}

// Marks the n bytes starting at p as no longer needed (MEM_RESET on Windows,
// MADV_DONTNEED on Unix).
//   p  start of the range; must be a multiple of the page size
//   n  length passed to the system call; must be a multiple of the page size
// Throws std::bad_alloc if either value is not page-aligned or if
// n * sizeof(value_type) does not fit into size_t. Failure of the system call
// itself is only diagnosed through assert().
template <typename T>
void virtual_allocator<T>::reset( void *p, std::size_t n )
    requires (sizeof(T) == std::bit_floor( sizeof(T) )) && std::is_trivial_v<T>
{
    const std::size_t pageMask = getPageSize() - 1;
    // Any low bit set in either the address or the length means misalignment.
    const bool misaligned = ((reinterpret_cast<std::size_t>( p ) | n) & pageMask) != 0;
    const bool overflows = n * sizeof(value_type) / sizeof(value_type) != n;
    if( misaligned || overflows )
        throw std::bad_alloc();
#if defined(_WIN32)
    const bool succ = VirtualAlloc( p, n, MEM_RESET, PAGE_READWRITE ) != nullptr;
#elif defined(__unix__)
    const bool succ = madvise( p, n, MADV_DONTNEED ) == 0;
#endif
    assert(succ);
}

// Public accessor for the page size; forwards to the private getPageSize().
template <typename T>
inline auto virtual_allocator<T>::get_page_size() -> std::size_t
{
    const std::size_t pageSize = getPageSize();
    return pageSize;
}

// Returns the system page size in bytes. The value is queried from the OS on
// first use and kept in a function-local atomic; later calls read the cached
// value. Relaxed ordering is used for both the load and the store.
template <typename T>
inline auto virtual_allocator<T>::getPageSize() -> std::size_t
{
    static std::atomic<std::size_t> cache( 0 );
    // Fast path: the value has already been cached.
    if( std::size_t cached = cache.load( std::memory_order_relaxed ); cached ) [[likely]]
        return cached;
#if defined(_WIN32)
    SYSTEM_INFO info;
    GetSystemInfo( &info );
    const std::size_t queried = info.dwPageSize;
#elif defined(__unix__)
    const std::size_t queried = sysconf( _SC_PAGESIZE );
#endif
    // The rounding code elsewhere relies on a non-zero power-of-two page size.
    assert(queried && queried == std::bit_floor( queried ));
    cache.store( queried, std::memory_order_relaxed );
    return queried;
}

I had the personal requirement to reset certain pages, i.e. to mark them as discardable by the kernel. Therefore I have an additional reset() method, which can be called only if value_type has a power-of-two size and is a trivial data type. It takes a bare void pointer rather than a value_type pointer, since I leave the burden of calculating the page alignment to the caller. For that purpose there is an additional get_page_size() method.
One nice feature of my allocator is that it supports C++23's allocate_at_least(). This method allocates n items just like allocate(), but in addition to the pointer it returns a count that reports how many items actually fit into the whole block allocated. This prevents the extra space gained by rounding up to whole pages from going to waste.

Upvotes: 2

Oliv
Oliv

Reputation: 18051

You could define a page allocator:

#include <sys/user.h>
#include <sys/mman.h>
#include <cstddef>
#include <new>
// Minimal allocator that serves every request with its own anonymous private
// mmap() region, so each block starts on a page boundary.
template<class T>
struct page_allocator{
    using value_type = T;

    page_allocator() = default;
    // Rebinding conversion required of allocators; there is no state to copy.
    template<class U>
    constexpr page_allocator(const page_allocator<U>&) noexcept {}

    // Number of bytes needed for n objects of T, rounded up to whole pages.
    // PAGE_SIZE / PAGE_MASK are the compile-time constants from <sys/user.h>.
    static auto mem_size_for(std::size_t n){
      n = n * sizeof(T);
      return (n & (PAGE_MASK)) + (n & ~(PAGE_MASK) ? PAGE_SIZE:0);
      }
    // Maps fresh pages for n objects; throws std::bad_alloc if the byte count
    // overflows or mmap() fails (which includes a zero-length request).
    T* allocate(std::size_t n){
      // Without this check n * sizeof(T) could wrap and a too-small region
      // would be returned for a huge request.
      if (n > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc{};
      auto p = mmap(nullptr, mem_size_for(n)
                   ,PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
      if (p==MAP_FAILED) throw std::bad_alloc{};
      return static_cast<T*>(p);
      }
    // Unmaps the pages that allocate(n) mapped at p.
    void deallocate(T* p,std::size_t n){
      munmap(p,mem_size_for(n));
      }
    // Stateless: all instances compare equal. Declared const so that const
    // allocators (as held by containers) can be compared as well.
    constexpr bool operator==(page_allocator) const noexcept{return true;}
    constexpr bool operator!=(page_allocator) const noexcept{return false;}
  };

And use it this way:

int main()
{
  std::vector<int,page_allocator<int>> v;
  v.resize(PAGE_SIZE/sizeof(int));

  std::cout << &v[0] << std::endl;

  return 0;
}

Another option, using posix_memalign:

#include <cstddef>
#include <new>
#include <stdlib.h>
// Minimal allocator that obtains page-aligned blocks from the C heap through
// posix_memalign() and releases them with free().
// NOTE(review): PAGE_SIZE is not provided by the headers listed with this
// snippet; it is the constant from <sys/user.h> used by the page_allocator
// example - confirm that header is included wherever this is used.
template<class T>
struct memalign_allocator{
    using value_type = T;

    memalign_allocator() = default;
    // Rebinding conversion required of allocators; there is no state to copy.
    template<class U>
    constexpr memalign_allocator(const memalign_allocator<U>&) noexcept {}

    // Returns PAGE_SIZE-aligned storage for n objects; throws std::bad_alloc
    // if the byte count overflows or posix_memalign() reports an error.
    T* allocate(std::size_t n){
      // Without this check n * sizeof(T) could wrap and a too-small block
      // would be returned for a huge request.
      if (n > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc{};
      void* p = nullptr;
      if (posix_memalign(&p,PAGE_SIZE,n*sizeof(T))) throw std::bad_alloc{};
      return static_cast<T*>(p);
      }
    // Releases a block from allocate(); the element count is not needed.
    void deallocate(T* p,std::size_t /*n*/){
      free(p);
      }
    // Stateless: all instances compare equal. Declared const so that const
    // allocators (as held by containers) can be compared as well.
    constexpr bool operator==(memalign_allocator) const noexcept{return true;}
    constexpr bool operator!=(memalign_allocator) const noexcept{return false;}
  };

Using aligned_alloc would perform better.

Upvotes: 5

Related Questions