tzcoolman

Reputation: 61

Large file (100GB) opening and reading chunk by chunk using memory mapping

I would like to open a 100 GB file and use file mapping to read data from it chunk by chunk. The mapping always comes up empty once the offset exceeds 2 GB. I thought the problem might be that the functions don't support 64-bit offsets, so I added large file support (the large-file feature-test macros, the large-file open option, and compiling with -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE), but the same problem still occurs. Here is the simplified code:

#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64
#include <math.h>
#include <time.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/types.h>

#define PERMS 0600

int total_piece, PAGE, buffer, share, offset, count, chunk;

void get_size(char * strFileName)   
{  
    struct stat temp;  
    stat(strFileName, &temp);

    PAGE = getpagesize();             
    total_piece = temp.st_size/PAGE;
    chunk = 1024*1024*1024*0.4/PAGE; 

    if (temp.st_size % PAGE != 0)
        total_piece++;
}

char *
mmaping (char *source)
{
  int src;
  char *sm;
  struct stat statbuf;

  if ((src = open (source, O_RDONLY)) < 0)  // I thought the error came from this line, so I tried the large-file open below, but the result is the same.
    {
      perror (" open source ");
      exit (EXIT_FAILURE);
    }
/*
  if ((src = open64(source, O_RDONLY|O_LARGEFILE, 0644))<0)  
    {
      perror (" open source ");
      exit (EXIT_FAILURE);
    }
*/
  if (fstat (src, &statbuf) < 0)
    {
      perror (" fstat source ");
      exit (EXIT_FAILURE);
    }

  printf("share->%d PAGES per node\n",share);

  if (share >= chunk)
    buffer = chunk;
  else
    buffer = share;

  printf("total pieces->%d\n",total_piece);
  printf("data left->%d\n",share);
  printf("buffer size->%d\n",buffer);
  printf("PAGE size->%d\n",PAGE);

  sm = mmap (0,buffer*PAGE, PROT_READ, MAP_SHARED | MAP_NORESERVE,src, offset*PAGE); 

  if (MAP_FAILED == sm)
    {
      perror (" mmap source ");
      exit (EXIT_FAILURE);
    }

  return sm;
}

int main(int argc, char **argv){

   get_size(argv[1]);

   share = total_piece;

   offset = 0;

   while (share>0)
   {

      char *x = mmaping(argv[1]);

      printf("data->%0.30s\n",x); //bus error will occur when offset reaches 2GiB, which proves my thought: it maps          nothing.

      munmap(x,buffer*PAGE);  

      share-=buffer;

      offset+=buffer;

   }

   return 0;
}

Can anyone be nice and help me with this?

Upvotes: 1

Views: 2554

Answers (1)

janneb

Reputation: 37258

Certainly a variable of type "int", which is 32 bits on Linux, is not large enough to contain the size in bytes of a 100 GB file. For file sizes/offsets you need to use the type "off_t" instead (which, when enabling LFS support as you have done, is an alias for off64_t, a signed 64-bit integer).

Similarly, the "length" argument to mmap is of type size_t, not int.

To make the code portable to both 32- and 64-bit targets, with and without LFS, you need to be careful about which integer types are used where.
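
To illustrate, here is a minimal sketch of such a chunked-read loop with the types corrected; it is not the poster's program, and the chunk size (100000 pages) is just a placeholder value:

/* Minimal sketch: map a large file in page-aligned chunks, using off_t
 * for file sizes/offsets and size_t for mapping lengths. */
#define _FILE_OFFSET_BITS 64   /* make off_t 64-bit on 32-bit targets */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s file\n", argv[0]);
        return EXIT_FAILURE;
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) { perror("open"); return EXIT_FAILURE; }

    struct stat sb;
    if (fstat(fd, &sb) < 0) { perror("fstat"); return EXIT_FAILURE; }

    long page = sysconf(_SC_PAGESIZE);
    off_t filesize = sb.st_size;           /* 64-bit with LFS enabled */
    size_t chunk = (size_t)page * 100000;  /* multiple of the page size, so
                                              every offset stays page-aligned */

    for (off_t offset = 0; offset < filesize; offset += (off_t)chunk) {
        size_t len = chunk;
        if (filesize - offset < (off_t)len)
            len = (size_t)(filesize - offset);   /* final, shorter chunk */

        char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, offset);
        if (p == MAP_FAILED) { perror("mmap"); return EXIT_FAILURE; }

        printf("offset %lld -> %.30s\n", (long long)offset, p);

        munmap(p, len);
    }

    close(fd);
    return 0;
}

Because the offset is an off_t and the mapping length is a size_t, offsets past 2 GiB no longer overflow a 32-bit int.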

Upvotes: 5
