Guilherme
Guilherme

Reputation: 453

Is the FILE struct consistent between operating systems and architectures?

For the following snippet of C code, LLVM will produce the IR below.

#include <stdio.h>
#include <stdlib.h>

/* Example program from the question: prints one line, then calls
 * fflush(NULL) to flush all open output streams. The fflush() call is what
 * makes the FILE type appear in the generated IR shown below. */
int main(){
  printf("Hello world\n");  /* appears as a call to puts in the IR below */
  fflush(NULL);  /* appears as fflush(%struct.__sFILE* null) in the IR below */
  return 0;
}
; ModuleID = 'a.c'
source_filename = "a.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"

; LLVM type for the C struct behind FILE on this target (see the target triple
; above). It has 20 elements: scalars, pointers, four function pointers and
; three embedded %struct.__sbuf values.
%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
; Only ever used through a pointer in this module, so it has no body here.
%struct.__sFILEX = type opaque
; Two-element helper struct (pointer + i32) embedded three times in %struct.__sFILE.
%struct.__sbuf = type { i8*, i32 }

; The text printed by the program. It is 12 bytes including the terminating NUL
; and carries no trailing newline, because the call below goes to puts.
@str = private unnamed_addr constant [12 x i8] c"Hello world\00", align 1

; Function Attrs: nounwind ssp uwtable
define i32 @main() local_unnamed_addr #0 {
  ; printf("Hello world\n") from the C source appears here as a call to puts.
  %1 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i64 0, i64 0))
  ; fflush(NULL): the null argument is typed as a pointer to %struct.__sFILE.
  %2 = tail call i32 @fflush(%struct.__sFILE* null)
  ret i32 0
}

; External functions called from @main; they are declared here, not defined.
; The parameter type of @fflush is the only place besides the call site where
; %struct.__sFILE is referenced in this module.
; Function Attrs: nounwind
declare i32 @fflush(%struct.__sFILE* nocapture) local_unnamed_addr #1

; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) local_unnamed_addr #2

; Attribute groups referenced above: #0 is attached to @main, #1 to the
; @fflush declaration and #2 to the @puts declaration.
attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

; Module-level metadata: !0-!2 are the module flags (SDK version, wchar_size,
; PIC level) and !3 is the identification string of the producing compiler.
!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 2, !"SDK Version", [3 x i32] [i32 10, i32 15, i32 4]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"PIC Level", i32 2}
!3 = !{!"Apple clang version 11.0.3 (clang-1103.0.32.59)"}

Can I assume that the declaration of the FILE struct is consistent across different operating systems? Is there a programmatic way to get the LLVM representation of this struct?

Upvotes: 0

Views: 414

Answers (2)

Jonathan Leffler
Jonathan Leffler

Reputation: 754450

The structure of the type behind FILE * is opaque to you as a user and variable between operating systems.

More formally, it depends on the C (or C++) compiler and the support library you are using, but that typically is one library per combination of hardware and operating system, except perhaps for systems where there is support for both 32-bit and 64-bit code.

Some programs (Perl) are prepared to poke around inside the structure on the different machines during the configuration phase, but they do so with inside knowledge. There is no standard structure except to the extent that a library like the GNU C Library might standardize it across machines. But assuming it is different everywhere is safest.

To counter the optimistic view proposed by Asadefa in their answer, here is the structure definition from AIX 7.2:

/*
 * FILE as quoted from AIX 7.2. One of two layouts is selected at compile
 * time. The member names are largely shared, but their order differs, _file
 * is an int in the first variant and a short in the second, and only the
 * first variant has the _unused and _unused1 members.
 */
#if defined(__64BIT__) || defined(__ia64)
typedef struct {
    unsigned char   *_ptr;
    unsigned char   *_base;
    unsigned char   *_bufendp;
    char    *__newbase;
    void    *_lock;
    int _cnt;
    int _file;
    int __stdioid;
    short   _flag;
    short   _unused;
    long    _unused1[4];
} FILE;
#else /* 32-bit POWER */
typedef struct {
    unsigned char   *_ptr;
    int _cnt;
    unsigned char   *_base;
    unsigned char   *_bufendp;
    short   _flag;
    short   _file;
    int __stdioid;
    char    *__newbase;
    void    *_lock;
} FILE;
#endif /* __64BIT__ || __ia64 */

That differs between 32-bit and 64-bit on a single O/S, and is wholly unrelated to the structure on Linux and macOS.

The structure on Solaris 10 is different again — using some bit-fields which the others do not:

/*
 * Stream structure as quoted from Solaris 10. _STDIO_REVERSE only swaps the
 * order of the first two members (_ptr and _cnt); everything after the
 * #endif is common to both configurations.
 */
struct __FILE_TAG   /* needs to be binary-compatible with old versions */
{
#ifdef _STDIO_REVERSE
    unsigned char   *_ptr;  /* next character from/to here in buffer */
    int     _cnt;   /* number of available characters in buffer */
#else
    int     _cnt;   /* number of available characters in buffer */
    unsigned char   *_ptr;  /* next character from/to here in buffer */
#endif
    unsigned char   *_base; /* the buffer */
    unsigned char   _flag;  /* the state of the stream */
    unsigned char   _file; /* Old home of the file descriptor */
                /* Only fileno(3C) can retrieve the value now */
    unsigned    __orientation:2; /* the orientation of the stream */
    unsigned    __ionolock:1;   /* turn off implicit locking */
    unsigned    __seekable:1;   /* is file seekable? */
    unsigned    __extendedfd:1; /* enable extended FILE */
    unsigned    __xf_nocheck:1; /* no extended FILE runtime check */
    unsigned    __filler:10; /* brings the bit-fields to 16 bits in total (2+1+1+1+1+10) */
};

And the version I found on HP-UX 11.31 is different again:

   /* FILE as quoted from HP-UX 11.31. The file descriptor is split across
    * two one-byte members (__fileL and __fileH) rather than stored in a
    * single integer member. */
   typedef struct {
    int      __cnt;
    unsigned char   *__ptr;
    unsigned char   *__base;
    unsigned short   __flag;
    unsigned char    __fileL;       /* low byte of file desc */
    unsigned char    __fileH;       /* high byte of file desc */
   } FILE;

Of course, if none of Solaris, HP-UX, AIX are on your radar screen, you may come to a different conclusion, but there are certainly vast differences between different systems.

Upvotes: 8

user12211554
user12211554

Reputation:

Here's the macOS implementation:

/*
 * macOS definition of FILE (struct __sFILE), as quoted in this answer.
 * Its 20 members line up, in order, with the 20 elements of the
 * %struct.__sFILE type in the LLVM IR from the question.
 * _Nullable on the function-pointer members is a Clang nullability
 * annotation; it does not change the members' types or the struct layout.
 */
typedef struct __sFILE {
    unsigned char *_p;  /* current position in (some) buffer */
    int _r;     /* read space left for getc() */
    int _w;     /* write space left for putc() */
    short   _flags;     /* flags, below; this FILE is free if 0 */
    short   _file;      /* fileno, if Unix descriptor, else -1 */
    struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
    int _lbfsize;   /* 0 or -_bf._size, for inline putc */

    /* operations */
    void    *_cookie;   /* cookie passed to io functions */
    int (* _Nullable _close)(void *);
    int (* _Nullable _read) (void *, char *, int);
    fpos_t  (* _Nullable _seek) (void *, fpos_t, int);
    int (* _Nullable _write)(void *, const char *, int);

    /* separate buffer for long sequences of ungetc() */
    struct  __sbuf _ub; /* ungetc buffer */
    struct __sFILEX *_extra; /* additions to FILE to not break ABI */
    int _ur;        /* saved _r when _r is counting ungetc data */

    /* tricks to meet minimum requirements even when malloc() fails */
    unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
    unsigned char _nbuf[1]; /* guarantee a getc() buffer */

    /* separate buffer for fgetln() when line crosses buffer boundary */
    struct  __sbuf _lb; /* buffer for fgetln() */

    /* Unix stdio files get aligned to block boundaries on fseek() */
    int _blksize;   /* stat.st_blksize (may be != _bf._size) */
    fpos_t  _offset;    /* current lseek offset (see WARNING) */
} FILE;

Here's the Linux implementation:

/*
 * Definition presented in this answer as the Linux one. Compared with the
 * macOS listing it has _ext and _up where macOS has _ub and _extra, and the
 * function-pointer members carry no _Nullable annotation.
 * NOTE(review): glibc declares FILE as struct _IO_FILE, so this BSD-style
 * __sFILE presumably comes from a different libc (e.g. Android's bionic) --
 * confirm the source of this listing.
 */
typedef struct __sFILE {
    unsigned char *_p;  /* current position in (some) buffer */
    int _r;     /* read space left for getc() */
    int _w;     /* write space left for putc() */
    short   _flags;     /* flags, below; this FILE is free if 0 */
    short   _file;      /* fileno, if Unix descriptor, else -1 */
    struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
    int _lbfsize;   /* 0 or -_bf._size, for inline putc */

    /* operations */
    void    *_cookie;   /* cookie passed to io functions */
    int (*_close)(void *);
    int (*_read)(void *, char *, int);
    fpos_t  (*_seek)(void *, fpos_t, int);
    int (*_write)(void *, const char *, int);

    /* extension data, to avoid further ABI breakage */
    struct  __sbuf _ext;
    /* data for long sequences of ungetc() */
    unsigned char *_up; /* saved _p when _p is doing ungetc data */
    int _ur;        /* saved _r when _r is counting ungetc data */

    /* tricks to meet minimum requirements even when malloc() fails */
    unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
    unsigned char _nbuf[1]; /* guarantee a getc() buffer */

    /* separate buffer for fgetln() when line crosses buffer boundary */
    struct  __sbuf _lb; /* buffer for fgetln() */

    /* Unix stdio files get aligned to block boundaries on fseek() */
    int _blksize;   /* stat.st_blksize (may be != _bf._size) */
    fpos_t  _offset;    /* current lseek offset */
} FILE;

_Nullable is a Clang nullability annotation that appears in the macOS headers, and I guess it can be ignored: it does not change the layout, and the code works the same with or without it. Otherwise the FILE struct seems to be the same on the Linux and Mac platforms, except that the Linux version has struct __sbuf _ext; and unsigned char *_up; where the macOS version has struct __sbuf _ub; and struct __sFILEX *_extra;.

Upvotes: 1

Related Questions