Reputation: 3457
Consider the following struct:
/* Pair of pointers to string data. The struct holds only the two addresses,
 * not the characters they point at. */
struct example_t {
    char * a;
    char * b;
};
/* Each member is initialized with the address of a string literal.
 * The `=` is required here; without it the declaration is not valid C. */
struct example_t test = {
    "Chocolate",
    "Cookies"
};
I am aware of the implementation specific nature of the allocation of memory for the char*'s, but what of the string literals?
In this case, is there any guarantee from the C standard with regard to the adjacent placement of "Chocolate" and "Cookies"?
In most implementations I tested the two literals are not padded, and are directly adjacent.
This allows the struct to be copied quickly with a memcpy, although I suspect this behavior is undefined. Does anyone have any information on this topic?
Upvotes: 3
Views: 232
Reputation: 12668
I'll try to show you an example of gcc behaviour where, even in that case, you don't get the strings adjacent in memory:
#include <stdio.h>
#include <stdlib.h>
/* Stand-alone pointer to a "Cookies" literal, defined ahead of the struct. */
char *s = "Cookies";

/* Four string pointers. */
struct test {
    char *a;
    char *b;
    char *c;
    char *d;
};

/* The literal "Cookies" is used for both b and d (and for s above). */
struct test t = {
    .a = "Chocolate",
    .b = "Cookies",
    .c = "Milk",
    .d = "Cookies",
};
/* D(fmt): prepends "file:line:function: " to the format string literal fmt
 * and supplies the matching __LINE__ and __func__ arguments. It expands to
 * several comma-separated arguments, so it is only usable as the leading
 * arguments of a printf-style call. */
#define D(x) __FILE__":%d:%s: " x, __LINE__, __func__
/* P(expr): prints the source text of expr, the pointer value it evaluates to
 * and the string it points at. expr is evaluated twice.
 * The '#' flag is not defined for the p conversion, and %p expects a pointer
 * to void, hence the plain "%p" and the explicit cast. */
#define P(x) do{\
printf(D(#x " = [%p] \"%s\"\n"), (const void *)(x), (x)); \
} while(0)
/* Prints, for each member of t, the pointer value and the string it points
 * at, so the addresses of the four literals can be compared. */
int main(void)
{
    P(t.a);
    P(t.b);
    P(t.c);
    P(t.d);
    return 0;
}
In this case, because the compiler tries to reuse string literals it has already seen, the ones used to initialize the structure fields do not end up adjacent in memory.
This is the output of the program:
$ pru3
pru3.c:25:main: t.a = [0x8518] "Chocolate"
pru3.c:26:main: t.b = [0x8510] "Cookies"
pru3.c:27:main: t.c = [0x8524] "Milk"
pru3.c:28:main: t.d = [0x8510] "Cookies"
As you see, the pointers are even repeated for the "Cookies"
value.
The program was compiled with the default options:
gcc -o pru3 pru3.c
Upvotes: 0
Reputation: 724
Here is an example demonstrating a real-world scenario where the strings are not adjacent. GCC decides to reuse the string "Chocolate"
from earlier.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Two file-scope pointers to literals; the text "Chocolate" is used again
 * in the struct initializer below. */
const char *a = "Chocolate";
const char *b = "Spinach";

/* Pair of pointers to read-only strings. */
struct test_t {
    const char *a;
    const char *b;
};

struct test_t test = {
    .a = "Chocolate",
    .b = "Cookies",
};
/* Prints the addresses held by the two file-scope pointers on one line and
 * the addresses held by the struct members on the next. */
int main(void)
{
    const void *global_a = a;
    const void *global_b = b;
    const void *member_a = test.a;
    const void *member_b = test.b;

    printf("%p %p\n", global_a, global_b);
    printf("%p %p\n", member_a, member_b);
    return EXIT_SUCCESS;
}
Output:
0x400614 0x40061e
0x400614 0x400626
Upvotes: 1
Reputation: 72657
No, there is no guarantee for adjacent placement.
One occasion where actual compilers will place them far apart is if the same string literal appears in different places (as read-only objects) and the string combining optimization is enabled.
Example:
/* Two stand-alone pointers to literals, defined before the struct. */
char *foo = "foo";
char *baz = "baz";
/* The first member uses the same literal text as `foo` above. */
struct example_t bar = {
    "foo",
    "bar"
};
may well end up in memory as "foo"
followed by "baz"
followed by "bar"
.
Upvotes: 1
Reputation: 19333
In your example, there are no absolute guarantees of the adjacency/placement of the two string literals with respect to each other. GCC in this case happens to demonstrate such behavior, but it has no obligation to exhibit this behavior.
In this example, we see no padding, and we can even use undefined behavior to demonstrate the adjacency of the string literals. This works with GCC, but with an alternate libc or a different compiler you could get other behavior, such as duplicate string literals being detected across translation units and merged to save memory in the final application.
Also, while the pointers you declared are of type char *, they would be better declared as const char *: string literals are typically stored in read-only data (.rodata), modifying them is undefined behavior, and writing to that memory will usually cause a segfault.
Code Listing
#include <stdio.h>
#include <string.h>
/* Three pointers to string data; only the addresses are stored here. */
struct example_t {
    char *a;
    char *b;
    char *c;
};
/*
 * Demonstration only: prints `len` bytes starting at test.a, one character at
 * a time. The walk deliberately continues past the end of the first string
 * literal, which is undefined behavior; the output is meaningful only if the
 * three literals happen to be stored back to back.
 */
int main(void) {
struct example_t test = {
"Chocolate",
"Cookies",
"And milk"
};
/* Characters in all three strings plus (3-1) extra bytes: one for each
 * boundary between consecutive strings. */
size_t len = strlen(test.a) + strlen(test.b) + strlen(test.c) + ((3-1) * sizeof(char));
/* Start of the walk: the address of the first literal only. */
char* t= test.a;
int i;
/* NOTE(review): i is an int while len is a size_t, so this comparison mixes
 * signed and unsigned operands. */
for (i = 0; i< len; i++) {
/* For i greater than strlen(test.a) this indexes outside the first literal. */
printf("%c", t[i]);
}
return 0;
}
Sample output
./a.out
ChocolateCookiesAnd milk
Output of gcc -S
# Compiler output for the C listing above (AT&T syntax, x86-64).
.file "test.c"
# The three string literals are emitted one after another in .rodata,
# each under its own local label.
.section .rodata
.LC0:
.string "Chocolate"
.LC1:
.string "Cookies"
.LC2:
.string "And milk"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %rbx
subq $72, %rsp
.cfi_offset 3, -24
# Initialize the local struct: store the addresses of the three literals
# into three consecutive 8-byte stack slots.
movq $.LC0, -48(%rbp)
movq $.LC1, -40(%rbp)
movq $.LC2, -32(%rbp)
# Sum strlen() of the three strings, accumulating in %rbx.
movq -48(%rbp), %rax
movq %rax, %rdi
call strlen
movq %rax, %rbx
movq -40(%rbp), %rax
movq %rax, %rdi
call strlen
addq %rax, %rbx
movq -32(%rbp), %rax
movq %rax, %rdi
call strlen
addq %rbx, %rax
# Add the constant 2 and store the total at -64(%rbp).
addq $2, %rax
movq %rax, -64(%rbp)
# Copy the first pointer to -56(%rbp) and zero the 32-bit counter at -68(%rbp).
movq -48(%rbp), %rax
movq %rax, -56(%rbp)
movl $0, -68(%rbp)
jmp .L2
# Loop body: load the byte at (base pointer + counter) and pass it to putchar.
.L3:
movl -68(%rbp), %eax
movslq %eax, %rdx
movq -56(%rbp), %rax
addq %rdx, %rax
movzbl (%rax), %eax
movsbl %al, %eax
movl %eax, %edi
call putchar
addl $1, -68(%rbp)
# Loop condition: sign-extend the counter and compare it with the stored
# total; jb is an unsigned "below" branch.
.L2:
movl -68(%rbp), %eax
cltq
cmpq -64(%rbp), %rax
jb .L3
# Return 0 and restore the saved registers.
movl $0, %eax
addq $72, %rsp
popq %rbx
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.4-2ubuntu1~14.04) 4.8.4"
.section .note.GNU-stack,"",@progbits
Upvotes: 4