Reputation: 1234
I have a requirement of dumping stack traces when my c++ Linux application crashes. I was successfully able to do this using backtrace()
and backtrace_symbols()
. Now, additionally, I would like to get the line numbers of the crash. How do I do it?
Upvotes: 15
Views: 25053
Reputation: 175
OK, so I have a Xavier (NVIDIA Jetpack 4.4 Ubuntu 18.04.5) open in front of me when I tripped over this nice note. I was chagrined to discover that this code did not work on Xavier, which is basically an aarch64 architecture and some of these signal structures are different.
So I managed to cobble together an example that works on both x86_64 and aarch64.
#include <stdio.h>
#include <signal.h>
#include <execinfo.h>
#include <stdlib.h>
#include <ucontext.h>
#include <string.h>
/*
 * Names of the saved-register fields inside mcontext_t, per architecture:
 *   _PC  program counter of the interrupted code
 *   _SP  stack pointer of the interrupted code
 * The spellings _PC/_SP are kept so existing uses keep compiling.
 * On x86_64, glibc only declares REG_RIP/REG_RSP when _GNU_SOURCE is defined
 * before the first #include.
 */
#ifdef __aarch64__
#define _PC pc
#define _SP sp
#elif defined(__x86_64__)
#define _PC gregs[REG_RIP]
#define _SP gregs[REG_RSP]
#else
#error architecture not supported
#endif

/*
 * Print the "file:line" of one backtrace frame by running the external
 * addr2line tool against this executable.
 *
 * traceP   - absolute address of the frame, as returned by backtrace().
 * messageP - the matching backtrace_symbols() string, for example
 *            "./sighandler(+0xdf4) [0x5563baadf4]"; may be NULL.
 *
 * When the string names this executable and carries a module-relative
 * offset of the form "(+0x...)", that offset is looked up; this is what
 * addr2line needs for a position-independent executable. In every other
 * case (no offset, "symbol+offset" form, or a frame from another module)
 * the absolute address is used, which is correct for a non-PIE build and
 * otherwise yields "??".
 */
static void addr2line(void *traceP, void *messageP) {
    static const char app[] = "sighandler"; /* the name of this app */
    static const char hex_chars[] = "0123456789abcdefABCDEFxX";
    const size_t app_len = sizeof app - 1;
    const char *msg = (const char *) messageP;
    const char *paren = msg ? strchr(msg, '(') : NULL;
    size_t off_len = 0;
    char syscom[256];
    int len;

    if (paren && paren[1] == '+') {
        size_t mod_len = (size_t) (paren - msg);

        /* Only trust the offset when the module path ends in our own name. */
        if (mod_len >= app_len
            && memcmp(paren - app_len, app, app_len) == 0
            && (mod_len == app_len || paren[-(ptrdiff_t) app_len - 1] == '/')) {
            /* Accept hex digits only, so nothing unexpected reaches the shell. */
            size_t n = strspn(paren + 2, hex_chars);
            if (n > 0 && paren[2 + n] == ')')
                off_len = n;
        }
    }

    if (off_len > 0)
        len = snprintf(syscom, sizeof syscom, "addr2line %.*s -e %s",
                       (int) off_len, paren + 2, app);
    else
        len = snprintf(syscom, sizeof syscom, "addr2line %p -e %s", traceP, app);

    if (len < 0 || (size_t) len >= sizeof syscom) {
        fprintf(stderr, "addr2line: command too long, frame skipped\n");
        return;
    }

    /* The child writes straight to fd 1; flush first so the lines stay in order
       when stdout is a pipe or a file. */
    fflush(stdout);
    if (system(syscom) == -1)
        perror("system");
}

/*
 * SA_SIGINFO signal handler: prints the signal, a backtrace of the
 * interrupted code with file:line information, then terminates the process.
 *
 * sig    - signal number being delivered.
 * psi    - signal details; si_addr is the address whose access faulted.
 * ctxarg - ucontext_t * holding the register state at the time of the signal.
 *
 * NOTE: printf(), backtrace_symbols() and system() are not async-signal-safe;
 * this is a last-gasp diagnostic for a process that is going down anyway.
 * NOTE(review): the process exits with status 0 even after a crash; confirm
 * whether callers rely on that before changing it.
 */
void bt_sighandler(int sig, siginfo_t *psi, void *ctxarg) {
    void *trace[16];
    char **messages = NULL;
    int i, trace_size;
    mcontext_t *ctxP = &((ucontext_t *) ctxarg)->uc_mcontext;
    void *fault_pc = (void *) ctxP->_PC;

    if (sig == SIGSEGV)
        printf("Got signal %d, faulty address is %p, "
               "from %p\n", sig, psi->si_addr, fault_pc);
    else
        printf("Got signal %d\n", sig);

    trace_size = backtrace(trace, (int) (sizeof trace / sizeof trace[0]));

    /* Frame 0 is this handler and frame 1 the signal-delivery frame that
       called it; replace the latter with the PC of the interrupted code. */
    if (trace_size > 1)
        trace[1] = fault_pc;

    messages = backtrace_symbols(trace, trace_size);

    /* skip first stack frame (points here) */
    printf("[bt] Execution path:\n");
    for (i = 1; i < trace_size; ++i) {
        if (messages)
            printf("[bt] #%d %s\n", i, messages[i]);
        else
            printf("[bt] #%d [%p]\n", i, trace[i]); /* symbolisation failed */
        addr2line(trace[i], messages ? messages[i] : NULL);
    }

    free(messages);
    exit(0);
}
/*
 * Demo helper that deliberately faults: it stores a byte through a pointer
 * set to the constant 0xdeadbeef before returning twice the sum of its
 * arguments.
 */
int func_a(int a, char b) {
    char *const bogus = (char *)0xdeadbeef;
    const int sum = a + b;

    bogus[0] = 10; /* CRASH here!! */
    return sum * 2;
}
/* Intermediate frame for the demo: returns 5 plus func_a(5, 't'). */
int func_b() {
    int seed = 5;

    return 5 + func_a(seed, 't');
}
/*
 * Installs bt_sighandler for SIGSEGV and SIGUSR1, then runs the demo call
 * chain func_b() -> func_a() and prints its result.
 */
int main() {
    /* Install our signal handler */
    struct sigaction sa;

    /* Start from all-zero so no field is passed to the kernel uninitialised. */
    memset(&sa, 0, sizeof sa);
    sa.sa_sigaction = bt_sighandler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART|SA_SIGINFO;

    /* A failed installation is reported but not fatal: the program still
       runs, it just will not produce a backtrace for that signal. */
    if (sigaction(SIGSEGV, &sa, NULL) != 0)
        perror("sigaction(SIGSEGV)");
    if (sigaction(SIGUSR1, &sa, NULL) != 0)
        perror("sigaction(SIGUSR1)");
    /* ... add any other signal here */

    /* Do something */
    printf("%d\n", func_b());
    return 0;
}
Basically:
The code now runs on x86_64 as well and gives similar, usable results.
xavier $ ./sighandler 3
Got signal 11, faulty address is 0x5563baadf4, from 0x7fd4439350
[bt] Execution path:
[bt] #1 [0x7fd4439350]
??:0
[bt] #2 ./sighandler(+0xdf4) [0x5563baadf4]
/home/jsaari/project/radar_ars/alfalfa/cuda/debug/sighandler.cpp:79
[bt] #3 ./sighandler(+0xe24) [0x5563baae24]
/home/jsaari/project/radar_ars/alfalfa/cuda/debug/sighandler.cpp:89
[bt] #4 ./sighandler(+0xea4) [0x5563baaea4]
/home/jsaari/project/radar_ars/alfalfa/cuda/debug/sighandler.cpp:109
[bt] #5 /lib/aarch64-linux-gnu/libc.so.6(__libc_start_main+0xe0) [0x7f9ae316e0]
??:0
[bt] #6 ./sighandler(+0xa94) [0x5563baaa94]
:?
x86_64 $ ./sighandler 3
Got signal 11, faulty address is 0x7ffe291d27d0, from 0x40095a
[bt] Execution path:
[bt] #1 ./sighandler() [0x40095a]
/home/jsaari/common/experiment/backtrace/sighandler.cpp:79
[bt] #2 ./sighandler() [0x40095a]
/home/jsaari/common/experiment/backtrace/sighandler.cpp:79
[bt] #3 ./sighandler() [0x400982]
/home/jsaari/common/experiment/backtrace/sighandler.cpp:89
[bt] #4 ./sighandler() [0x4009f4]
/home/jsaari/common/experiment/backtrace/sighandler.cpp:109
[bt] #5 /lib64/libc.so.6(__libc_start_main+0xf5) [0x7f294a36d495]
??:0
[bt] #6 ./sighandler() [0x4006d9]
??:?
IMHO - Now that NVIDIA has acquired ARM, ARM WILL TAKE OVER THE INTERNET-OF-THINGS (if it hasn't already), and Intel is likely to start struggling.
Apparently, there is an address scrambling going on as documented here:
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=860394
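Apparently, this is caused by gcc building position-independent executables (PIE) by default, so the addresses in trace[i] include the load offset chosen at run time. If you build the app with "-no-pie", then the trace[i] entries work with addr2line directly.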
I have not found how to manage this aside from the process I coded.
From this posting:
How to find load relocation for a PIE binary?
The relocation value can be obtained from
#include <link.h>
. . .
uintptr_t relocation = _r_dump.r_map->l_addr;
The relocation value can be subtracted from trace[i] to get an address that addr2line can make use of (result on xavier aarch64 box I'm using).
For a non-relocation binary, the value of relocation is "0" (result on the x86_64 box I'm using).
Upvotes: 0
Reputation: 61
system()
needs #include <stdlib.h>
Few other things seem missing.
$ g++-8 -g -o dump dump.cpp
dump.cpp: In function ‘void bt_sighandler(int, sigcontext)’:
dump.cpp:15:43: error: ‘struct sigcontext’ has no member named
‘eip’; did > you mean ‘rip’?
"from %p\n", sig, ctx.cr2, ctx.eip);
^~~
rip
dump.cpp:21:26: error: ‘struct sigcontext’ has no member
named ‘eip’;
did you mean ‘rip’?
trace[1] = (void *)ctx.eip;
^~~
rip
dump.cpp: In function ‘int main()’:
dump.cpp:64:19: error: invalid conversion from ‘void*’ to
‘__sighandler_t’ > {aka ‘void (*)(int)’} [-fpermissive]
sa.sa_handler = (void *)bt_sighandler;
^~~~~~~~~~~~~~~~~~~~~
Upvotes: 2
Reputation: 541
As Saqlain pointed out, addr2line can be used to get the line number.
If a library is preferred, take a look at the LPT kit. Installation instructions are available here. LPT relies on the bfd library.
Upvotes: 1
Reputation: 1
It is only possible if the program has been compiled with debugging information (i.e. with gcc -Wall -g
or with g++ -Wall -g
). Without -g
the executable does not contain any source line information. And if using gcc
you can compile with both optimization & debugging information (e.g. g++ -Wall -g -O2
) but sometimes the line location would be "surprising".
The -Wall
flag asks GCC to show all warnings. It is very useful (hence my recommendation to use it) but unrelated to -g
or debugging information.
As to how to extract the line number, the simplest way would be to fork a gdb
process. Alternatively, you could get the debugging information (in DWARF format) and parse it, perhaps using libdwarf
from the ELF tool chain. I am not sure it is worth the trouble...
To just get the backtrace, you might simply run your program thru gdb
perhaps as gdb --args yourprogram itsarguments
...
you could also use the libbacktrace from inside a recent GCC (actually it is Ian Taylor's libbacktrace), which is designed to solve your problem (it is "interpreting" the DWARF format of the current executable, which you would compile with g++ -O -g
).
Upvotes: 3
Reputation: 17928
I took help from
http://www.linuxjournal.com/files/linuxjournal.com/linuxjournal/articles/063/6391/6391l2.html and http://www.linuxjournal.com/article/6391?page=0,0 to come up with sample code which shows how you can achieve this.
Basically it is about putting a stack backtrace inside a signal handler and having the latter catch all the "bad" signals your program can receive (SIGSEGV, SIGBUS, SIGILL, SIGFPE and the like). This way, if your program unfortunately crashes and you were not running it with a debugger, you can get a stack trace and know where the fault happened. This technique also can be used to understand where your program is looping in case it stops responding...
The code below runs the external program addr2line for every address in the trace to convert it into a file name and a line number.
The source code below prints line numbers for all local functions. If a function from another library is called, you might see a couple of ??:0 instead of file names.
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <execinfo.h>
/*
 * Legacy-style signal handler: prints the signal, then a backtrace of the
 * interrupted code with one addr2line lookup per frame, then exits.
 *
 * sig - signal number being delivered.
 * ctx - register state of the interrupted code. The fields used here
 *       (cr2 = faulting address, eip = instruction pointer) are the 32-bit
 *       x86 names; on x86_64 the compiler rejects eip (the member is rip).
 *
 * NOTE: printf(), backtrace_symbols() and system() are not async-signal-safe;
 * this is a last-gasp diagnostic for a process that is going down anyway.
 */
void bt_sighandler(int sig, struct sigcontext ctx) {
    void *trace[16];
    char **messages = NULL;
    int i, trace_size;

    /* %p requires a void *, so the integer register values are cast. */
    if (sig == SIGSEGV)
        printf("Got signal %d, faulty address is %p, "
               "from %p\n", sig, (void *)ctx.cr2, (void *)ctx.eip);
    else
        printf("Got signal %d\n", sig);

    trace_size = backtrace(trace, (int)(sizeof trace / sizeof trace[0]));

    /* overwrite sigaction with caller's address */
    if (trace_size > 1)
        trace[1] = (void *)ctx.eip;

    messages = backtrace_symbols(trace, trace_size);

    /* skip first stack frame (points here) */
    printf("[bt] Execution path:\n");
    for (i = 1; i < trace_size; ++i) {
        char syscom[256];
        int len;

        if (messages)
            printf("[bt] #%d %s\n", i, messages[i]);
        else
            printf("[bt] #%d [%p]\n", i, trace[i]); /* symbolisation failed */

        /* last parameter is the name of this app */
        len = snprintf(syscom, sizeof syscom, "addr2line %p -e sighandler", trace[i]);
        if (len < 0 || (size_t)len >= sizeof syscom)
            continue;

        /* The child writes straight to fd 1; flush first so the lines stay
           in order when stdout is a pipe or a file. */
        fflush(stdout);
        if (system(syscom) == -1)
            perror("system");
    }

    free(messages);
    exit(0);
}
/*
 * Demo helper that deliberately faults: it stores a byte through a pointer
 * set to the constant 0xdeadbeef before returning twice the sum of its
 * arguments.
 */
int func_a(int a, char b) {
    char *const wild = (char *)0xdeadbeef;
    const int total = a + b;

    wild[0] = 10; /* CRASH here!! */
    return total * 2;
}
/* Intermediate frame for the demo: returns 5 plus func_a(5, 't'). */
int func_b() {
    int start = 5;

    return 5 + func_a(start, 't');
}
/*
 * Installs bt_sighandler for SIGSEGV and SIGUSR1, then runs the demo call
 * chain func_b() -> func_a() and prints its result.
 */
int main() {
    /* Install our signal handler */
    struct sigaction sa;

    /* bt_sighandler takes an extra struct sigcontext argument, so it must be
       cast to the one-argument handler type; a cast through void * is rejected
       by C++ compilers. */
    sa.sa_handler = (void (*)(int))bt_sighandler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART;

    /* A failed installation is reported but not fatal: the program still
       runs, it just will not produce a backtrace for that signal. */
    if (sigaction(SIGSEGV, &sa, NULL) != 0)
        perror("sigaction(SIGSEGV)");
    if (sigaction(SIGUSR1, &sa, NULL) != 0)
        perror("sigaction(SIGUSR1)");
    /* ... add any other signal here */

    /* Do something */
    printf("%d\n", func_b());
    return 0;
}
This code should be compiled as: gcc sighandler.c -o sighandler -rdynamic
The program outputs:
Got signal 11, faulty address is 0xdeadbeef, from 0x8048975
[bt] Execution path:
[bt] #1 ./sighandler(func_a+0x1d) [0x8048975]
/home/karl/workspace/stacktrace/sighandler.c:44
[bt] #2 ./sighandler(func_b+0x20) [0x804899f]
/home/karl/workspace/stacktrace/sighandler.c:54
[bt] #3 ./sighandler(main+0x6c) [0x8048a16]
/home/karl/workspace/stacktrace/sighandler.c:74
[bt] #4 /lib/tls/i686/cmov/libc.so.6(__libc_start_main+0xe6) [0x3fdbd6]
??:0
[bt] #5 ./sighandler() [0x8048781]
??:0
Upvotes: 20