Linux Applications Debugging Techniques/Resource leaks
Zombie threads
Any thread that has terminated but has not been joined or detached will leak OS resources until the process terminates. Unfortunately, neither /proc nor gdb will show you these zombie threads, at least not on some kernels.
One way to get them is with a gdb canned command:
#
# trace_call <location>
#
# Canned command: sets a breakpoint at the location given as the first
# argument and attaches a command list to it that prints a full backtrace
# (including local variables) on every hit and then resumes the program.
#
define trace_call
b $arg0
commands
bt full
continue
end
end
document trace_call
Trace specified call with call stack to screen. Example:
set breakpoint pending on
set pagination off
set logging on
trace_call __pthread_create_2_1
end
Using host libthread_db library "/lib/i686/cmov/libthread_db.so.1".
(gdb) trace_call __pthread_create_2_1
Function "__pthread_create_2_1" not defined.
Breakpoint 1 (__pthread_create_2_1) pending.
(gdb) trace_call __pthread_create_2_0
Function "__pthread_create_2_0" not defined.
Breakpoint 2 (__pthread_create_2_0) pending.
(gdb) r
Starting program: /home/amelinte/projects/articole/wikibooks/debug/plock foo bar bax
[Thread debugging using libthread_db enabled]
Breakpoint 3 at 0xb7f9b746
Pending breakpoint "__pthread_create_2_1" resolved
Breakpoint 4 at 0xb7f9c395
Pending breakpoint "__pthread_create_2_0" resolved
[New Thread 0xb7e48ad0 (LWP 8635)]
[Switching to Thread 0xb7e48ad0 (LWP 8635)]
Breakpoint 3, 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
#0 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
No symbol table info available.
#1 0x08048a7f in main (argc=4, argv=0xbfceb714) at plock.c:97
s = 0
tnum = 0
opt = -1
num_threads = 3
tinfo = (struct thread_info *) 0x833b008
attr = {__size = '\0' <repeats 13 times>, "\020", '\0' <repeats 21 times>, __align = 0}
stack_size = -1
res = (void *) 0x0
[New Thread 0xb7e47b90 (LWP 8638)]
Thread 1: top of stack near 0xb7e473c8; argv_string=foo
Another way is to use (again) an interposition library:
/*
* Hook library. Usage:
* gcc -c -g -Wall -fPIC libhook.c -o libhook.o
* ld -o libhook.so libhook.o -shared -ldl
* LD_PRELOAD=./libhook.so program arguments
*
* Copyright 2012 Aurelian Melinte.
* Released under GPL 3.0 or later.
*/
#define _GNU_SOURCE
#include <dlfcn.h>
#include <signal.h>
#include <execinfo.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h> /*printf*/
#include <unistd.h>
#include <pthread.h>
#include <assert.h>
/* Pointer type for functions taking a single pthread_mutex_t pointer.
 * Not referenced by the rest of the visible code. */
typedef int (*lp_pthread_mutex_func)(pthread_mutex_t *mutex);
/* Pointer type with the same signature as pthread_create(). */
typedef int (*pthread_create_func)(pthread_t *thread,
const pthread_attr_t *attr,
void *(*start_routine) (void *), void *arg);
/* Address of the next pthread_create definition in the lookup order;
 * stays NULL until hook_one() stores the dlsym() result here. */
static pthread_create_func _pthread_create_hook = NULL;
/*
 * Resolve the next definition of the function named `fname` (the one that
 * would have been used had this library not been preloaded) and store its
 * address in *fptr. Does nothing if *fptr is already set.
 *
 * fptr  - in/out: slot receiving the resolved address; must not be NULL.
 * fname - symbol name to look up; must not be NULL.
 *
 * Returns 0 when *fptr is (or already was) resolved, -1 when the lookup
 * failed.
 */
static int
hook_one(pthread_create_func *fptr, const char *fname)
{
    const char *msg = NULL;

    assert(fptr != NULL);
    assert(fname != NULL);

    if (*fptr != NULL) {
        return 0; /* already resolved, nothing to do */
    }

    printf("dlsym : wrapping %s\n", fname);

    /* Clear any stale error so that a non-NULL dlerror() below can only
     * have been caused by this dlsym() call. */
    (void) dlerror();
    *fptr = (pthread_create_func) dlsym(RTLD_NEXT, fname);
    /* Fetch the error unconditionally: dlerror() resets its state on each
     * call, and the message is needed even when dlsym() returned NULL. */
    msg = dlerror();

    /* %p requires a void pointer; POSIX allows the conversion for
     * addresses obtained from dlsym(). */
    printf("next_%s = %p\n", fname, (void *) *fptr);

    if (*fptr == NULL || msg != NULL) {
        /* Never hand a NULL pointer to %s. */
        printf("dlsym %s failed : %s\n", fname,
               msg != NULL ? msg : "symbol not found");
        return -1;
    }

    printf("dlsym: wrapping %s done\n", fname);
    return 0;
}
/*
 * Make sure the real pthread_create has been looked up.
 * Terminates the process with EXIT_FAILURE when the lookup fails.
 */
static void
hook_funcs(void)
{
    int status;

    /* Already resolved by an earlier call: nothing left to do. */
    if (_pthread_create_hook != NULL) {
        return;
    }

    status = hook_one(&_pthread_create_hook, "pthread_create");
    if (status == 0 && _pthread_create_hook != NULL) {
        return;
    }

    printf("Failed to hook.\n");
    exit(EXIT_FAILURE);
}
/*
 * Interposed pthread_create(): forwards the call to the next
 * pthread_create definition and then prints the caller's backtrace to
 * standard output, so every thread creation site shows up in the log.
 *
 * Parameters and return value are those of the forwarded call.
 */
int
pthread_create(pthread_t *thread,
               const pthread_attr_t *attr,
               void *(*start_routine) (void *), void *arg)
{
    /* Maximum number of stack frames captured per call. */
    enum { MAX_FRAMES = 40 };

    void *frames[MAX_FRAMES] = {0};
    int nframes = 0;
    int rc = EINVAL;

    /* The wrapper can be reached before this library's constructor has
     * run (e.g. from another library's initializer), in which case the
     * hook pointer would still be NULL. Resolve it on demand; this is a
     * cheap no-op once the pointer is set. */
    hook_funcs();

    rc = _pthread_create_hook(thread, attr, start_routine, arg);

    printf("*** pthread_create:\n");
    /* printf() goes through the stdio buffer while backtrace_symbols_fd()
     * writes straight to the descriptor: flush so the header is emitted
     * before the frames it announces. */
    fflush(stdout);

    nframes = backtrace(frames, MAX_FRAMES);
    backtrace_symbols_fd(frames, nframes, STDOUT_FILENO);

    return rc;
}
/*
 * Library constructor: announces itself on standard output and resolves
 * the hooked functions.
 *
 * NOTE(review): the name _init may clash with the C runtime's own _init
 * when the library is linked through the compiler driver; the usage notes
 * at the top of the file link with ld directly - confirm before changing
 * the build.
 */
__attribute__((constructor)) void
_init(void)
{
    fputs("*** _init().\n", stdout);
    hook_funcs();
}
/*
 * Library destructor: only announces itself on standard output.
 */
__attribute__((destructor)) void
_fini(void)
{
    fputs("*** _fini().\n", stdout);
}
The output is a bit rough but it can be refined down to file and line by replacing backtrace_symbols_fd() with appropriate code:
*** pthread_create:
./libhook.so(pthread_create+0x8c)[0x400215d3]
./plock[0x8048a7f]
/lib/i686/cmov/libc.so.6(__libc_start_main+0xe0)[0x4006f450]
./plock[0x8048791]
File descriptors
As just about anything is a file (folders, sockets, pipes, etc.), just about anything can result in a file descriptor that needs to be closed. /proc can help:
# tree /proc/26041
/proc/26041
...
|-- fd # Open file descriptors
| |-- 0 -> /dev/pts/21
| |-- 1 -> /dev/pts/21
| |-- 2 -> /dev/pts/21
| `-- 3 -> socket:[113497835]
|-- fdinfo
| |-- 0
| |-- 1
| |-- 2
| `-- 3
...
The trace_call command for gdb can help with the call stack.
If gdb is not available on the machine, an interposition library hooking open(), pipe(), socket(), etc. can be built.
Other tools that can be used:
- lsof
- fuser
Ports
Which process is using a port? As root:
# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:36510 0.0.0.0:* LISTEN -
tcp 0 0 127.0.0.1:2207 0.0.0.0:* LISTEN 3438/python
...
# lsof
COMMAND PID USER FD TYPE DEVICE SIZE NODE NAME
init 1 root cwd DIR 253,0 4096 2 /
...
python 3438 root 4u IPv4 11416 TCP localhost.localdomain:2207 (LISTEN)
# lsof -i :2207
COMMAND PID USER FD TYPE DEVICE SIZE NODE NAME
python 3438 root 4u IPv4 11416 TCP localhost.localdomain:2207 (LISTEN)
Other tools:
- fuser
IPC
For semaphores, shared memory and message queues:
- ipcs
- ipcrm
# ipcs -spt
------ Semaphore Operation/Change Times --------
semid owner last-op last-changed
187826177 aurelian_m Fri Feb 10 09:37:26 2012 Fri Feb 10 09:33:39 2012
187858946 aurelian_m Fri Feb 10 09:52:11 2012 Fri Feb 10 09:50:44 2012
DIY: an interposition resource counter
libmemleak can be easily modified to keep track of whatever resources are leaking: hook the right API (e.g. open()/close()).