Add

Sunday 1 December 2013

Strace: Practical Troubleshooting

strace: practical troubleshooting

Debugging / diagnostic tool widely used by DevOps team, and its pocket knife for administrators in problem solving where they don’t have any access to source code, directly examining system and system calls at kernel level for any particular process just by using process ID and getting output to particular file with strace is art of sysadmins.


Once process is attached it will display all the system calls and signals resulting from that process, simple to use like bash commands.


NOTE: System calls – In Unix shell act as interface between user and kernel, similarly kernel offers interface for any executing programs to use hardware resource thus creating a abstract layer between application programs and hardware. Ex:  read() write () open() connect() .. etc


Advantage – security, portability, uniform high level programming language for developers.     


How does it really help in troubleshooting??

·      Application fails at start itself – hence no logs available troubleshoot.

·      To see how much of your hardware resource used by process.

·      Debugging output – to know what exactly that application is doing at system level.

·      Unresponsive application ( job / script ).

·      Extract the data from process.

 


Installation:

#yum install strace                   [ Cent OS / Fedora / Redhat ]

#apt-get install strace              [ debian OS – Ubuntu ]


Command usage:

strace                    <<  options  >>              <<  arguments  >>                   << PURPOSE >>
                              -p                                     << process PID >>                  Attaching process
                              -o                                     << output file >>                     get output in a file
                              -c                                                                                      Summary and statistics
-f F                                                                                    fallow forks
                              -e                 << read, access, connect etc.>>                 capture only specific system calls



Steps how to attach process to strace and get output in a file.

STEP 1: Collecting the process ID<< parent or child >>

# sudo ps -ef | grep << process_name >>




STEP 2: Attaching the process to strace command and writing output in file.
         
         # strace -o /tmp/strace.txt -p pid





STEP 3: understanding the strace output.

# grep or tail -f  /tmp/strace.txt for any particular pattern.




Using Strace : While restarting apache-httpd process.


#[root@LAMPLAB ~]# strace -Ff -o output.txt -e open /etc/init.d/httpd restart
#
#finding out the log files used by apache httpd process
#
#cat output.txt | grep 'log'
[pid 13595] open("/etc/httpd/modules/mod_log_config.so", O_RDONLY) = 4
[pid 13595] open("/etc/httpd/modules/mod_logio.so", O_RDONLY) = 4
[pid 13595] open("/etc/httpd/logs/error_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 10
[pid 13595] open("/etc/httpd/logs/ssl_error_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 11
[pid 13595] open("/etc/httpd/logs/access_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 12
[pid 13595] open("/etc/httpd/logs/cm4msaa7.com", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 13
[pid 13595] open("/etc/httpd/logs/ssl_access_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 14
[pid 13595] open("/etc/httpd/logs/ssl_request_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 15
[pid 13595] open("/etc/httpd/modules/mod_log_config.so", O_RDONLY) = 9
[pid 13595] open("/etc/httpd/modules/mod_logio.so", O_RDONLY) = 9
[pid 13596] open("/etc/httpd/logs/error_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 10
[pid 13596] open("/etc/httpd/logs/ssl_error_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 11
open("/etc/httpd/logs/access_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 12
open("/etc/httpd/logs/cm4msaa7.com", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 13
open("/etc/httpd/logs/ssl_access_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 14
open("/etc/httpd/logs/ssl_request_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 15

#In all cases where a system call fails, strace will return a line with “= -1” in the output,

#[root@LAMPLAB ~]#cat output.txt | grep '= -1'
[pid 13748] open("/etc/selinux/config", O_RDONLY|O_LARGEFILE) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/tls/i686/sse2/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/tls/i686/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/tls/sse2/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/tls/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/i686/sse2/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/i686/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/sse2/libperl.so", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/libnsl.so.1", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/libutil.so.1", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/etc/gai.conf", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid 13748] open("/etc/httpd/logs/error_log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = -1 EACCES (Permission denied)


 While opening a file using cat.


[root@LAMPLAB ~]# strace -fF -o helloout.txt cat hello.txt
Hi Hello
Welcome to strace.
Enjoy the course, Thanks all.
[root@LAMPLAB ~]# 
[root@LAMPLAB ~]# 
[root@LAMPLAB ~]# cat helloout.txt
2359  execve("/bin/cat", ["cat", "hello.txt"], [/* 23 vars */]) = 0
2359  brk(0)                            = 0x8262000
2359  mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77a7000
2359  access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
2359  open("/etc/ld.so.cache", O_RDONLY) = 3
2359  fstat64(3, {st_mode=S_IFREG|0644, st_size=25975, ...}) = 0
2359  mmap2(NULL, 25975, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb77a0000
2359  close(3)                          = 0
2359  open("/lib/libc.so.6", O_RDONLY)  = 3
2359  read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@n\1\0004\0\0\0"..., 512) = 512
2359  fstat64(3, {st_mode=S_IFREG|0755, st_size=1902708, ...}) = 0
2359  mmap2(NULL, 1665416, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x20c000
2359  mprotect(0x39c000, 4096, PROT_NONE) = 0
2359  mmap2(0x39d000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x190) = 0x39d000
2359  mmap2(0x3a0000, 10632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3a0000
2359  close(3)                          = 0
2359  mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb779f000
2359  set_thread_area({entry_number:-1 -> 6, base_addr:0xb779f6c0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
2359  mprotect(0x39d000, 8192, PROT_READ) = 0
2359  mprotect(0xde5000, 4096, PROT_READ) = 0
2359  munmap(0xb77a0000, 25975)         = 0
2359  brk(0)                            = 0x8262000
2359  brk(0x8283000)                    = 0x8283000
2359  open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
2359  fstat64(3, {st_mode=S_IFREG|0644, st_size=99158544, ...}) = 0
2359  mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb759f000
2359  close(3)                          = 0
2359  fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 4), ...}) = 0
2359  open("hello.txt", O_RDONLY|O_LARGEFILE) = 3
2359  fstat64(3, {st_mode=S_IFREG|0644, st_size=58, ...}) = 0
2359  read(3, "Hi Hello\nWelcome to strace.\nEnjo"..., 32768) = 58
2359  write(1, "Hi Hello\nWelcome to strace.\nEnjo"..., 58) = 58
2359  read(3, "", 32768)                = 0
2359  close(3)                          = 0
2359  close(1)                          = 0
2359  close(2)                          = 0
2359  exit_group(0)                     = ?


This example shows summary report for cat process

[root@LAMPLAB ~]#strace -c cat hello.txt
Hi Hello
Welcome to strace.
Enjoy the course, Thanks all.
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 30.73    0.003000        3000         1           execve
 26.88    0.002624         375         7           mmap2
 15.99    0.001561         390         4           open
 10.24    0.001000         333         3           brk
 10.23    0.000999         200         5           fstat64
  5.92    0.000578         578         1           set_thread_area
  0.00    0.000000           0         3           read
  0.00    0.000000           0         1           write
  0.00    0.000000           0         6           close
  0.00    0.000000           0         1         1 access
  0.00    0.000000           0         1           munmap
  0.00    0.000000           0         3           mprotect
------ ----------- ----------- --------- --------- ----------------
100.00    0.009762                    36         1 total