关于这个话题,我们一般是为了处理一下生产环境中程序出现死循环或者死锁等问题。我们一般想到的方法就是gdb attach上一个运行中的进程。但是这个需要手动交互。通过网上查找和实践,可以有以下几种选择:
方法 | 描述 | 优点/缺点 |
pstack | 依赖系统中的gdb,会使程序短暂地停止运行。 | 优点:不需要对原有程序做任何改变,可以直接查看运行时状态。 缺点: 依赖gdb |
gcore | 依赖系统中的gdb,会使程序短暂地停止运行。 | 优点:不需要对原有程序做任何改变,可以直接查看运行时状态。 缺点: 依赖gdb |
fork() | 需要改造原有程序,增加事件代码触发fork()动作 | 优点:不依赖gdb。 缺点: 需要修改源程序 |
1. pstack
- [root@localhost ~]# cat /usr/bin/gstack
#!/bin/sh
#
# gstack - print the stack trace(s) of a running process.
#
# Usage: gstack <process-id>
#
# Feeds a backtrace command to GDB (the binary can be overridden through
# the GDB environment variable, default /usr/bin/gdb) attached to the given
# PID and prints only the "Thread ..." headers and "#N ..." frame lines.
# Exits 1 when the argument count is wrong or /proc/<pid> is not readable.

if test $# -ne 1; then
    echo "Usage: $(basename "$0" .sh) <process-id>" 1>&2
    exit 1
fi

if test ! -r "/proc/$1"; then
    echo "Process $1 not found." 1>&2
    exit 1
fi

# GDB doesn't allow "thread apply all bt" when the process isn't
# threaded; need to peek at the process to determine if that or the
# simpler "bt" should be used.

backtrace="bt"
if test -d "/proc/$1/task" ; then
    # Newer kernel; has a task/ directory.
    # The wc output is left unquoted on purpose: it is a bare number.
    if test $(/bin/ls "/proc/$1/task" | /usr/bin/wc -l) -gt 1 2>/dev/null ; then
        backtrace="thread apply all bt"
    fi
elif test -f "/proc/$1/maps" ; then
    # Older kernel; go by it loading libpthread.
    if /bin/grep -e libpthread "/proc/$1/maps" > /dev/null 2>&1 ; then
        backtrace="thread apply all bt"
    fi
fi

GDB=${GDB:-/usr/bin/gdb}

# Probe whether this GDB accepts --readnever; pass the flag only if it does.
if $GDB -nx --quiet --batch --readnever > /dev/null 2>&1; then
    readnever=--readnever
else
    readnever=
fi

# Run GDB, strip out unwanted noise.
# $GDB and $readnever stay unquoted so that an empty $readnever expands to
# nothing and a GDB value carrying extra options still splits into words.
$GDB --quiet $readnever -nx "/proc/$1/exe" "$1" <<EOF 2>&1 |
$backtrace
EOF
/bin/sed -n \
    -e 's/^(gdb) //' \
    -e '/^#/p' \
    -e '/^Thread/p'
- [root@localhost ~]#
2. gcore
- root@xxx:/App/Log# cat /usr/bin/gcore
- #!/bin/sh
-
- # Copyright (C) 2003-2016 Free Software Foundation, Inc.
-
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
-
- ##############################
- # check /opt/tmp/corefile and keep only the latest gcore files!
- # check /App/corefile and keep only the latest gcore.tar.gz files!
- ##############################
#######################################
# Prune old core dumps: in every directory keep the 5 most recently
# modified entries matching gcore-* and delete the older ones.
# Arguments: optional list of directories to prune; when none is given,
#            /opt/tmp/corefile and /App/corefile are used.
# Outputs:   each directory visited, and "rm <file>" per deleted file.
# Notes:     written in POSIX sh (no 'function' keyword, no arrays) so it
#            works under a #!/bin/sh shebang; never changes the caller's
#            working directory.
#######################################
check_gcore_files() {
    if [ "$#" -eq 0 ]; then
        set -- /opt/tmp/corefile /App/corefile
    fi

    for item in "$@"; do
        echo "$item"
        # Run in a subshell: the directory change stays local, and a
        # directory that cannot be entered is skipped instead of letting
        # the rm below hit gcore-* files in the current directory.
        (
            cd -- "$item" || exit 0

            corecounts=0
            # ls -t lists newest first; -d keeps a matching directory from
            # being expanded into its contents.
            ls -td -- gcore-* 2>/dev/null | while IFS= read -r file; do
                corecounts=$((corecounts + 1))

                # Everything after the 5 newest entries is removed.
                if [ "$corecounts" -gt 5 ]; then
                    if rm -- "$file"; then
                        echo "rm $file"
                    fi
                fi
            done
        )
    done
}
-
-
- #
- # Script to generate a core file of a running program.
- # It starts up gdb, attaches to the given PID and invokes the gcore command.
- #
-
# At least one argument (a pid) is required.
if [ $# -eq 0 ]; then
    echo "usage: $0 [-o filename] pid"
    exit 2
fi

# One timestamp is shared by the temporary core name and the default
# output name; "-o filename" below overrides only the output name.
name_tail=$(date +"%Y-%m-%d-%H.%M.%S")
tmp_name="gcore-$name_tail"
name="gcore-$name_tail"

case "$1" in
-o)
    # "-o" needs a filename plus at least one pid.
    if [ $# -lt 3 ]; then
        echo "usage: gcore [-o filename] pid"
        exit 2
    fi
    name=$2

    # Drop "-o filename" so only the pid list remains.
    shift 2
    ;;
esac

echo "tmpfile:$tmp_name, outfile:$name"

# Directory expected to hold the gdb binary. It is fixed to /usr/bin here
# rather than derived from the script location ($(dirname "$0")).
binary_path="/usr/bin"

if [ "$binary_path" = "." ]; then
    # A "." path means the script was started as ./gcore or through an
    # interpreter (sh gcore).
    binary_basename=$(basename "$0")

    if test -f "$binary_basename"; then
        # The script really lives in the current directory.
        binary_path="."
    else
        # Otherwise it was found through $PATH; ask "which" where it is.
        binary_path_from_env=$(which "$0")
        binary_path=$(dirname "$binary_path_from_env")
    fi
fi

# Give up early when gdb is not where it is expected.
[ -f "$binary_path/gdb" ] || {
    echo "gcore: GDB binary (${binary_path}/gdb) not found"
    exit 1
}
-
# Initialise return code.
rc=0
echo "---------------------------"

# Loop through pids: dump each process into /tmp with gdb, compress the dump
# into $name.<pid>.tar.gz, delete the uncompressed dump, then prune old
# archives. Stops at the first pid whose core file could not be created.
# $* is left unquoted as in the original loop, so the pid list is word-split.
for pid in $*
do
    # `</dev/null' to avoid touching interactive terminal if it is
    # available but not accessible as GDB would get stopped on SIGTTIN.
    date
    "$binary_path"/gdb </dev/null --nx --batch \
        -ex "set pagination off" -ex "set height 0" -ex "set width 0" \
        -ex "attach $pid" -ex "gcore /tmp/$tmp_name.$pid" -ex detach -ex quit

    if [ -r "/tmp/$tmp_name.$pid" ] ; then
        rc=0
        echo "------------------"
        date
        # -P keeps the absolute /tmp/... member name inside the archive.
        tar -czvPf "$name.$pid.tar.gz" "/tmp/$tmp_name.$pid"
        echo "------------------"
        date
        echo "------------------"
        rm -rf "/tmp/$tmp_name.$pid"
        date
    else
        echo "gcore: failed to create $name.$pid"
        rc=1
        break
    fi
    check_gcore_files

done
echo "------------------"
exit $rc
Note: 我们可以通过一些参数控制 gcore 生成的 coredump 文件的大小(例如通过 /proc/PID/coredump_filter 选择需要转储的内存段)
![](https://1000bd.com/contentImg/2024/04/18/5a27e80bc4570acd.png)
3. 使用fork() (代码略)
参考: