• bionic-信号&backtrace


    Bionic 简介_zhengyad123的博客-CSDN博客_bionic linux

    Bionic库是Android的基础库之一,也是连接Android系统和Linux系统内核的桥梁。

    bionic/libc/platform/bionic/reserved_signals.h

    33  #include <signal.h>
    34  
    35  #include "macros.h"
    36  
    37  // Realtime signals reserved for internal use:
    38  //   32 (__SIGRTMIN + 0)        POSIX timers
    39  //   33 (__SIGRTMIN + 1)        libbacktrace
    40  //   34 (__SIGRTMIN + 2)        libcore
    41  //   35 (__SIGRTMIN + 3)        debuggerd
    42  //   36 (__SIGRTMIN + 4)        platform profilers (heapprofd, traced_perf)
    43  //   37 (__SIGRTMIN + 5)        coverage (libprofile-extras)
    44  //   38 (__SIGRTMIN + 6)        heapprofd ART managed heap dumps
    45  //   39 (__SIGRTMIN + 7)        fdtrack
    46  //   40 (__SIGRTMIN + 8)        android_run_on_all_threads (bionic/pthread_internal.cpp)
    47  
    48  #define BIONIC_SIGNAL_POSIX_TIMERS (__SIGRTMIN + 0)
    49  #define BIONIC_SIGNAL_BACKTRACE (__SIGRTMIN + 1)
    50  #define BIONIC_SIGNAL_DEBUGGER (__SIGRTMIN + 3)
    51  #define BIONIC_SIGNAL_PROFILER (__SIGRTMIN + 4)
    52  #define BIONIC_SIGNAL_ART_PROFILER (__SIGRTMIN + 6)
    53  #define BIONIC_SIGNAL_FDTRACK (__SIGRTMIN + 7)
    54  #define BIONIC_SIGNAL_RUN_ON_ALL_THREADS (__SIGRTMIN + 8)
    55  

    栈的回溯又分为两种:APCS(ARM Procedure Call Standard)与unwind。

    arm上backtrace的分析与实现原理 - 腾讯云开发者社区-腾讯云

    system/unwinding/libbacktrace/BacktraceCurrent.h
    20  #include
    21  #include
    22  
    23  #include
    24  
    25  // The signal used to cause a thread to dump the stack.
    26  #if defined(__GLIBC__)
    27  // In order to run the backtrace_tests on the host, we can't use
    28  // the internal real time signals used by GLIBC. To avoid this,
    29  // use SIGRTMIN for the signal to dump the stack.
    30  #define THREAD_SIGNAL SIGRTMIN
    31  #else
    32  #define THREAD_SIGNAL (__SIGRTMIN+1)
    33  #endif
    34  


    #define SIGRTMIN (__libc_current_sigrtmin())

    int __libc_current_sigrtmin() {
    34    return __SIGRTMIN + __SIGRT_RESERVED;
    35  }

    bionic 库中 __SIGRTMIN 为 32,预留给内部使用的实时信号个数 __SIGRT_RESERVED 为 9(即上面列出的 32~40),所以 SIGRTMIN = 32 + 9 = 41

    android native 层可以通过 ProcessCallStack 获取调用栈:
     ProcessCallStack pcs;
                    pcs.update();
                    pcs.dump(fd);

    其内部再经由 system/unwinding/libbacktrace/Backtrace.cpp 创建 UnwindStackCurrent 来获取堆栈,具体是通过向目标线程发送 THREAD_SIGNAL 信号来获取

    android java 层通过 Thread.getStackTrace 获取,最终由
    art/runtime/thread.cc 中的 Thread::CreateInternalStackTrace 获取

    art 里面线程创建:

    https://www.jianshu.com/p/00793196ed37

    java 层对应的是 Thread (java_peer) ,对应 native 层 thread.cc (child_thread),最终pthread_create 创建线程,内核调用clone ,art 使用 child_thread 表示线程,java 层使用java_peer表示,内核创建对应的是 new_pthread 。

    void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {

    ...

    Thread* child_thread = new Thread(is_daemon);

    pthread_t new_pthread;
     pthread_create_result = pthread_create(&new_pthread,
                                               &attr,
                                               Thread::CreateCallback,
                                               child_thread);
      

    }                   

      

    linux 信号处理

    线程会有自己的悬挂信号队列, 并且线程组也有一个信号悬挂队列

    在这里插入图片描述

    在内核返回用户空间前一刻,内核都会检查信号队列,有信号会发起信号队列处理,主要流程如上

    核心函数do_notify_resume() -> do_signal() ->handle_signal 处理用户注册的处理函数。

    当然在回到用户空间处理信号时,需要修改用户态的调用栈;在用户空间注册的处理函数执行完后,会调用 sigreturn 系统调用,恢复到执行信号处理函数之前的堆栈,从而可以从被信号打断的位置继续执行。

    kernel-4.19/arch/arm64/kernel/signal.c 

    asmlinkage void do_notify_resume(struct pt_regs *regs,
    909                   unsigned long thread_flags){

    ...

    if (thread_flags & _TIF_SIGPENDING)
    934                  do_signal(regs);

    ...

    }
     

     static void do_signal(struct pt_regs *regs){


    if (get_signal(&ksig)) {   //说明取到了待处理的信号,且用户为其注册了信号处理函数
    877          /*
    878           * Depending on the signal settings, we may need to revert the
    879           * decision to restart the system call, but skip this if a
    880           * debugger has chosen to restart at a different PC.
    881           */
    882          if (regs->pc == restart_addr &&
    883              (retval == -ERESTARTNOHAND ||
    884               retval == -ERESTART_RESTARTBLOCK ||
    885               (retval == -ERESTARTSYS &&
    886                !(ksig.ka.sa.sa_flags & SA_RESTART)))) {
    887              regs->regs[0] = -EINTR;
    888              regs->pc = continue_addr;
    889          }
    890  
    891          handle_signal(&ksig, regs); 
    892          return;
    893      }


    ....

    }

    全局 init 进程会忽略 SIGKILL 和 SIGSTOP(这两个信号不允许发送给全局 init)

    89  static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
    90  {
    91      void __user *handler;
    92  
    93      handler = sig_handler(t, sig);
    94  
    95      /* SIGKILL and SIGSTOP may not be sent to the global init */
    96      if (unlikely(is_global_init(t) && sig_kernel_only(sig)))      
    97          return true;
    98  
    99      if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
    100          handler == SIG_DFL && !(force && sig_kernel_only(sig)))
    101          return true;
    102  
    103      /* Only allow kernel generated signals to this kthread */
    104      if (unlikely((t->flags & PF_KTHREAD) &&
    105               (handler == SIG_KTHREAD_KERNEL) && !force))
    106          return true;
    107  
    108      return sig_handler_ignored(handler, sig);
    109  }

    在do_signal函数中,会去判断每个被挂起的信号,如果这个信号在用户态没有设置处理函数,那么内核会按照信号的默认处理行为处理信号,如果这个信号在用户态设置了处理函数,那么需要返回用户态去调用这个函数,在内核中会在用户态的栈中插入一个调用那个处理函数的栈帧,调用完用户态处理函数后返回内核态,继续do_signal函数的处理。

    linux 发送信号

    kernel-4.19/kernel/signal.c

    # sig就是信号代码。

    #  info就是发送信号的时候附加信息

    #  t目标进程task_struct

    #  type(pid_type)是接收信号对象的类型 

    1232  static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
    1233              enum pid_type type)
    1234  {
    1235      int from_ancestor_ns = 0;
    1236  
    1237  #ifdef CONFIG_PID_NS
    1238      from_ancestor_ns = si_fromuser(info) &&
    1239                 !task_pid_nr_ns(current, task_active_pid_ns(t));
    1240  #endif
    1241  
    1242      return __send_signal(sig, info, t, type, from_ancestor_ns);
    1243  }

    __send_signal(){

    1115      if(1) {
    1116  
    1117          if((sig == SIGHUP || sig == 33 || sig == SIGKILL || sig == SIGSTOP || sig == SIGABRT || sig == SIGTERM || sig == SIGCONT) && is_key_process(t)) {
    1118              printk("Some other process %d:%s want to send sig:%d to pid:%d tgid:%d comm:%s\n", current->pid, current->comm,sig, t->pid, t->tgid, t->comm);
    1119          }
    1120      }


    q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit);
    1160      if (q) {
    1161          list_add_tail(&q->list, &pending->list);
    1162          switch ((unsigned long) info) {
    1163          case (unsigned long) SEND_SIG_NOINFO:   //用户空间发送的信号
    1164              clear_siginfo(&q->info);
    1165              q->info.si_signo = sig;
    1166              q->info.si_errno = 0;
    1167              q->info.si_code = SI_USER;
    1168              q->info.si_pid = task_tgid_nr_ns(current,
    1169                              task_active_pid_ns(t));
    1170              q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
    1171              break;
    1172          case (unsigned long) SEND_SIG_PRIV:
    1173              clear_siginfo(&q->info);
    1174              q->info.si_signo = sig;
    1175              q->info.si_errno = 0;
    1176              q->info.si_code = SI_KERNEL;   //内核空间产生的信号
    1177              q->info.si_pid = 0;
    1178              q->info.si_uid = 0;
    1179              break;
    1180          default:
    1181              copy_siginfo(&q->info, info);
    1182              if (from_ancestor_ns)
    1183                  q->info.si_pid = 0;
    1184              break;
    1185          }
    1186  
    1187          userns_fixup_signal_uid(&q->info, t);
    1188  
    1189      }


    1209      signalfd_notify(t, sig);
    1210      sigaddset(&pending->signal, sig);


    complete_signal(sig, t, type);

    }
     

    Signal Catcher

    Signal Catcher 线程接收到 kernel 发来的信号(SIGQUIT)后,会 dump 当前虚拟机的信息:先给每个线程设置标志位(check_point)并请求线程挂起,线程在运行过程中进行上下文切换时会检查该标记;等到线程都挂起后,开始遍历并 dump 每个线程的堆栈和线程数据,最后再唤醒这些线程。

    "Signal Catcher" daemon prio=10 tid=2 Runnable
      | group="system" sCount=0 ucsCount=0 flags=0 obj=0x12d80180 self=0xb40000791080f000
      | sysTid=1678 nice=-20 cgrp=default sched=0/0 handle=0x790f7fbcb0
      | state=R schedstat=( 7676736981 897590846 13817 ) utm=496 stm=270 core=6 HZ=100
      | stack=0x790f704000-0x790f706000 stackSize=991KB
      | held mutexes= "mutator lock"(shared held)
      native: #00 pc 00000000005374cc  /apex/com.android.art/lib64/libart.so (art::DumpNativeStack(std::__1::basic_ostream >&, int, BacktraceMap*, char const*, art::ArtMethod*, void*, bool)+128) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #01 pc 00000000006f0d94  /apex/com.android.art/lib64/libart.so (art::Thread::DumpStack(std::__1::basic_ostream >&, bool, BacktraceMap*, bool) const+236) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #02 pc 00000000006fe620  /apex/com.android.art/lib64/libart.so (art::DumpCheckpoint::Run(art::Thread*)+208) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #03 pc 00000000003641d4  /apex/com.android.art/lib64/libart.so (art::ThreadList::RunCheckpoint(art::Closure*, art::Closure*)+440) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #04 pc 00000000006fcdc0  /apex/com.android.art/lib64/libart.so (art::ThreadList::Dump(std::__1::basic_ostream >&, bool)+280) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #05 pc 00000000006fc7b4  /apex/com.android.art/lib64/libart.so (art::ThreadList::DumpForSigQuit(std::__1::basic_ostream >&)+292) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #06 pc 00000000006d57e4  /apex/com.android.art/lib64/libart.so (art::Runtime::DumpForSigQuit(std::__1::basic_ostream >&)+184) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #07 pc 00000000006e1928  /apex/com.android.art/lib64/libart.so (art::SignalCatcher::HandleSigQuit()+468) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #08 pc 0000000000573c40  /apex/com.android.art/lib64/libart.so (art::SignalCatcher::Run(void*)+264) (BuildId: 97fdb979efb7d2b596fa4fceabaad95b)
      native: #09 pc 00000000000eb630  /apex/com.android.runtime/lib64/bionic/libc.so (__pthread_start(void*)+208) (BuildId: 6940bde3287fe65faab4354aae30b200)
      native: #10 pc 000000000007e210  /apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+64) (BuildId: 6940bde3287fe65faab4354aae30b200)
      (no managed stack frames)

    <4>[53270.460373] -(7)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1666 tgid:1666 comm:system_server
    <4>[53270.472326] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1679 tgid:1666 comm:perfetto_hprof_
    <4>[53270.482760] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1680 tgid:1666 comm:ADB-JDWP Connec
    <4>[53270.484789] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1681 tgid:1666 comm:Jit thread pool
    <4>[53270.485597] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1682 tgid:1666 comm:HeapTaskDaemon
    <4>[53270.487248] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1686 tgid:1666 comm:binder:1666_1
    <4>[53270.490018] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1687 tgid:1666 comm:binder:1666_2
    <4>[53270.490620] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1696 tgid:1666 comm:Verification th
    <4>[53270.491026] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1698 tgid:1666 comm:State
    <4>[53270.491692] -(6)[1678:Signal Catcher]Some other process 1678:Signal Catcher want to send sig:33 to pid:1699 tgid:1666 comm:android.fg

    art/runtime/native_stack_dump.cc


    318  void DumpNativeStack(std::ostream& os,
    319                       pid_t tid,
    320                       BacktraceMap* existing_map,
    321                       const char* prefix,
    322                       ArtMethod* current_method,
    323                       void* ucontext_ptr,
    324                       bool skip_frames) {
    325    // Historical note: This was disabled when running under Valgrind (b/18119146).
    326  
    327    BacktraceMap* map = existing_map;
    328    std::unique_ptr<BacktraceMap> tmp_map;
    329    if (map == nullptr) {
    330      tmp_map.reset(BacktraceMap::Create(getpid()));  ###创建 BacktraceMap(UnwindStackCurrent 由下面的 Backtrace::Create 创建)
    331      map = tmp_map.get();
    332    }
    333    std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
    334    backtrace->SetSkipFrames(skip_frames);
    335    if (!backtrace->Unwind(0, reinterpret_cast<ucontext_t*>(ucontext_ptr))) {
    336      os << prefix << "(backtrace::Unwind failed for thread " << tid
    337         << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")" << std::endl;
    338      return;
    339    } else if (backtrace->NumFrames() == 0) {
    340      os << prefix << "(no native stack frames for thread " << tid << ")" << std::endl;
    341      return;
    342    }

    343  
    344    // Check whether we have and should use addr2line.
    345    bool use_addr2line;
    346    if (kUseAddr2line) {
    347      // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
    348      // and print to stderr.
    349      use_addr2line = (gAborting > 0) && RunCommand(FindAddr2line() + " -h");
    350    } else {
    351      use_addr2line = false;
    352    }
    ....


    system/unwinding/libbacktrace/Backtrace.cpp
    130  Backtrace* Backtrace::Create(pid_t pid, pid_t tid, BacktraceMap* map) {
    131    if (pid == BACKTRACE_CURRENT_PROCESS) {
    132      pid = getpid();
    133      if (tid == BACKTRACE_CURRENT_THREAD) {
    134        tid = android::base::GetThreadId();
    135      }
    136    } else if (tid == BACKTRACE_CURRENT_THREAD) {
    137      tid = pid;
    138    }
    139  
    140    if (pid == getpid()) {
    141      return new UnwindStackCurrent(pid, tid, map);
    142    } else {
    143      return new UnwindStackPtrace(pid, tid, map);
    144    }
    145  }
     

    UnwindStackCurrent  里面会发送 THREAD_SIGNAL  ,即信号33 

    Signal Catcher 接收到底层发送的 SIGQUIT 后,接着发送 THREAD_SIGNAL  获取堆栈

    SignalCatcher::Run
    signal_catcher.cc

    void* SignalCatcher::Run(void* arg) {
      SignalCatcher* signal_catcher = reinterpret_cast<SignalCatcher*>(arg);
      CHECK(signal_catcher != nullptr);

      Runtime* runtime = Runtime::Current();
      // 将当前线程 attach 到当前的 JavaVM
      CHECK(runtime->AttachCurrentThread("Signal Catcher", true, runtime->GetSystemThreadGroup(),
                                         !runtime->IsAotCompiler()));

      Thread* self = Thread::Current();
      DCHECK_NE(self->GetState(), kRunnable);
      {
        MutexLock mu(self, signal_catcher->lock_);
        signal_catcher->thread_ = self;
        signal_catcher->cond_.Broadcast(self);
      }

      // Set up mask with signals we want to handle.
      SignalSet signals;
      signals.Add(SIGQUIT);
      signals.Add(SIGUSR1);

      while (true) {
        // 见 1.2.3
        int signal_number = signal_catcher->WaitForSignal(self, signals);
        if (signal_catcher->ShouldHalt()) {
          runtime->DetachCurrentThread();
          return nullptr;
        }

        switch (signal_number) {
        case SIGQUIT:
          signal_catcher->HandleSigQuit();
          break;
        case SIGUSR1:
          signal_catcher->HandleSigUsr1();
          break;
        default:
          LOG(ERROR) << "Unexpected signal %d" << signal_number;
          break;
        }
      }
    }

    PaletteWriteCrashThreadStacks  会连接tombstoned 
    106  void SignalCatcher::Output(const std::string& s) {
    107    ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
    108    palette_status_t status = PaletteWriteCrashThreadStacks(s.data(), s.size());
    109    if (status == PALETTE_STATUS_OK) {
    110      LOG(INFO) << "Wrote stack traces to tombstoned";
    111    } else {
    112      CHECK(status == PALETTE_STATUS_FAILED_CHECK_LOG);
    113      LOG(ERROR) << "Failed to write stack traces to tombstoned";
    114    }
    115  }
    116  
    117  void SignalCatcher::HandleSigQuit() {
    118    Runtime* runtime = Runtime::Current();
    119    std::ostringstream os;
    120    os << "\n"
    121        << "----- pid " << getpid() << " at " << GetIsoDate() << " -----\n";
    122  
    123    DumpCmdLine(os);
    124  
    125    // Note: The strings "Build fingerprint:" and "ABI:" are chosen to match the format used by
    126    // debuggerd. This allows, for example, the stack tool to work.
    127    std::string fingerprint = runtime->GetFingerprint();
    128    os << "Build fingerprint: '" << (fingerprint.empty() ? "unknown" : fingerprint) << "'\n";
    129    os << "ABI: '" << GetInstructionSetString(runtime->GetInstructionSet()) << "'\n";
    130  
    131    os << "Build type: " << (kIsDebugBuild ? "debug" : "optimized") << "\n";
    132  
    133    runtime->DumpForSigQuit(os);
    134  
    135    if ((false)) {
    136      std::string maps;
    137      if (android::base::ReadFileToString("/proc/self/maps", &maps)) {
    138        os << "/proc/self/maps:\n" << maps;
    139      }
    140    }
    141    os << "----- end " << getpid() << " -----\n";
    142    Output(os.str());
    143  }

    android SIGQUIT 

    「ANR」Android SIGQUIT(3) 信号拦截与处理_阿里巴巴终端技术的博客-CSDN博客_signal 拦截

    手把手教你高效监控ANR - 腾讯云开发者社区-腾讯云

  • 相关阅读:
    JVM 虚拟机 ----> Java 内存模型(JMM)
    如何使用Goland进行远程Go项目线上调试?
    文本美学:text-image打造视觉吸引力
    stable diffusion汉化
    【C++进阶之路】特殊类的设计
    灵性图书馆:好书推荐-《零极限》
    Kubernetes-in-action (一)
    【C++】引用
    IP营销赋能中国玩具—成都扬帆际海教育咨询有限公司
    Node.js学习笔记_No.03
  • 原文地址:https://blog.csdn.net/lei7143/article/details/126100081