root@:curtis# uname -a
Linux curtis-Aspire-E5-471G 5.15.0-52-generic #58~20.04.1-Ubuntu SMP Thu Oct 13 13:09:46 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
root@:curtis# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 20.04.3 LTS
Release: 20.04
Codename: focal
Linux 通过 proc 文件系统将进程的内核栈信息暴露给用户态,读取 /proc/self/stack 时的调用链如下所示。
root@:curtis# cat /proc/self/stack
[<0>] proc_pid_stack+0x9a/0xf0
[<0>] proc_single_show+0x52/0xc0
[<0>] seq_read_iter+0x124/0x450
[<0>] seq_read+0xfd/0x150
[<0>] vfs_read+0xa0/0x1a0
[<0>] ksys_read+0x67/0xf0
[<0>] __x64_sys_read+0x1a/0x20
[<0>] do_syscall_64+0x5c/0xc0
[<0>] entry_SYSCALL_64_after_hwframe+0x61/0xcb
从调用栈上来看,最终调用的是函数 proc_pid_stack。
#ifdef CONFIG_STACKTRACE

#define MAX_STACK_TRACE_DEPTH 64

/*
 * proc_pid_stack - write the kernel stack trace of @task into seq_file @m.
 *
 * One line of the form "[<0>] symbol+offset/size" is emitted per saved
 * entry, up to MAX_STACK_TRACE_DEPTH entries. @ns and @pid are not used by
 * this function.
 *
 * Return: 0 on success, -EACCES if the opener of @m lacks CAP_SYS_ADMIN in
 * the initial user namespace, -ENOMEM if the entry buffer cannot be
 * allocated, or the non-zero value returned by lock_trace().
 */
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long *frames;
	unsigned int count, idx;
	int rc;

	/*
	 * Running the kernel stack unwinder racily against a task that may
	 * itself be running, and then exposing what the unwinder produced, is
	 * risky: it helps when debugging kernel problems, but it can also let
	 * an attacker leak kernel stack contents.
	 * Making this at least race-free would take extra work to guarantee
	 * that the remote task cannot be scheduled, and even then the unwinder
	 * would remain reachable as local attack surface.
	 * For that reason the interface is limited to root.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	frames = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*frames),
			       GFP_KERNEL);
	if (!frames)
		return -ENOMEM;

	rc = lock_trace(task);
	if (rc)
		goto out_free;

	count = stack_trace_save_tsk(task, frames, MAX_STACK_TRACE_DEPTH, 0);
	for (idx = 0; idx < count; idx++)
		seq_printf(m, "[<0>] %pB\n", (void *)frames[idx]);

	unlock_trace(task);

out_free:
	/* Reached on both the success path and the lock_trace() failure path. */
	kfree(frames);
	return rc;
}
#endif /* CONFIG_STACKTRACE */
从函数的定义来看,需要打开内核调试选项 CONFIG_STACKTRACE;其核心逻辑是调用 stack_trace_save_tsk 函数。该函数为非导出函数,如何在模块中使用未导出函数,之前的文章已有介绍。
/**
 * stack_trace_save_tsk - Record the stack trace of a task in a caller buffer
 * @task:	Task whose stack is examined
 * @store:	Array that receives the trace entries
 * @size:	Capacity of @store, in entries
 * @skipnr:	Count of leading entries to leave out of the trace
 *
 * Return: Number of trace entries stored
 */
unsigned int stack_trace_save_tsk(struct task_struct *task,
				  unsigned long *store, unsigned int size,
				  unsigned int skipnr)
{
	/* A task tracing itself additionally skips this function's own frame. */
	unsigned int skip = (task == current) ? skipnr + 1 : skipnr;
	struct stack_trace trace = {
		.skip		= skip,
		.max_entries	= size,
		.entries	= store,
	};

	save_stack_trace_tsk(task, &trace);

	return trace.nr_entries;
}
主要代码逻辑
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>

#include "trace.h"
/* Capacity, in entries, of the buffer that receives one task's stack trace. */
#define MAX_STACK_TRACE_DEPTH 64
/*
 * Function pointer through which this module calls stack_trace_save_tsk().
 * stack_trace_init() fills it in from find_func("stack_trace_save_tsk"); as a
 * file-scope object it is NULL until then. The parameters mirror that
 * function: the task to examine, the array that receives the entries, the
 * capacity of that array, and the number of leading entries to skip.
 * print_stack() treats the return value as the number of valid entries.
 */
unsigned int (*stack_trace_save_tsk_ptr)(struct task_struct *task,
unsigned long *store, unsigned int size,
unsigned int skipnr);
int print_stack(struct task_struct *task)
{
unsigned long *entries;
unsigned int i, nr_entries;
entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), GFP_KERNEL);
if (!entries)
return -ENOMEM;
nr_entries = stack_trace_save_tsk_ptr(task, entries,
MAX_STACK_TRACE_DEPTH, 0);
printk("PID = %d, COMM = %s\n", task->pid, task->comm);
for (i = 0; i < nr_entries; i++) {
printk(" [<0>] %pB\n", (void *)entries[i]);
}
kfree(entries);
return 0;
}
int query_stack(void)
{
int ret = 0;
struct task_struct *g, *t;
do_each_thread(g, t) {
print_stack(t);
} while_each_thread(g, t);
return ret;
}
/*
 * stack_trace_init - module entry point.
 *
 * Resolves the address of stack_trace_save_tsk() and then dumps the stack of
 * every thread once via query_stack(). init_kallsyms_lookup_func() and
 * find_func() are not defined in this file (presumably they come from
 * "trace.h").
 *
 * Return: 0 on success; the negative value reported by
 * init_kallsyms_lookup_func() or query_stack() on their failure; -ENOENT if
 * the stack_trace_save_tsk symbol cannot be found. Any negative return makes
 * the module load fail.
 */
static int __init stack_trace_init(void)
{
	int ret;

	ret = init_kallsyms_lookup_func();
	if (ret < 0) {
		pr_err("get kallsyms_lookup_name addr failed\n");
		return ret;
	}

	stack_trace_save_tsk_ptr = find_func("stack_trace_save_tsk");
	if (!stack_trace_save_tsk_ptr) {
		pr_err("get stack_trace_save_tsk addr failed\n");
		/* A bare -1 would surface to user space as -EPERM. */
		return -ENOENT;
	}

	ret = query_stack();
	if (ret < 0) {
		pr_err("query stack failed\n");
		return ret;
	}

	pr_info("stack trace init\n");
	return 0;
}
/*
 * stack_trace_exit - module exit point.
 *
 * The module holds no resources after initialisation, so this only logs that
 * it is being unloaded.
 */
static void __exit stack_trace_exit(void)
{
	pr_info("stack trace exit\n");
}
/* Run stack_trace_init() when the module is loaded. */
module_init(stack_trace_init);
/* Run stack_trace_exit() when the module is unloaded. */
module_exit(stack_trace_exit);
/* Declare the module as GPL-licensed. */
MODULE_LICENSE("GPL");
调用栈打印示例。
[781162.407668] PID = 107085, COMM = sudo
[781162.407670] [<0>] do_sys_poll+0x486/0x610
[781162.407675] [<0>] __x64_sys_ppoll+0xac/0xe0
[781162.407679] [<0>] do_syscall_64+0x5c/0xc0
[781162.407684] [<0>] entry_SYSCALL_64_after_hwframe+0x61/0xcb
[781162.407696] PID = 107086, COMM = insmod
[781162.407698] [<0>] print_stack+0x58/0x90 [trace]
[781162.407705] [<0>] query_stack+0x2d/0x70 [trace]
[781162.407712] [<0>] stack_trace_init+0x55/0x1000 [trace]
[781162.407719] [<0>] do_one_initcall+0x48/0x1e0
[781162.407726] [<0>] do_init_module+0x52/0x230
[781162.407733] [<0>] load_module+0x138d/0x1610
[781162.407739] [<0>] __do_sys_finit_module+0xbf/0x120
[781162.407746] [<0>] __x64_sys_finit_module+0x1a/0x20
[781162.407752] [<0>] do_syscall_64+0x5c/0xc0
[781162.407757] [<0>] entry_SYSCALL_64_after_hwframe+0x61/0xcb