#include#include #include static int __init test_init(void) { printk("<1>Hello, World! from the kernel space...\n"); return 0; } static void __exit test_exit(void) { printk("<1>Good Bye, World! leaving kernel space...\n"); } module_init(test_init); // 注册模块 module_exit(test_exit); // 注销模块 MODULE_LICENSE("GPL");
# Out-of-tree build of test.ko against the headers of the running kernel.
obj-m := test.o

CURRENT_PATH := $(shell pwd)
LINUX_KERNEL := $(shell uname -r)
LINUX_KERNEL_PATH := /usr/src/linux-headers-$(LINUX_KERNEL)

.PHONY: all clean

# $(MAKE) (rather than a bare "make") passes flags and the jobserver on to
# the kernel build system.
all:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) clean
sudo apt-get install libelf-dev
sudo insmod test.ko
lsmod | grep test
sudo rmmod test
如果insmod加载模块失败,通常是因为加载.ko文件相当于修改运行中的Linux内核,而未签名的模块被BIOS/UEFI的安全启动(Secure Boot)保护禁止了。
重启进入BIOS,修改安全启动选项,禁用安全启动选项,开机后重新sudo运行之前的指令。
Netfilter是linux内核中的数据包过滤框架,2.4版本及其后的内核包含该框架,该框架使数据包过滤、网络地址转换(NAT)和其他数据包修改功能成为可能。Netfilter框架由之前的ipfwadm和ipchains系统改进并重新设计而来,iptables工具与其紧密关联并依赖其在内核完成相应功能。
Netfilter框架由一组内核中的hook点组成,内核模块可以在网络栈的这些hook点上注册回调函数,当数据包穿过网络栈上相应的hook点时注册的回调函数将被调用,回调函数可以对参数中的数据包做需要的处理并裁决数据包后续的处理方式。
但是需要注意的是,不同的内核版本,API 也不同,确实是比较难受。
netfilter是一个通用的数据包过滤框架,支持多种协议族的数据包过滤。
本文目前只关注IPv4
IPv4下共有5个hook点:NF_INET_PRE_ROUTING、NF_INET_LOCAL_IN、NF_INET_FORWARD、NF_INET_LOCAL_OUT、NF_INET_POST_ROUTING。
各个hook点调用时机如下:

/* Responses from hook functions. */
#define NF_DROP 0
#define NF_ACCEPT 1
#define NF_STOLEN 2
#define NF_QUEUE 3
#define NF_REPEAT 4
#define NF_STOP 5
#define NF_MAX_VERDICT NF_STOP

/* we overload the higher bits for encoding auxiliary data such as the queue
 * number or errno values. Not nice, but better than additional function
 * arguments. */
#define NF_VERDICT_MASK 0x000000ff

/* extra verdict flags have mask 0x0000ff00 */
#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000

/* queue number (NF_QUEUE) or errno (NF_DROP) */
#define NF_VERDICT_QMASK 0xffff0000
#define NF_VERDICT_QBITS 16

/* Build an NF_QUEUE verdict carrying queue number x in the upper 16 bits. */
#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)

/* Build an NF_DROP verdict carrying errno x in the upper 16 bits. */
#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
NFQUEUE是iptables和ip6tables的一种target,用于将数据包委托给用户态应用程序裁决如何处理数据包。用户态应用程序可以使用libnetfilter_queue库连接到该队列获取包含了数据包的内核消息,并必须为数据包做出一个裁决。
下面是一个例子,将udp目的端口10010的发出包置入队列10010交由用户态应用程序裁决。
sudo iptables -A OUTPUT -p udp --dport 10010 -j NFQUEUE --queue-num 10010
原理
iptables依赖内核netfilter框架完成功能,是netfilter框架的一个用户态工具。NFQUEUE同样依赖netfilter框架并且需要内核包含nfnetlink_queue子系统(2.6.14及以后的内核版本)。
当一个数据包命中规则到达NFQUEUE target,数据包在内核中被放入以数字序号区分的队列,队列由固定长度的链表实现,链表中保存数据包及元数据(内核skb结构),当数据包收到用户态裁决时会从队列中释放,每一个数据包必须有一个裁决,队列满时新到达的数据包将被内核做drop处理。
用户态应用程序可以读取多个数据包以做出裁决,数据包的裁决可以与读取顺序无关;但过慢的裁决会导致内核队列满,此时内核将drop新到达的数据包。
内核和用户态程序使用nfnetlink协议通信。这是一个完全基于消息的协议,不包含任何共享内存。当一个数据包入队列,内核向socket发送一个nfnetlink格式的消息,消息包含数据包数据和相关信息,用户态程序读取这个socket就可以获取消息。用户态程序裁决一个数据包时,需要组织一个nfnetlink格式的消息,消息中包含数据包在队列中的索引号,然后将消息发送给socket。
使用libnetfilter_queue
sudo apt install libnetfilter-queue-dev
样例
nfqueue_example.c 代码中读取队列号10010中的数据,对所有读取到的tcp数据包做ACCEPT裁决,对udp目的端口10086的数据包做DROP裁决,对udp目的端口10010的数据包将udp数据全部修改为字符'h'并以'\n'结尾后返回修改后的数据及ACCEPT裁决。
#include#include #include #include #include #include #include #include #include #include #include #include #include #include #include #define QUEUE_NUM 10010 static uint16_t checksum(uint32_t sum, uint16_t *buf, int size) { while (size > 1) { sum += *buf++; size -= sizeof(uint16_t); } if (size) sum += *(uint8_t *)buf; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >>16); return (uint16_t)(~sum); } static uint16_t checksum_tcpudp_ipv4(struct iphdr *iph) { uint32_t sum = 0; uint32_t iph_len = iph->ihl*4; uint32_t len = ntohs(iph->tot_len) - iph_len; uint8_t *payload = (uint8_t *)iph + iph_len; sum += (iph->saddr >> 16) & 0xFFFF; sum += (iph->saddr) & 0xFFFF; sum += (iph->daddr >> 16) & 0xFFFF; sum += (iph->daddr) & 0xFFFF; sum += htons(iph->protocol); // 这是一个bug //sum += htons(IPPROTO_TCP); sum += htons(len); return checksum(sum, (uint16_t *)payload, len); } static void udp_compute_checksum_ipv4(struct udphdr *udph, struct iphdr *iph) { /* checksum field in header needs to be zero for calculation. 
*/ udph->check = 0; udph->check = checksum_tcpudp_ipv4(iph); } int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg, struct nfq_data *nfad, void *data) { uint32_t id; struct nfqnl_msg_packet_hdr *ph; unsigned char *payload; int r; struct iphdr *iph; struct udphdr *udph; struct tcphdr *tcph; char saddr_str[16]; char daddr_str[16]; struct in_addr tmp_in_addr; ph = nfq_get_msg_packet_hdr(nfad); if (ph) { id = ntohl(ph->packet_id); r = nfq_get_payload(nfad, &payload); if (r >= sizeof(*iph)) { iph = (struct iphdr *)payload; tmp_in_addr.s_addr = iph->saddr; strcpy(saddr_str, inet_ntoa(tmp_in_addr)); tmp_in_addr.s_addr = iph->daddr; strcpy(daddr_str, inet_ntoa(tmp_in_addr)); if (iph->protocol == IPPROTO_UDP) { if (iph->ihl * 4 + sizeof(*udph) <= r) { udph = (struct udphdr *)(payload + iph->ihl * 4); if (ntohs(udph->dest) == 10010) { if (iph->ihl * 4 + sizeof(*udph) < r && ntohs(iph->tot_len) - iph->ihl * 4 == ntohs(udph->len) && ntohs(udph->len) - sizeof(*udph) > 0) { int offset = iph->ihl * 4 + sizeof(*udph); memset(payload + offset, 'h', ntohs(udph->len) - sizeof(*udph)); memset(payload + iph->ihl * 4 + ntohs(udph->len) - 1, '\n', 1); // 由于修改了udp数据包内容,因此需要重新计算udp校验和。 // libnetfilter_queue在udp校验和计算时有bug,因此计算校验和代码拷贝到本文件后修复使用。 //nfq_udp_compute_checksum_ipv4(udph, iph); udp_compute_checksum_ipv4(udph, iph); printf("ACCEPT & modified protocol:udp %s:%u -> %s:%u\n", saddr_str, ntohs(udph->source), daddr_str, ntohs(udph->dest)); } else { printf("ACCEPT & !modified protocol:udp %s:%u -> %s:%u\n", saddr_str, ntohs(udph->source), daddr_str, ntohs(udph->dest)); } nfq_set_verdict(qh, id, NF_ACCEPT, r, payload); } else if (ntohs(udph->dest) == 10086) { printf("DROP protocol:udp %s:%u -> %s:%u\n", saddr_str, ntohs(udph->source), daddr_str, ntohs(udph->dest)); nfq_set_verdict(qh, id, NF_DROP, 0, NULL); } else { printf("ACCEPT protocol:udp %s:%u -> %s:%u\n", saddr_str, ntohs(udph->source), daddr_str, ntohs(udph->dest)); nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } } else { 
printf("ACCEPT protocol:udp %s -> %s\n", saddr_str, daddr_str); nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } } else if (iph->protocol == IPPROTO_TCP) { if (iph->ihl * 4 + sizeof(*tcph) <= r) { tcph = (struct tcphdr *)(payload + iph->ihl *4); printf("ACCEPT protocol:tcp %s:%u -> %s:%u\n", saddr_str, ntohs(tcph->source), daddr_str, ntohs(tcph->dest)); nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } else { printf("ACCEPT protocol:tcp %s -> %s\n", saddr_str, daddr_str); nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } } else { printf("ACCEPT protocol:%d %s -> %s\n", iph->protocol, saddr_str, daddr_str); nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } } else { printf("ACCEPT unknown protocol\n"); nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } } return 0; } int main() { struct nfq_handle *h; struct nfq_q_handle *qh; int r; char buf[10240]; h = nfq_open(); if (h == NULL) { perror("nfq_open error"); goto end; } if (nfq_unbind_pf(h, AF_INET) != 0) { perror("nfq_unbind_pf error"); goto end; } if (nfq_bind_pf(h, AF_INET) != 0) { perror("nfq_bind_pf error"); goto end; } qh = nfq_create_queue(h, QUEUE_NUM, &cb, NULL); if (qh == NULL) { perror("nfq_create_queue error"); goto end; } if (nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff) != 0) { perror("nfq_set_mod error"); goto end; } while(1) { r = recv(nfq_fd(h), buf, sizeof(buf), 0); if (r == 0) { printf("recv return 0. exit"); break; } else if (r < 0) { perror("recv error"); break; } else { nfq_handle_packet(h, buf, r); } } end: if (qh) nfq_destroy_queue(qh); if (h) nfq_close(h); return 0; }
编译代码并运行
gcc -Wall -g nfqueue_example.c -lnetfilter_queue
sudo ./a.out
在另外的会话中设置iptables。在filter表OUTPUT链中添加规则,将tcp和udp目的端口10010、10086的数据包放入NFQUEUE队列号10010中。
sudo iptables -A OUTPUT -p udp --dport 10010 -j NFQUEUE --queue-num 10010
sudo iptables -A OUTPUT -p udp --dport 10086 -j NFQUEUE --queue-num 10010
sudo iptables -A OUTPUT -p tcp --dport 10010 -j NFQUEUE --queue-num 10010
sudo iptables -A OUTPUT -p tcp --dport 10086 -j NFQUEUE --queue-num 10010
Ps:测试环境使用完成后记得清理iptables规则。
注册回调函数需要nf_hook_ops结构体,里面定义了注册的回调函数、协议族、hook点和优先级(升序排列执行)。
/*
 * Describes one callback registration: the callback function, the protocol
 * family, the hook point and the priority.
 */
struct nf_hook_ops {
/* Links this entry into the per-family, per-hook list in nf_hooks. */
struct list_head list;
/* User fills in from here down. */
/* Callback invoked when a packet passes the hook point. */
nf_hookfn *hook;
/* NOTE(review): presumably the module providing the callback - confirm. */
struct module *owner;
/* NOTE(review): presumably opaque per-registration data - confirm. */
void *priv;
/* Protocol family the callback is registered for. */
u_int8_t pf;
/* Hook point the callback is registered at. */
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
int priority;
/* Reserved for use in the future RHEL versions. Set to zero. */
unsigned long __rht_reserved1;
unsigned long __rht_reserved2;
unsigned long __rht_reserved3;
unsigned long __rht_reserved4;
unsigned long __rht_reserved5;
};
/*
 * Global table of registered callbacks: one list head per
 * [protocol family][hook point] pair.
 */
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
所有的回调函数都注册在二维数组全局变量nf_hooks中。list_head是linux内核中常用的双向链表结构,这里不关注。数组的第一个维度是注册回调函数的协议族,第二个维度是注册回调函数的hook点,也就是每一个协议族的每一个hook点都是一个双向链表连接的一组回调函数。NF_MAX_HOOKS值为8,不确定什么协议族有8个hook点。
nf_register_hook根据协议族和hook点确定nf_hooks中的链表,遍历链表并根据nf_hook_ops中的优先级将其插入到链表的合适位置。nf_register_hooks只是nf_register_hook的循环包装。
注销回调函数时只是将该nf_hook_ops结构从链表中移出。
PS: ubuntu 18.04.2 之后没有nf_register_hook,而是换成了nf_register_net_hook,注销函数也改成了 nf_unregister_net_hook(&init_net, reg)
以IPv4协议族收包为例,在确定网络层协议为IPv4协议后,内核进入ip_rcv函数。经过一系列检查后,最后运行到NF_HOOK,选择NFPROTO_IPV4协议族、NF_INET_PRE_ROUTING hook点,同时设定协议栈下一步处理函数ip_rcv_finish(这个函数将在该hook点回调函数处理完毕并允许下一步逻辑执行时被调用,参考前文裁决值部分)。
由NF_HOOK开始的netfilter框架执行代码贴在这里,可以看到代码简短逻辑清晰,但能够支持内核模块(比如ip_tables模块)通过注册各种回调函数完成复杂功能。netfilter作为内核数据包过滤框架,很好的体现了“提供机制,而不是策略”的设计思想。
/*
* Main IP Receive routine.
*/
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
/*
* ....................
*/
/*
* Run the NFPROTO_IPV4 / NF_INET_PRE_ROUTING hook for this packet.
* ip_rcv_finish is passed as the continuation (okfn) argument.
*/
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
dev, NULL,
ip_rcv_finish);
}
/**
 * nf_hook_thresh - call a netfilter hook
 *
 * Builds an nf_hook_state from the arguments and runs nf_hook_slow() when
 * nf_hooks_active() reports callbacks for @pf/@hook.
 *
 * Returns 1 if the hook has allowed the packet to pass (also when no
 * callbacks are active). The function okfn must be invoked by the caller
 * in this case. Any other return value indicates the packet has been
 * consumed by the hook.
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
				 struct sock *sk,
				 struct sk_buff *skb,
				 struct net_device *indev,
				 struct net_device *outdev,
				 int (*okfn)(struct sock *, struct sk_buff *),
				 int thresh)
{
	struct nf_hook_state state;

	/* Fast path: nothing registered, let the packet through. */
	if (!nf_hooks_active(pf, hook))
		return 1;

	nf_hook_state_init(&state, hook, thresh, pf,
			   indev, outdev, sk, okfn);
	return nf_hook_slow(skb, &state);
}
/*
 * Run the hook via nf_hook_thresh() and, when it returns 1 (packet allowed
 * to pass), call okfn(sk, skb) and return its result. Any other value from
 * nf_hook_thresh() is returned unchanged.
 */
static inline int
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk,
	       struct sk_buff *skb, struct net_device *in,
	       struct net_device *out,
	       int (*okfn)(struct sock *, struct sk_buff *), int thresh)
{
	int hook_result;

	hook_result = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh);
	if (hook_result != 1)
		return hook_result;

	return okfn(sk, skb);
}
/*
 * Convenience wrapper around NF_HOOK_THRESH() that passes INT_MIN as the
 * priority threshold.
 */
static inline int
NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb,
	struct net_device *in, struct net_device *out,
	int (*okfn)(struct sock *, struct sk_buff *))
{
	int result;

	result = NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN);
	return result;
}
/*
 * Continue walking the callback list `head` after the entry *elemp and
 * invoke each callback whose priority is not below state->thresh.
 *
 * Returns the first verdict that is neither NF_ACCEPT nor NF_REPEAT, with
 * *elemp left at the callback that produced it; returns NF_ACCEPT when the
 * loop finishes without such a verdict. A callback returning NF_REPEAT is
 * invoked again.
 */
unsigned int nf_iterate(struct list_head *head,
struct sk_buff *skb,
struct nf_hook_state *state,
struct nf_hook_ops **elemp)
{
unsigned int verdict;
/*
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
list_for_each_entry_continue_rcu((*elemp), head, list) {
/* Skip callbacks whose priority is lower than the threshold. */
if (state->thresh > (*elemp)->priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
verdict = (*elemp)->hook(*elemp, skb, state->in, state->out,
state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
/* Debug builds: log an out-of-range verdict and move on to the next callback. */
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
(*elemp)->hook, state->hook);
continue;
}
#endif
/* Anything except NF_REPEAT ends the walk with that verdict. */
if (verdict != NF_REPEAT)
return verdict;
/* NF_REPEAT: call the same callback again. */
goto repeat;
}
}
return NF_ACCEPT;
}
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. */
/*
 * Runs the callbacks registered in nf_hooks[state->pf][state->hook] through
 * nf_iterate() and acts on the resulting verdict. The whole walk happens
 * between rcu_read_lock() and rcu_read_unlock().
 */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
struct nf_hook_ops *elem;
unsigned int verdict;
int ret = 0;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
/* Start from the list head so nf_iterate() continues with the first entry. */
elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
struct nf_hook_ops, list);
next_hook:
verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
&elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
/* Packet may proceed: the caller has to run okfn(). */
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
/* Drop: free the skb; fall back to -EPERM when NF_DROP_GETERR() yields 0. */
kfree_skb(skb);
ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
/* Queue: the bits above NF_VERDICT_QBITS are passed on to nf_queue(). */
int err = nf_queue(skb, elem, state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
/* -ECANCELED: resume the walk with the callbacks after elem. */
if (err == -ECANCELED)
goto next_hook;
/* -ESRCH with the bypass flag set: also resume the walk instead of dropping. */
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
/* Any other queueing error: free the skb. */
kfree_skb(skb);
}
}
rcu_read_unlock();
return ret;
}
4.3以下的内核版本是通过nf_register_hook来注册,nf_unregister_hook来注销;
4.3-4.13之间版本,nf_register_hook里面会调用nf_register_net_hook来逐个net命名空间注册,此时可以使用这俩函数中的任一个来注册,注销对应nf_unregister_hook/nf_unregister_net_hook;
4.13及以上版本内核是通过nf_register_net_hook/nf_unregister_net_hook来注册/注销,删掉了nf_register_hook函数。

与旧版回调相比,新版回调函数少了一个struct nf_hook_ops结构参数,但多了一个自定义的通用参数void *priv;如果注册了多个触发点,可以通过该参数(或state->hook)判断当前触发的是哪一个hook点。
#include#include #include #include #include #include #include MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Zong"); MODULE_VERSION("0.1"); #define MY_QUEUE_NUM 10010 static char *hook_name(int hook) { switch (hook) { case NF_INET_PRE_ROUTING: return "pre_routing"; case NF_INET_LOCAL_IN: return "local_in"; case NF_INET_FORWARD: return "forward"; case NF_INET_LOCAL_OUT: return "local_out"; case NF_INET_POST_ROUTING: return "post_routing"; } return NULL; } unsigned int generic_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { printk(KERN_INFO "generic_hook coming\n"); struct iphdr *iph; struct udphdr *udph; struct udphdr buf; iph = ip_hdr(skb); if (iph->protocol != IPPROTO_UDP) { return NF_ACCEPT; } if (skb->len < iph->ihl * 4 + sizeof(*udph)) { return NF_ACCEPT; } if (skb_headlen(skb) < iph->ihl * 4 + sizeof(*udph)) { udph = (void *)iph + iph->ihl * 4; } else { skb_copy_bits(skb, iph->ihl * 4, &buf, sizeof(*udph)); udph = &buf; } if (ntohs(udph->source) == 10086 || ntohs(udph->dest) == 10086) { if (priv== NF_INET_POST_ROUTING || priv== NF_INET_LOCAL_IN) { printk("%-15s udp drop %pI4:%d -> %pI4:%d\n", hook_name((int)priv), &iph->saddr, ntohs(udph->source), &iph->daddr, ntohs(udph->dest)); return NF_DROP; } else { printk("%-15s udp accept %pI4:%d -> %pI4:%d\n", hook_name((int)priv), &iph->saddr, ntohs(udph->source), &iph->daddr, ntohs(udph->dest)); return NF_ACCEPT; } } else if (ntohs(udph->source) == 10010 || ntohs(udph->dest) == 10010) { if (priv== NF_INET_POST_ROUTING || priv== NF_INET_LOCAL_IN) { printk("%-15s udp queue %pI4:%d -> %pI4:%d\n", hook_name((int)priv), &iph->saddr, ntohs(udph->source), &iph->daddr, ntohs(udph->dest)); return NF_QUEUE_NR(MY_QUEUE_NUM); } else { printk("%-15s udp accept %pI4:%d -> %pI4:%d\n", hook_name((int)priv), &iph->saddr, ntohs(udph->source), &iph->daddr, ntohs(udph->dest)); return NF_ACCEPT; } } return NF_ACCEPT; } static struct nf_hook_ops my_hooks[] = { { .hook = generic_hook, .pf = NFPROTO_IPV4, .hooknum = 
NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_LAST, .priv = NF_INET_PRE_ROUTING, }, { .hook = generic_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_LAST, .priv = NF_INET_PRE_ROUTING, }, { .hook = generic_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_LAST, .priv = NF_INET_PRE_ROUTING, }, { .hook = generic_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_LAST, .priv = NF_INET_PRE_ROUTING, }, { .hook = generic_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_LAST, .priv = NF_INET_PRE_ROUTING, }, }; int init_module(void) { printk("my init.\n"); return nf_register_net_hooks(&init_net,my_hooks, ARRAY_SIZE(my_hooks)); } void cleanup_module(void) { printk("my exit.\n"); nf_unregister_net_hooks(&init_net,my_hooks, ARRAY_SIZE(my_hooks)); }
# Out-of-tree build of test.ko against the build tree of the running kernel.
obj-m := test.o

PWD := $(shell pwd)
KVER := $(shell uname -r)
KDIR := /lib/modules/$(KVER)/build

# ccflags-y is the Kbuild replacement for the deprecated EXTRA_CFLAGS.
ccflags-y += -Wall -g

.PHONY: all clean

# Recipe lines must start with a tab character.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
使用nc 命令启动UDP服务器
nc -l -u 10086
使用python 脚本做客户端
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Interactive UDP test client.

Sends each non-empty line typed by the user to the UDP server, prints the
reply, and stops after the user has sent "exit".
"""
import socket

# Address of the UDP server under test.
SERVER_ADDR = ("192.168.10.7", 10086)


def main():
    # The context manager closes the socket on every exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        while True:
            data = input("Please input your name: ")
            if not data:
                continue
            s.sendto(data.encode(), SERVER_ADDR)
            # Keep the sender of the reply in its own variable so the
            # configured server address is not overwritten.
            response, peer = s.recvfrom(1024)
            print(response.decode())
            if data == "exit":
                print("Session is over from the server %s:%s\n" % peer)
                break


if __name__ == "__main__":
    main()
结果
