Kubernetes 的 POD 默认不支持多网卡设置,我们通过 Multus CNI 来为 Pod 建立多网络接口。
Multus 安装很简单
$ git clone https://github.com/intel/multus-cni.git && cd multus-cni
$ cat ./images/multus-daemonset.yml | kubectl apply -f -
启动一个 Multus DaemonSet,它会在每个节点上运行一个 Pod,并把 Multus 二进制文件放到该节点的 /opt/cni/bin 目录下
读取 /etc/cni/net.d 目录中按字典顺序(字母顺序)排在第一位的配置文件,并在每个节点上为 Multus 生成一个新的配置文件 /etc/cni/net.d/00-multus.conf;该配置是自动生成的,以默认网络配置(即按字母顺序排在第一位的那个配置)为基础
在每个节点上创建一个/etc/cni/net.d/multus.d目录,其中包含 Multus 访问 Kubernetes API 的身份验证信息。
安装 network-attachment-definition CRD,这个 CRD 存储着其他 CNI 的信息,在 Multus 看到使用此资源时,会调用设置的 CNI 去配置网络。
其中第二点的含义是:安装 Multus 时,会把原有的 CNI 配置作为 delegate(委托)放进 Multus 自己的 CNI 配置中;Multus 工作时,仍会调用原有的 CNI 来创建第一张网卡。
如图:
配置变化, 可以看到 原有的 CNI 配置完全存在于 Multus 的 delegates 里,通过配置变化,containerd 调用 CNI 时实际调用的是 Multus 的 CNI。
/etc/cni/net.d/01-kube-ovn.conflist
{
"name":"kube-ovn",
"cniVersion":"0.3.1",
"plugins":[
{
"type":"kube-ovn",
"server_socket":"/run/openvswitch/kube-ovn-daemon.sock"
},
{
"type":"portmap",
"capabilities":{
"portMappings":true
}
}
]
}
cat /etc/cni/net.d/00-multus.conf | jq
{
"capabilities": {
"portMappings": true
},
"cniVersion": "0.3.1",
"delegates": [
{
"cniVersion": "0.3.1",
"name": "kube-ovn",
"plugins": [
{
"server_socket": "/run/openvswitch/kube-ovn-daemon.sock",
"type": "kube-ovn"
},
{
"capabilities": {
"portMappings": true
},
"type": "portmap"
}
]
}
],
"logLevel": "debug",
"logToStderr": true,
"kubeconfig": "/etc/cni/net.d/multus.d/multus.kubeconfig",
"name": "multus-cni-network",
"type": "multus"
}
希望两个网卡都使用 kube-ovn 为 pod 创建
Network-Attachment-Definition 创建
apiVersion: v1
kind: Namespace
metadata:
name: mec-nets
---
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: attachnet1
namespace: mec-nets
spec:
config: '{
"cniVersion": "0.3.0",
"type": "kube-ovn",
"server_socket": "/run/openvswitch/kube-ovn-daemon.sock",
"provider": "attachnet1.mec-nets.ovn"
}'
创建 Pod
apiVersion: v1
kind: Pod
metadata:
name: pod9
annotations:
ovn.kubernetes.io/logical_switch: subnet-ipv4-ns-demo-100
k8s.v1.cni.cncf.io/networks: mec-nets/attachnet1 # 选择 NetworkAttachmentDefinition
attachnet1.mec-nets.ovn.kubernetes.io/logical_switch: subnet-ipv4-ns-demo-100
spec:
containers:
- name: pod9
command: ["/bin/ash", "-c", "trap : TERM INT; sleep 36000 & wait"]
image: rancher/curl
nodeSelector:
kubernetes.io/hostname: eci2
符合 CNI 的标准规范,在 containerd 调用 plugin (multus 二进制) 后,multus 进行处理
pkg/multus/multus.go L557
// CmdAdd (excerpt) begins by parsing the CNI network configuration held in
// args.StdinData with types.LoadNetConf. Only this first step is shown here;
// the rest of the function body is omitted from the excerpt.
func CmdAdd(args *skel.CmdArgs, exec invoke.Exec, kubeClient *k8s.ClientInfo) (cnitypes.Result, error) {
n, err := types.LoadNetConf(args.StdinData)
}
n, err := types.LoadNetConf(args.StdinData)
_, kc, err := k8s.TryLoadPodDelegates(pod, n, kubeClient, resourceMap)
if err != nil {
return nil, cmdErr(k8sArgs, "error loading k8s delegates k8s args: %v", err)
}
v1.multus-cni.io/default-network
是否携带 v1.multus-cni.io/default-network 键值对
pkg/k8sclient/k8sclient.go L323
func TryLoadPodDelegates(pod *v1.Pod, conf *types.NetConf, clientInfo *ClientInfo, resourceMap map[string]*types.ResourceInfo) (int, *ClientInfo, error) {
var err error
......
delegate, err := tryLoadK8sPodDefaultNetwork(clientInfo, pod, conf)
if err != nil {
return 0, nil, logging.Errorf("TryLoadPodDelegates: error in loading K8s cluster default network from pod annotation: %v", err)
}
if delegate != nil {
logging.Debugf("TryLoadPodDelegates: Overwrite the cluster default network with %v from pod annotations", delegate)
conf.Delegates[0] = delegate
}
k8s.v1.cni.cncf.io/networks
是否携带 k8s.v1.cni.cncf.io/networks 键值对。可以看到,该注解中可以配置多个网络。
networks, err := GetPodNetwork(pod)
parsePodNetworkAnnotation
拆解 k8s.v1.cni.cncf.io/networks
对应的值用于初始化 types.NetworkSelectionElement
并追加至切片:
// GetPodNetwork extracts the network selection elements requested by a pod.
//
// It reads the pod annotation keyed by networkAttachmentAnnot and hands its
// value, together with the pod's own namespace, to parsePodNetworkAnnotation.
// A missing or empty annotation yields a *NoK8sNetworkError; a parse failure
// is returned unchanged.
func GetPodNetwork(pod *v1.Pod) ([]*types.NetworkSelectionElement, error) {
	logging.Debugf("GetPodNetwork: %v", pod)

	annotation := pod.Annotations[networkAttachmentAnnot]
	if len(annotation) == 0 {
		return nil, &NoK8sNetworkError{"no kubernetes network found"}
	}

	// The pod's namespace is passed as the default namespace for parsing.
	selections, parseErr := parsePodNetworkAnnotation(annotation, pod.ObjectMeta.Namespace)
	if parseErr != nil {
		return nil, parseErr
	}
	return selections, nil
}
像上面示例 pod9 中 mec-nets/attachnet1, network 的 Name 是 attachnet1, Namespace 是 mec-nets, InterfaceRequest 可以命名网卡名称, 如果要 eth10 则命名为 mec-nets/attachnet1@eth10
if networks != nil {
delegates, err := GetNetworkDelegates(clientInfo, pod, networks, conf, resourceMap)
if err != nil {
if _, ok := err.(*NoK8sNetworkError); ok {
return 0, clientInfo, nil
}
return 0, nil, logging.Errorf("TryLoadPodDelegates: error in getting k8s network for pod: %v", err)
}
if err = conf.AddDelegates(delegates); err != nil {
return 0, nil, err
}
return len(delegates), clientInfo, err
}
getKubernetesDelegate
pkg/k8sclient/k8sclient.go L254
// getKubernetesDelegate fetches the NetworkAttachmentDefinition identified by
// net.Name in net.Namespace and turns its CNI configuration into a
// DelegateNetConf.
//
// If the lookup fails, a "NoNetworkFound" warning event is recorded on the pod
// and an error is returned. resourceMap is returned alongside both the result
// and every error.
func getKubernetesDelegate(client *ClientInfo, net *types.NetworkSelectionElement, confdir string, pod *v1.Pod, resourceMap map[string]*types.ResourceInfo) (*types.DelegateNetConf, map[string]*types.ResourceInfo, error) {
logging.Debugf("getKubernetesDelegate: %v, %v, %s, %v, %v", client, net, confdir, pod, resourceMap)
// NOTE(review): client.NetClient is dereferenced here, before the
// `client != nil` check below, so that guard cannot protect this call —
// confirm callers never pass a nil client.
customResource, err := client.NetClient.NetworkAttachmentDefinitions(net.Namespace).Get(context.TODO(), net.Name, metav1.GetOptions{})
if err != nil {
errMsg := fmt.Sprintf("cannot find a network-attachment-definition (%s) in namespace (%s): %v", net.Name, net.Namespace, err)
if client != nil {
client.Eventf(pod, v1.EventTypeWarning, "NoNetworkFound", errMsg)
}
return nil, resourceMap, logging.Errorf("getKubernetesDelegate: " + errMsg)
}
// Part of the function is elided in this excerpt; deviceID and resourceName
// used below are presumably set in the omitted section — verify upstream.
......
// Obtain the raw CNI configuration bytes for the fetched definition.
configBytes, err := netutils.GetCNIConfig(customResource, confdir)
if err != nil {
return nil, resourceMap, err
}
// Build the delegate from the raw configuration and the selection element.
delegate, err := types.LoadDelegateNetConf(configBytes, net, deviceID, resourceName)
if err != nil {
return nil, resourceMap, err
}
return delegate, resourceMap, nil
}
解析后得到 NetworkAttachmentDefinition 的 spec 字段中的配置字符串:
{
"cniVersion": "0.3.0",
"type": "kube-ovn",
"server_socket": "/run/openvswitch/kube-ovn-daemon.sock",
"provider": "attachnet1.mec-nets.ovn"
}
也就是说,该 NAD 对应的 delegate 使用 kube-ovn 作为 CNI 插件。
在补充完配置结构体后,遍历它的 Delegates 字段:
/pkg/multus/multus.go L612
var result, tmpResult cnitypes.Result
var netStatus []nettypes.NetworkStatus
for idx, delegate := range n.Delegates {
ifName := getIfname(delegate, args.IfName, idx)
像 mec-nets/attachnet1@eth10 这种已指定网卡名的,直接使用指定的名称;未指定的,主网络(master plugin)沿用 CNI 传入的网卡名,其余的按 net + 索引(如 net1)命名:
func getIfname(delegate *types.DelegateNetConf, argif string, idx int) string {
logging.Debugf("getIfname: %v, %s, %d", delegate, argif, idx)
if delegate.IfnameRequest != "" {
return delegate.IfnameRequest
}
if delegate.MasterPlugin {
// master plugin always uses the CNI-provided interface name
return argif
}
// Otherwise construct a unique interface name from the delegate's
// position in the delegate list
return fmt.Sprintf("net%d", idx)
}
if delegate.ConfListPlugin {
result, err = conflistAdd(rt, delegate.Bytes, multusNetconf, exec)
if err != nil {
return nil, err
}
} else {
result, err = confAdd(rt, delegate.Bytes, multusNetconf, exec)
if err != nil {
return nil, err
}
}
// conflistAdd performs the CNI ADD operation for a delegate whose
// configuration is a plugin list (conflist).
//
// rt is the runtime configuration handed to libcni, rawnetconflist is the raw
// conflist bytes, multusNetconf supplies BinDir and CNIDir, and exec is the
// plugin executor given to libcni. It returns the result of AddNetworkList, or
// an error when the bytes cannot be converted into a conflist or the ADD call
// itself fails.
func conflistAdd(rt *libcni.RuntimeConf, rawnetconflist []byte, multusNetconf *types.NetConf, exec invoke.Exec) (cnitypes.Result, error) {
	logging.Debugf("conflistAdd: %v, %s", rt, string(rawnetconflist))

	// In part, adapted from K8s pkg/kubelet/dockershim/network/cni/cni.go
	// BinDir from the Multus config is placed ahead of the CNI_PATH entries.
	pluginDirs := []string{multusNetconf.BinDir}
	pluginDirs = append(pluginDirs, filepath.SplitList(os.Getenv("CNI_PATH"))...)
	cni := libcni.NewCNIConfigWithCacheDir(pluginDirs, multusNetconf.CNIDir, exec)

	netList, parseErr := libcni.ConfListFromBytes(rawnetconflist)
	if parseErr != nil {
		return nil, logging.Errorf("conflistAdd: error converting the raw bytes into a conflist: %v", parseErr)
	}

	addResult, addErr := cni.AddNetworkList(context.Background(), netList, rt)
	if addErr != nil {
		return nil, addErr
	}
	return addResult, nil
}
// confAdd performs the CNI ADD operation for a delegate whose configuration
// is a single plugin config (not a conflist). It is the counterpart of
// conflistAdd and is the function called in the non-ConfListPlugin branch.
//
// rt is the runtime configuration handed to libcni, rawNetconf is the raw
// single-plugin config bytes, multusNetconf supplies BinDir and CNIDir, and
// exec is the plugin executor given to libcni. It returns the result of
// AddNetwork, or an error when the bytes cannot be converted into a config or
// the ADD call itself fails.
func confAdd(rt *libcni.RuntimeConf, rawNetconf []byte, multusNetconf *types.NetConf, exec invoke.Exec) (cnitypes.Result, error) {
	logging.Debugf("confAdd: %v, %s", rt, string(rawNetconf))
	// In part, adapted from K8s pkg/kubelet/dockershim/network/cni/cni.go
	// BinDir from the Multus config is placed ahead of the CNI_PATH entries.
	binDirs := filepath.SplitList(os.Getenv("CNI_PATH"))
	binDirs = append([]string{multusNetconf.BinDir}, binDirs...)
	cniNet := libcni.NewCNIConfigWithCacheDir(binDirs, multusNetconf.CNIDir, exec)

	conf, err := libcni.ConfFromBytes(rawNetconf)
	if err != nil {
		return nil, logging.Errorf("error in converting the raw bytes to conf: %v", err)
	}

	result, err := cniNet.AddNetwork(context.Background(), conf, rt)
	if err != nil {
		return nil, err
	}
	return result, nil
}
conflistAdd: &{24a1efa35675f1382fa12a5849c0597bf4bcb7e4c1d7764dc2b5f3701b57b11c /var/run/netns/cni-3b58095c-52a9-c2c4-d9ba-ad30f44fee13 eth0 [[IgnoreUnknown true] [K8S_POD_NAMESPACE default] [K8S_POD_NAME pod9] [K8S_POD_INFRA_CONTAINER_ID 24a1efa35675f1382fa12a5849c0597bf4bcb7e4c1d7764dc2b5f3701b57b11c] [K8S_POD_UID 3da6f436-bb79-461e-8595-6779f972ca76] [K8S_POD_NAMESPACE default] [K8S_POD_NAME pod9] [K8S_POD_INFRA_CONTAINER_ID 24a1efa35675f1382fa12a5849c0597bf4bcb7e4c1d7764dc2b5f3701b57b11c] [K8S_POD_UID 3da6f436-bb79-461e-8595-6779f972ca76] [IgnoreUnknown 1]] map[] }, {"cniVersion":"0.3.1","name":"kube-ovn","plugins":[{"server_socket":"/run/openvswitch/kube-ovn-daemon.sock","type":"kube-ovn"},{"capabilities":{"portMappings":true},"type":"portmap"}]}
confAdd: &{24a1efa35675f1382fa12a5849c0597bf4bcb7e4c1d7764dc2b5f3701b57b11c /var/run/netns/cni-3b58095c-52a9-c2c4-d9ba-ad30f44fee13 net1 [[IgnoreUnknown true] [K8S_POD_NAMESPACE default] [K8S_POD_NAME pod9] [K8S_POD_INFRA_CONTAINER_ID 24a1efa35675f1382fa12a5849c0597bf4bcb7e4c1d7764dc2b5f3701b57b11c] [K8S_POD_UID 3da6f436-bb79-461e-8595-6779f972ca76] [K8S_POD_NAMESPACE default] [K8S_POD_NAME pod9] [K8S_POD_INFRA_CONTAINER_ID 24a1efa35675f1382fa12a5849c0597bf4bcb7e4c1d7764dc2b5f3701b57b11c] [K8S_POD_UID 3da6f436-bb79-461e-8595-6779f972ca76] [IgnoreUnknown 1]] map[] }, {"cniVersion":"0.3.0","name":"attachnet1","provider":"attachnet1.mec-nets.ovn","server_socket":"/run/openvswitch/kube-ovn-daemon.sock","type":"kube-ovn"}