1. What is KNI, and why do we need it
KNI (Kernel NIC Interface) is DPDK's solution for exchanging packets between user space and kernel space. It emulates a virtual network interface that provides communication between a DPDK application and the Linux kernel; through the KNI interface, packets received in user space can be forwarded into the Linux protocol stack.
Why bother with a KNI interface at all? DPDK's high-speed forwarding performance is excellent, but it has shortcomings of its own, and the lack of a protocol stack is one of them. Of course, that may well have been a deliberate design decision: running every packet through a protocol stack would drag packet-processing performance down considerably.
A picture tells the story best:

[Figure: KNI mbuf usage flow between user space and kernel space]

The figure above shows KNI's mbuf usage flow, which is also the path packets travel, since in the code a packet is really just a memory pointer. User space sits to the right of rx_q, kernel space to the left. On the receive side, a packet is finally handed to the Linux protocol stack by netif_rx(), which requires converting the DPDK mbuf into an skb (struct sk_buff).
When Linux sends a packet out through the KNI port, the callback kni_net_tx() is invoked, and the packet is converted the other way before being transmitted on the physical port.
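Before diving into the code, here is a stripped-down sketch of that kernel-side receive path, modeled loosely on kni_net_rx_normal() in kni_net.c. Names such as rx_q, free_q, mbuf_va, mbuf_kva and MBUF_BURST_SZ follow the kni_dev structure that appears later in this post; burst sizing and most error handling are omitted, so treat it as illustration rather than the actual function:

/* Sketch: drain mbufs from rx_q, copy each into an skb, hand it to the
 * Linux stack, then return the consumed mbufs through free_q so the DPDK
 * side can rte_pktmbuf_free() them. */
static void
kni_net_rx_sketch(struct kni_dev *kni)
{
	/* translate a user-space address into this kernel mapping */
#define U2K(p) ((void *)((char *)(p) - (char *)kni->mbuf_va + (char *)kni->mbuf_kva))
	void *va[MBUF_BURST_SZ];
	unsigned i, num;

	num = kni_fifo_get(kni->rx_q, (void **)va, MBUF_BURST_SZ);
	for (i = 0; i < num; i++) {
		struct rte_kni_mbuf *kva = U2K(va[i]);
		char *data = U2K((char *)kva->buf_addr + kva->data_off);
		struct sk_buff *skb = dev_alloc_skb(kva->data_len + 2);

		if (!skb)
			break;
		skb_reserve(skb, 2);	/* align the IP header */
		memcpy(skb_put(skb, kva->data_len), data, kva->data_len);
		skb->dev = kni->net_dev;
		skb->protocol = eth_type_trans(skb, kni->net_dev);
		netif_rx(skb);		/* enter the Linux protocol stack */
	}
	/* hand the consumed mbufs back to user space for freeing */
	kni_fifo_put(kni->free_q, (void **)va, num);
#undef U2K
}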
2. Main code analysis
1. Like the igb_uio module, KNI consists of kernel code and user-space code. The kernel module builds to rte_kni.ko and is inserted into the kernel first; DPDK also provides a user-space example application. Let's start with the KNI kernel module code.
In kni_misc.c, the module entry point is
module_init(kni_init);
so execution starts from kni_init:
static int __init
kni_init(void)
{
	int rc;

	KNI_PRINT("######## DPDK kni module loading ########\n");

	if (kni_parse_kthread_mode() < 0) { /* KNI kthread mode: single or multiple */
		KNI_ERR("Invalid parameter for kthread_mode\n");
		return -EINVAL;
	}

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	rc = register_pernet_subsys(&kni_net_ops);
#else
	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
	if (rc)
		return -EPERM;

	rc = misc_register(&kni_misc);
	if (rc != 0) {
		KNI_ERR("Misc registration failed\n");
		goto out;
	}

	/* Configure the lo mode according to the input parameter */
	kni_net_config_lo_mode(lo_mode);

	KNI_PRINT("######## DPDK kni module loaded ########\n");

	return 0;

out:
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	unregister_pernet_subsys(&kni_net_ops);
#else
	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
#endif
	return rc;
}
The code is straightforward. It first selects the KNI kthread mode, single or multiple: in single-thread mode one kernel thread services RX for all KNI ports, while in multi-thread mode each KNI port is serviced by a kernel thread of its own. The mode is selected by a module parameter at insertion time (e.g. insmod rte_kni.ko kthread_mode=multiple).
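For reference, the single-thread daemon looks roughly like this; a condensed sketch of kni_thread_single() from kni_misc.c (KNI_RX_LOOP_NUM and KNI_KTHREAD_RESCHEDULE_INTERVAL are constants from that file, and the loop details differ between DPDK versions):

/* Sketch: one kernel thread polls every registered KNI device. */
static int
kni_thread_single(void *data)
{
	struct kni_net *knet = data;
	struct kni_dev *dev;
	int j;

	while (!kthread_should_stop()) {
		down_read(&knet->kni_list_lock);
		for (j = 0; j < KNI_RX_LOOP_NUM; j++)
			list_for_each_entry(dev, &knet->kni_list_head, list) {
				kni_net_rx(dev);        /* drain rx_q into the stack */
				kni_net_poll_resp(dev); /* wake waiters on resp_q */
			}
		up_read(&knet->kni_list_lock);
		/* yield briefly; KNI polls, it has no interrupts */
		schedule_timeout_interruptible(
			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
	}
	return 0;
}

Multi-thread mode runs essentially the same loop in kni_thread_multiple(), but over a single device per thread.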
Next it calls misc_register() to register KNI as a misc device. The kni_misc structure defines that device's attributes and operations:
static struct miscdevice kni_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = KNI_DEVICE,
	.fops = &kni_fops,
};
The part that matters here is the structure behind .fops:
static struct file_operations kni_fops = {
	.owner = THIS_MODULE,
	.open = kni_open,
	.release = kni_release,
	.unlocked_ioctl = (void *)kni_ioctl,
	.compat_ioctl = (void *)kni_compat_ioctl,
};
The main operations involved are kni_open, kni_release, and kni_ioctl, each corresponding to a function:
static int
kni_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct kni_net *knet = net_generic(net, kni_net_id);

	/* kni device can be opened by one user only per netns */
	if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
		return -EBUSY;

	/* Create kernel thread for single mode */
	if (multiple_kthread_on == 0) {
		KNI_PRINT("Single kernel thread for all KNI devices\n");
		/* Create kernel thread for RX */
		knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
						"kni_single");
		if (IS_ERR(knet->kni_kthread)) {
			KNI_ERR("Unable to create kernel thread\n");
			return PTR_ERR(knet->kni_kthread);
		}
	} else
		KNI_PRINT("Multiple kernel thread mode enabled\n");

	file->private_data = get_net(net);
	KNI_PRINT("/dev/kni opened\n");

	return 0;
}
On open, kni_open creates the kernel thread if we are in single-thread mode and opens /dev/kni; at this point the kni device node is visible under /dev on the host.
static int
kni_ioctl(struct inode *inode,
	unsigned int ioctl_num,
	unsigned long ioctl_param)
{
	int ret = -EINVAL;
	struct net *net = current->nsproxy->net_ns;

	KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);

	/*
	 * Switch according to the ioctl called
	 */
	switch (_IOC_NR(ioctl_num)) {
	case _IOC_NR(RTE_KNI_IOCTL_TEST):
		/* For test only, not used */
		break;
	case _IOC_NR(RTE_KNI_IOCTL_CREATE):
		ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
		break;
	case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
		ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
		break;
	default:
		KNI_DBG("IOCTL default\n");
		break;
	}

	return ret;
}
kni_ioctl is the interface for communicating with user space; the important case is kni_ioctl_create:
static int
kni_ioctl_create(struct net *net,
		unsigned int ioctl_num, unsigned long ioctl_param)
{
	struct kni_net *knet = net_generic(net, kni_net_id);
	int ret;
	struct rte_kni_device_info dev_info;
	struct pci_dev *pci = NULL;
	struct pci_dev *found_pci = NULL;
	struct net_device *net_dev = NULL;
	struct net_device *lad_dev = NULL;
	struct kni_dev *kni, *dev, *n;

	printk(KERN_INFO "KNI: Creating kni...\n");
	/* Check the buffer size, to avoid warning */
	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
		return -EINVAL;

	/* Copy kni info from user space */
	ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
	if (ret) {
		KNI_ERR("copy_from_user in kni_ioctl_create");
		return -EIO;
	}

	/**
	 * Check if the cpu core id is valid for binding,
	 * for multiple kernel thread mode.
	 */
	if (multiple_kthread_on && dev_info.force_bind &&
				!cpu_online(dev_info.core_id)) {
		KNI_ERR("cpu %u is not online\n", dev_info.core_id);
		return -EINVAL;
	}

	/* Check if it has been created */
	down_read(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		if (kni_check_param(dev, &dev_info) < 0) {
			up_read(&knet->kni_list_lock);
			return -EINVAL;
		}
	}
	up_read(&knet->kni_list_lock);

	net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_UNKNOWN
							NET_NAME_UNKNOWN,
#endif
							kni_net_init);
	if (net_dev == NULL) {
		KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
		return -EBUSY;
	}

	dev_net_set(net_dev, net);

	kni = netdev_priv(net_dev);

	kni->net_dev = net_dev;
	kni->group_id = dev_info.group_id;
	kni->core_id = dev_info.core_id;
	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);

	/* Translate user space info into kernel space info */
	kni->tx_q = phys_to_virt(dev_info.tx_phys);
	kni->rx_q = phys_to_virt(dev_info.rx_phys);
	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
	kni->free_q = phys_to_virt(dev_info.free_phys);

	kni->req_q = phys_to_virt(dev_info.req_phys);
	kni->resp_q = phys_to_virt(dev_info.resp_phys);
	kni->sync_va = dev_info.sync_va;
	kni->sync_kva = phys_to_virt(dev_info.sync_phys);

	kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
	kni->mbuf_va = dev_info.mbuf_va;

#ifdef RTE_KNI_VHOST
	kni->vhost_queue = NULL;
	kni->vq_status = BE_STOP;
#endif
	kni->mbuf_size = dev_info.mbuf_size;

	KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
		(unsigned long long) dev_info.tx_phys, kni->tx_q);
	KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
		(unsigned long long) dev_info.rx_phys, kni->rx_q);
	KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
		(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
	KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n",
		(unsigned long long) dev_info.free_phys, kni->free_q);
	KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n",
		(unsigned long long) dev_info.req_phys, kni->req_q);
	KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
		(unsigned long long) dev_info.resp_phys, kni->resp_q);
	KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n",
		(unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
	KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va);
	KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size);

	KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
					dev_info.bus,
					dev_info.devid,
					dev_info.function,
					dev_info.vendor_id,
					dev_info.device_id);

	pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);

	/* Support Ethtool */
	while (pci) {
		KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
					pci->bus->number,
					PCI_SLOT(pci->devfn),
					PCI_FUNC(pci->devfn));

		if ((pci->bus->number == dev_info.bus) &&
			(PCI_SLOT(pci->devfn) == dev_info.devid) &&
			(PCI_FUNC(pci->devfn) == dev_info.function)) {
			found_pci = pci;
			switch (dev_info.device_id) {
			#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
			#include <rte_pci_dev_ids.h>
				ret = igb_kni_probe(found_pci, &lad_dev);
				break;
			#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
							case (dev):
			#include <rte_pci_dev_ids.h>
				ret = ixgbe_kni_probe(found_pci, &lad_dev);
				break;
			default:
				ret = -1;
				break;
			}

			KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
							pci, lad_dev);
			if (ret == 0) {
				kni->lad_dev = lad_dev;
				kni_set_ethtool_ops(kni->net_dev);
			} else {
				KNI_ERR("Device not supported by ethtool");
				kni->lad_dev = NULL;
			}

			kni->pci_dev = found_pci;
			kni->device_id = dev_info.device_id;
			break;
		}
		pci = pci_get_device(dev_info.vendor_id,
				dev_info.device_id, pci);
	}
	if (pci)
		pci_dev_put(pci);

	if (kni->lad_dev)
		memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
	else
		/*
		 * The stock code generates a random MAC address here via
		 * random_ether_addr()/eth_random_addr(); modified to copy
		 * the MAC handed down from user space instead (see below).
		 */
		memcpy(net_dev->dev_addr, &dev_info.kni_mac, ETH_ALEN);

	ret = register_netdev(net_dev);
	if (ret) {
		KNI_ERR("error %i registering device \"%s\"\n",
					ret, dev_info.name);
		kni_dev_remove(kni);
		return -ENODEV;
	}

#ifdef RTE_KNI_VHOST
	kni_vhost_init(kni);
#endif

	/**
	 * Create a new kernel thread for multiple mode, set its core affinity,
	 * and finally wake it up.
	 */
	if (multiple_kthread_on) {
		kni->pthread = kthread_create(kni_thread_multiple,
					(void *)kni,
					"kni_%s", kni->name);
		if (IS_ERR(kni->pthread)) {
			kni_dev_remove(kni);
			return -ECANCELED;
		}
		if (dev_info.force_bind)
			kthread_bind(kni->pthread, kni->core_id);
		wake_up_process(kni->pthread);
	}

	down_write(&knet->kni_list_lock);
	list_add(&kni->list, &knet->kni_list_head);
	up_write(&knet->kni_list_lock);

	return 0;
}
The line ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); copies the message passed down from user space; dev_info mainly holds the parameters of the virtual KNI port. Based on those parameters, ret = register_netdev(net_dev); then registers the KNI network device,
which completes the creation: a virtual network interface now exists. The memcpy(net_dev->dev_addr, &dev_info.kni_mac, ETH_ALEN) branch is my own modification: following the documented method I could not ping through the interface at all, but setting the KNI interface's MAC to that of the physical port DPDK took over made traffic flow. The stock code generates a random MAC instead.
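For orientation, the message that crosses the ioctl boundary looks roughly like this; an abridged sketch of struct rte_kni_device_info from rte_kni_common.h of this DPDK generation (field order and exact types vary by version). Note that kni_mac is not in the stock structure; it is the field I added to carry the MAC fix described above:

/* Shared between user space and the kernel module (sketch, abridged). */
struct rte_kni_device_info {
	char name[RTE_KNI_NAMESIZE];	/* network device name for KNI */

	phys_addr_t tx_phys;	/* physical addresses of the shared FIFOs; */
	phys_addr_t rx_phys;	/* the kernel maps them back into its own  */
	phys_addr_t alloc_phys;	/* address space with phys_to_virt() above */
	phys_addr_t free_phys;
	phys_addr_t req_phys;
	phys_addr_t resp_phys;

	void *sync_va;		/* req/resp sync memory area */
	phys_addr_t sync_phys;

	void *mbuf_va;		/* base of the mbuf mempool memory */
	phys_addr_t mbuf_phys;
	unsigned mbuf_size;

	/* PCI info, used to locate the matching device for ethtool support */
	uint16_t vendor_id;
	uint16_t device_id;
	uint8_t bus;
	uint8_t devid;
	uint8_t function;

	uint16_t group_id;	/* group ID */
	uint32_t core_id;	/* lcore ID to bind the kthread to */
	uint8_t force_bind;	/* flag: bind kthread to core_id or not */

	char kni_mac[ETH_ALEN];	/* my addition: MAC copied from the DPDK port */
};

Every *_phys field is translated back into a kernel virtual address with phys_to_virt() in kni_ioctl_create() above, which is how both sides end up operating on the same rings.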
2. For the user-space side we mainly analyze the example that DPDK provides:
int
main(int argc, char** argv)
{
	int ret;
	uint8_t nb_sys_ports, port;
	unsigned i;

	/* Associate signal_handler function with USR signals */
	signal(SIGUSR1, signal_handler);
	signal(SIGUSR2, signal_handler);
	signal(SIGRTMIN, signal_handler);
	signal(SIGINT, signal_handler);

	/* Initialise EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)\n", ret);
	argc -= ret;
	argv += ret;

	/* Parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Could not parse input parameters\n");

	/* Create the mbuf pool */
	pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
		MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
	if (pktmbuf_pool == NULL) {
		rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
		return -1;
	}

	/* Get number of ports found in scan */
	nb_sys_ports = rte_eth_dev_count();
	if (nb_sys_ports == 0)
		rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n");

	/* Check if the configured port ID is valid */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
		if (kni_port_params_array[i] && i >= nb_sys_ports)
			rte_exit(EXIT_FAILURE, "Configured invalid "
						"port ID %u\n", i);

	/* Initialize KNI subsystem */
	init_kni();

	/* Initialise each port */
	for (port = 0; port < nb_sys_ports; port++) {
		/* Skip ports that are not enabled */
		if (!(ports_mask & (1 << port)))
			continue;
		init_port(port);

		if (port >= RTE_MAX_ETHPORTS)
			rte_exit(EXIT_FAILURE, "Can not use more than "
				"%d ports for kni\n", RTE_MAX_ETHPORTS);

		kni_alloc(port);
	}
	check_all_ports_link_status(nb_sys_ports, ports_mask);

	/* Launch per-lcore function on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(i) {
		if (rte_eal_wait_lcore(i) < 0)
			return -1;
	}

	/* Release resources */
	for (port = 0; port < nb_sys_ports; port++) {
		if (!(ports_mask & (1 << port)))
			continue;
		kni_free_kni(port);
	}
#ifdef RTE_LIBRTE_XEN_DOM0
	rte_kni_close();
#endif
	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
		if (kni_port_params_array[i]) {
			rte_free(kni_port_params_array[i]);
			kni_port_params_array[i] = NULL;
		}

	return 0;
}
main() performs the usual EAL initialization and then creates pktmbuf_pool. The functions worth a closer look are init_kni(), kni_alloc(port), and rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER). init_kni() initializes the KNI subsystem:
static void
init_kni(void)
{
	unsigned int num_of_kni_ports = 0, i;
	struct kni_port_params **params = kni_port_params_array;

	/* Calculate the maximum number of KNI interfaces that will be used */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (kni_port_params_array[i]) {
			num_of_kni_ports += (params[i]->nb_lcore_k ?
				params[i]->nb_lcore_k : 1);
		}
	}

	/* Invoke rte KNI init to preallocate the ports */
	rte_kni_init(num_of_kni_ports);
}
The real work happens inside rte_kni_init:
void
rte_kni_init(unsigned int max_kni_ifaces)
{
	uint32_t i;
	struct rte_kni_memzone_slot *it;
	const struct rte_memzone *mz;
#define OBJNAMSIZ 32
	char obj_name[OBJNAMSIZ];
	char mz_name[RTE_MEMZONE_NAMESIZE];

	/* Immediately return if KNI is already initialized */
	if (kni_memzone_pool.initialized) {
		RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
		return;
	}

	if (max_kni_ifaces == 0) {
		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
							max_kni_ifaces);
		rte_panic("Unable to initialize KNI\n");
	}

	/* Check FD and open */
	if (kni_fd < 0) {
		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
		if (kni_fd < 0)
			rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
	}

	/* Allocate slot objects */
	kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
					rte_malloc(NULL,
					sizeof(struct rte_kni_memzone_slot) *
					max_kni_ifaces,
					0);
	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);

	/* Initialize general pool variables */
	kni_memzone_pool.initialized = 1;
	kni_memzone_pool.max_ifaces = max_kni_ifaces;
	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
	rte_spinlock_init(&kni_memzone_pool.mutex);

	/* Pre-allocate all memzones of all the slots; panic on error */
	for (i = 0; i < max_kni_ifaces; i++) {

		/* Recover current slot */
		it = &kni_memzone_pool.slots[i];
		it->id = i;

		/* Allocate KNI context */
		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
					SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_ctx = mz;

		/* TX RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_tx_q = mz;

		/* RX RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_rx_q = mz;

		/* ALLOC RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_alloc_q = mz;

		/* FREE RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_free_q = mz;

		/* Request RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_req_q = mz;

		/* Response RING */
		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_resp_q = mz;

		/* Req/Resp sync mem area */
		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
							SOCKET_ID_ANY, 0);
		KNI_MEM_CHECK(mz == NULL);
		it->m_sync_addr = mz;

		if ((i+1) == max_kni_ifaces) {
			it->next = NULL;
			kni_memzone_pool.free_tail = it;
		} else
			it->next = &kni_memzone_pool.slots[i+1];
	}

	return;

kni_fail:
	rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n",
			 max_kni_ifaces);
}
This pre-allocates a memzone for each of the FIFOs shown in the figure above (tx_q, rx_q, alloc_q, free_q, req_q, resp_q, plus the sync area), one slot per potential KNI interface.
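All of these rings are instances of the same lockless single-producer/single-consumer FIFO that KNI places in shared memory, so user space and the kernel can exchange mbuf pointers without locks. Roughly, with names following struct rte_kni_fifo (the actual code lives in the kni_fifo.h headers on both sides and varies slightly between versions):

/* Shared-memory ring: one producer, one consumer, no locks.
 * 'len' is a power of two so indices wrap with a simple mask. */
struct rte_kni_fifo {
	volatile unsigned write;	/* next position to be written */
	volatile unsigned read;		/* next position to be read */
	unsigned len;			/* circular buffer length */
	unsigned elem_size;		/* pointer size, for 32/64-bit compat */
	void *volatile buffer[];	/* the slots holding mbuf pointers */
};

/* Sketch of kni_fifo_put: enqueue up to num pointers, return how many fit. */
static inline unsigned
kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
{
	unsigned i = 0;
	unsigned fifo_write = fifo->write;
	unsigned new_write = fifo_write;
	unsigned fifo_read = fifo->read;

	for (i = 0; i < num; i++) {
		new_write = (new_write + 1) & (fifo->len - 1);
		if (new_write == fifo_read)
			break;			/* ring is full */
		fifo->buffer[fifo_write] = data[i];
		fifo_write = new_write;
	}
	fifo->write = fifo_write;		/* publish to the consumer */
	return i;
}

kni_fifo_get() is symmetric, advancing read instead of write; because exactly one side ever updates write and the other ever updates read, no lock is needed.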
static int
kni_alloc(uint8_t port_id)
{
	uint8_t i;
	struct rte_kni *kni;
	struct rte_kni_conf conf;
	struct kni_port_params **params = kni_port_params_array;

	if (port_id >= RTE_MAX_ETHPORTS || !params[port_id])
		return -1;

	params[port_id]->nb_kni = params[port_id]->nb_lcore_k ?
				params[port_id]->nb_lcore_k : 1;

	for (i = 0; i < params[port_id]->nb_kni; i++) {
		/* Clear conf at first */
		memset(&conf, 0, sizeof(conf));
		if (params[port_id]->nb_lcore_k) {
			snprintf(conf.name, RTE_KNI_NAMESIZE,
					"vEth%u_%u", port_id, i);
			conf.core_id = params[port_id]->lcore_k[i];
			conf.force_bind = 1;
		} else
			snprintf(conf.name, RTE_KNI_NAMESIZE,
						"vEth%u", port_id);
		conf.group_id = (uint16_t)port_id;
		conf.mbuf_size = MAX_PACKET_SZ;
		rte_eth_macaddr_get(port_id, (struct ether_addr *)&conf.kni_mac);
		/*
		 * The first KNI device associated to a port
		 * is the master, for multiple kernel thread
		 * environment.
		 */
		if (i == 0) {
			struct rte_kni_ops ops;
			struct rte_eth_dev_info dev_info;

			memset(&dev_info, 0, sizeof(dev_info));
			rte_eth_dev_info_get(port_id, &dev_info);
			conf.addr = dev_info.pci_dev->addr;
			conf.id = dev_info.pci_dev->id;

			memset(&ops, 0, sizeof(ops));
			ops.port_id = port_id;
			ops.change_mtu = kni_change_mtu;
			ops.config_network_if = kni_config_network_interface;

			kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
		} else
			kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL);

		if (!kni)
			rte_exit(EXIT_FAILURE, "Fail to create kni for "
						"port: %d\n", port_id);
		params[port_id]->kni[i] = kni;
	}

	return 0;
}
struct rte_kni *
rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
	      const struct rte_kni_conf *conf,
	      struct rte_kni_ops *ops)
{
	int ret;
	struct rte_kni_device_info dev_info;
	struct rte_kni *ctx;
	char intf_name[RTE_KNI_NAMESIZE];
	char mz_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;
	const struct rte_mempool *mp;
	struct rte_kni_memzone_slot *slot = NULL;

	if (!pktmbuf_pool || !conf || !conf->name[0])
		return NULL;

	/* Check if KNI subsystem has been initialized */
	if (kni_memzone_pool.initialized != 1) {
		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n");
		return NULL;
	}

	/* Get an available slot from the pool */
	slot = kni_memzone_pool_alloc();
	if (!slot) {
		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unusued ones.\n",
			kni_memzone_pool.max_ifaces);
		return NULL;
	}

	/* Recover ctx */
	ctx = slot->m_ctx->addr;
	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);

	if (ctx->in_use) {
		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
		return NULL;
	}
	memset(ctx, 0, sizeof(struct rte_kni));
	if (ops)
		memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops));

	memset(&dev_info, 0, sizeof(dev_info));
	dev_info.bus = conf->addr.bus;
	dev_info.devid = conf->addr.devid;
	dev_info.function = conf->addr.function;
	dev_info.vendor_id = conf->id.vendor_id;
	dev_info.device_id = conf->id.device_id;
	dev_info.core_id = conf->core_id;
	dev_info.force_bind = conf->force_bind;
	dev_info.group_id = conf->group_id;
	dev_info.mbuf_size = conf->mbuf_size;

	snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
	snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name);

	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
		dev_info.bus, dev_info.devid, dev_info.function,
			dev_info.vendor_id, dev_info.device_id);
	/* TX RING */
	mz = slot->m_tx_q;
	ctx->tx_q = mz->addr;
	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
	dev_info.tx_phys = mz->phys_addr;

	/* RX RING */
	mz = slot->m_rx_q;
	ctx->rx_q = mz->addr;
	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
	dev_info.rx_phys = mz->phys_addr;

	/* ALLOC RING */
	mz = slot->m_alloc_q;
	ctx->alloc_q = mz->addr;
	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
	dev_info.alloc_phys = mz->phys_addr;

	/* FREE RING */
	mz = slot->m_free_q;
	ctx->free_q = mz->addr;
	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
	dev_info.free_phys = mz->phys_addr;

	/* Request RING */
	mz = slot->m_req_q;
	ctx->req_q = mz->addr;
	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
	dev_info.req_phys = mz->phys_addr;

	/* Response RING */
	mz = slot->m_resp_q;
	ctx->resp_q = mz->addr;
	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
	dev_info.resp_phys = mz->phys_addr;

	/* Req/Resp sync mem area */
	mz = slot->m_sync_addr;
	ctx->sync_addr = mz->addr;
	dev_info.sync_va = mz->addr;
	dev_info.sync_phys = mz->phys_addr;

	/* MBUF mempool */
	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
		pktmbuf_pool->name);
	mz = rte_memzone_lookup(mz_name);
	KNI_MEM_CHECK(mz == NULL);
	mp = (struct rte_mempool *)mz->addr;
	/* KNI currently requires to have only one memory chunk */
	if (mp->nb_mem_chunks != 1)
		goto kni_fail;

	dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
	dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
	ctx->pktmbuf_pool = pktmbuf_pool;
	ctx->group_id = conf->group_id;
	ctx->slot_id = slot->id;
	ctx->mbuf_size = conf->mbuf_size;

	dev_info.kni_mac = conf->kni_mac;

	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
	KNI_MEM_CHECK(ret < 0);

	ctx->in_use = 1;

	/* Allocate mbufs and then put them into alloc_q */
	kni_allocate_mbufs(ctx);

	return ctx;

kni_fail:
	if (slot)
		kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]);

	return NULL;
}
The line ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info); is what passes dev_info into the kernel, triggering the kni_ioctl_create path analyzed above.
static int
main_loop(__rte_unused void *arg)
{
	uint8_t i, nb_ports = rte_eth_dev_count();
	int32_t f_stop;
	const unsigned lcore_id = rte_lcore_id();
	enum lcore_rxtx {
		LCORE_NONE,
		LCORE_RX,
		LCORE_TX,
		LCORE_MAX
	};
	enum lcore_rxtx flag = LCORE_NONE;

	for (i = 0; i < nb_ports; i++) {
		if (!kni_port_params_array[i])
			continue;
		if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
			flag = LCORE_RX;
			break;
		} else if (kni_port_params_array[i]->lcore_tx ==
						(uint8_t)lcore_id) {
			flag = LCORE_TX;
			break;
		}
	}

	if (flag == LCORE_RX) {
		RTE_LOG(INFO, APP, "Lcore %u is reading from port %d\n",
					kni_port_params_array[i]->lcore_rx,
					kni_port_params_array[i]->port_id);
		while (1) {
			f_stop = rte_atomic32_read(&kni_stop);
			if (f_stop)
				break;
			kni_ingress(kni_port_params_array[i]);
		}
	} else if (flag == LCORE_TX) {
		RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
					kni_port_params_array[i]->lcore_tx,
					kni_port_params_array[i]->port_id);
		while (1) {
			f_stop = rte_atomic32_read(&kni_stop);
			if (f_stop)
				break;
			kni_egress(kni_port_params_array[i]);
		}
	} else
		RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);

	return 0;
}
Each lcore then settles into its loop, receiving and transmitting packets through kni_ingress() and kni_egress() below.
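Before reading the two workers, it helps to know which FIFO each KNI burst API touches; the ring names are from the kernel's point of view, which is why a "tx to kni" lands in rx_q. A condensed sketch of the two library calls, based on lib/librte_kni/rte_kni.c of this generation (kni_free_mbufs() and kni_allocate_mbufs() are internal helpers of that file):

/* User -> kernel: packets go into rx_q (the kernel "receives" them),
 * and mbufs the kernel is done with are reclaimed from free_q. */
unsigned
rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
{
	unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num);

	/* Free any mbufs the kernel has returned via free_q */
	kni_free_mbufs(kni);

	return ret;
}

/* Kernel -> user: packets the kernel transmitted sit in tx_q,
 * and fresh empty mbufs are fed to the kernel through alloc_q. */
unsigned
rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
{
	unsigned ret = kni_fifo_get(kni->tx_q, (void **)mbufs, num);

	/* If buffers were removed, refill alloc_q with new mbufs */
	if (ret)
		kni_allocate_mbufs(kni);

	return ret;
}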
static void
kni_ingress(struct kni_port_params *p)
{
	uint8_t i, port_id;
	unsigned nb_rx, num;
	uint32_t nb_kni;
	struct rte_mbuf *pkts_burst[PKT_BURST_SZ];

	if (p == NULL)
		return;

	nb_kni = p->nb_kni;
	port_id = p->port_id;
	for (i = 0; i < nb_kni; i++) {
		/* Burst rx from eth */
		nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
		if (unlikely(nb_rx > PKT_BURST_SZ)) {
			RTE_LOG(ERR, APP, "Error receiving from eth\n");
			return;
		}
		/* Burst tx to kni */
		num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
		kni_stats[port_id].rx_packets += num;
		/*
		if (kni_stats[port_id].rx_packets != 0 &&
		    kni_stats[port_id].rx_packets % 20 == 0 && num > 0)
			printf("recv packet num : %"PRIu64"\n",
				kni_stats[port_id].rx_packets);
		*/
		rte_kni_handle_request(p->kni[i]);
		if (unlikely(num < nb_rx)) {
			/* Free mbufs not tx to kni interface */
			kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
			kni_stats[port_id].rx_dropped += nb_rx - num;
		}
	}
}
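Note the rte_kni_handle_request() call in the loop above: this is how control requests (MTU change, interface up/down) issued by the kernel side reach the application through req_q/resp_q, and it is where the kni_change_mtu/kni_config_network_interface callbacks registered in kni_alloc() get invoked. A trimmed sketch, again based on lib/librte_kni/rte_kni.c of this generation (the real function also sanity-checks the request pointer against sync_addr):

int
rte_kni_handle_request(struct rte_kni *kni)
{
	struct rte_kni_request *req = NULL;

	if (kni == NULL)
		return -1;

	/* Poll one request from req_q; usually there is none */
	if (kni_fifo_get(kni->req_q, (void **)&req, 1) != 1)
		return 0;

	/* Dispatch to the callbacks registered in rte_kni_alloc() */
	switch (req->req_id) {
	case RTE_KNI_REQ_CHANGE_MTU:
		if (kni->ops.change_mtu)
			req->result = kni->ops.change_mtu(kni->ops.port_id,
							  req->new_mtu);
		break;
	case RTE_KNI_REQ_CFG_NETWORK_IF:
		if (kni->ops.config_network_if)
			req->result = kni->ops.config_network_if(
					kni->ops.port_id, req->if_up);
		break;
	default:
		RTE_LOG(ERR, KNI, "Unknown request id %u\n", req->req_id);
		req->result = -EINVAL;
		break;
	}

	/* Post the result back; the kernel side waits on resp_q */
	if (kni_fifo_put(kni->resp_q, (void **)&req, 1) != 1)
		return -1;

	return 0;
}

kni_egress() is the mirror image of kni_ingress(), pulling packets the kernel has sent from the KNI interface and bursting them out of the physical port: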
static void
kni_egress(struct kni_port_params *p)
{
	uint8_t i, port_id;
	unsigned nb_tx, num;
	uint32_t nb_kni;
	struct rte_mbuf *pkts_burst[PKT_BURST_SZ];

	if (p == NULL)
		return;

	nb_kni = p->nb_kni;
	port_id = p->port_id;
	for (i = 0; i < nb_kni; i++) {
		/* Burst rx from kni */
		num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ);
		if (unlikely(num > PKT_BURST_SZ)) {
			RTE_LOG(ERR, APP, "Error receiving from KNI\n");
			return;
		}
		/* Burst tx to eth */
		nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
		kni_stats[port_id].tx_packets += nb_tx;
		if (unlikely(nb_tx < num)) {
			/* Free mbufs not tx to NIC */
			kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
			kni_stats[port_id].tx_dropped += num - nb_tx;
		}
	}
}
The lcores stay in these loops servicing the KNI port's RX and TX. Space is limited, so I will write up the rest in a later post.