这篇文章会用一小段代码描述以下的内容:
- 两个Domain之间的内存页共享, Domain0和DomainU
- 在这个页中设置一个共享ring
- 为共享ring设置event channel
- 在Dom0和DomU之间来回传递一些信息
介绍
在xen中的虚拟机被称为Domain. Domain0(Dom0)是特别的, 它拥有与真实设备(例如网卡)交互的设备驱动. 这个驱动被称为后端驱动. 在我们的例子中Dom0被称为后端domain.
在被称为 DomainU(DomU)的用户Domain有一个相应的前端驱动, 其是虚拟设备的接口,为和真实设备通信在DomU中前端驱动要连接后端驱动. 在我们以下的例子中, 这个DomU被称为前端Domain.
Xen为共享Domain间的内存提供了授权表(Grant Tables). 设备驱动使用授权表工作. 每个Domain有它自己的授权表, 并与xen共享. 在这个表中的条目由授权引用(grant references)所标识. 授权引用在Domain间传递, 且所引用的共享页由授权表所指向. domain也设置一个共享环结构(ring structure), 其用于在domain间有效地共享数据.
对于分离前/后端驱动, 前端分配一个用于共享通信 ring 的内存页, 授权它给后端domain, 并放授权引用到xenstore,
这样后端就能 map 这个页. 有共享ring这个页是一个主页, 用于传递更多的授权引用. 共享页由块设备和其它同步接收数
据的设备所使用, 异步接收数据的网络设备, 使用已知的 page flipping 方法, 这个页的所有权在Domain 间转移.
上图展示了这个共享ring, 以及这个ring所有的公有和私有指针. "Request Producer" 和 "Response Producer" 是两个公共变量, 其会被共享这个页面的两者都看到. "Response Consumer" 是一个在前端(由前端设备所维护, 即DomU) ring 结构中的指针. "Request Consumer" 是一个在后端(由后端设备维护, 即Dom0) ring 结构中的指针.
API 使用
在DomU kernel中的前端驱动通告一个页面用于共享, 这通过 hypervisor 函数调用("hypercall"), (gnttab_grant_foreign_access 系统调用)完成. hypercall 通知 hypervisor 其它 domain 允许访问这个页. DomU然后把引用 ID 传递给被"授权"访问的远端 Domain. 在我们的代码中, 这个访问授权给了 Dom0. 一旦远端 domain 完成操作, 那么本地 domain 应调用gnttab_end_foreign_access删除授权.
网络设备和类似的异步接收数据的其它设备, 使用称为 page flipping 的方法. 当页翻转时, 在本地 domain kernel中的驱动会通告一个用于转移的页, 经由 gnttab_grant_foreign_transfer 调用完成. 这个调用会通知 hypervisor其它domain可以接收这个页. 转移给远程domain的这个本地domain执行 free page. (经由producer/consumer ring).
DomU Code:
1 /* This file is run in the DomU Kernel.
2 * It grants a page with a shared ring structure on it to the Dom0.
3 * The grant reference and Event Channel is passed manually. Should be done via XenStore
4 * or some other out of band mechanism.
5 * Compile the code using
6 * make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
7 * Run it as follows
8 * insmod xen-eg.ko
9 * Pick up the grant ref and event channel that comes out in /var/log/messages and pass as
10 * insmod parameters in the Dom0 module.
11 * Run the Dom0 program as follows
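12 * insmod dom0.ko gref=<gref> port=<port>
13 * <gref> is the grant reference printed when xen-eg.ko is insmod-ed
14 * <port> is the event channel local-port printed at the same time.
14 * Note: the Dom0 module hard-codes the DomU domain id (as seen in "xm list") to 1.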
15 */
16 #include <linux/module.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <xen/interface/xen.h>
20 #include <xen/interface/io/ring.h>
21 #include <xen/grant_table.h>
22 #include <asm/pgtable.h>
23 #include <asm/sync_bitops.h>
24 #include <xen/gnttab.h>
25 #include <xen/evtchn.h>
26 #include <asm/uaccess.h>
27 #include <linux/proc_fs.h>
28 int page;
29 struct as_request {
30 unsigned int id; /* private guest value echoed in resp */
31 unsigned int status;
32 unsigned int operation;
33 };
34 struct as_response {
35 unsigned int id; /* copied from request */
36 unsigned int status;
37 unsigned int operation; /* copied from request */
38 };
39 // The following makes the as_sring, as_back_ring, as_back_ring "types"
40 DEFINE_RING_TYPES(as, struct as_request, struct as_response);
41 struct info_t {
42 struct as_front_ring ring;
43 grant_ref_t gref;
44 int irq;
45 int port;
46 } info;
47 #define DOM0_ID 0
48 // Related the proc fs entries
49 static struct proc_dir_entry *proc_dir = NULL;
50 static struct proc_dir_entry *proc_file = NULL;
51 char proc_data[20];
52 /* Send an request via the shared ring to Dom0, following by an INT */
53 int send_request_to_dom0() {
54 struct as_request *ring_req;
55 int notify;
56 static int reqid=9;
57 /* Write a request into the ring and update the req-prod pointer */
58 ring_req = RING_GET_REQUEST(&(info.ring), info.ring.req_prod_pvt);
59 ring_req->id = reqid;
60 ring_req->operation = reqid;
61 ring_req->status = reqid;
62 printk("\nxen:DomU: Fill in IDX-%d, with id=%d, op=%d, st=%d",
63 info.ring.req_prod_pvt, ring_req->id, ring_req->operation,
64 ring_req->status);
65 reqid++;
66 info.ring.req_prod_pvt += 1;
67 // Send a reqest to backend followed by an int if needed
68 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&(info.ring), notify);
69 if (notify) {
70 printk("\nxen:DomU: Sent a req to Dom0");
71 notify_remote_via_irq(info.irq);
72 } else {
73 printk("\nxen:DomU: No notify req to Dom0");
74 notify_remote_via_irq(info.irq);
75 }
76 printk("...\n");
77 return 0;
78 }
79 ssize_t file_write (struct file *filp, const char __user *buff,
80 unsigned long len, void *data) {
81 int value;
82 printk("\nxen:domU: file_write %lu bytes", len);
83 if (copy_from_user(&proc_data[0], buff, len))
84 return -EFAULT;
85 proc_data[len] = '\x0';
86 // printk(" ,%s", &proc_data[0]);
87 value = simple_strtol(proc_data, 0, 10);
88 switch(value) {
89 case 1:
90 send_request_to_dom0();
91 printk(" ,value = %d", value);
92 break;
93 default:
94 printk(" ,value not recognized !");
95 }
96 return len;
97 }
98 int file_read (char* page, char**start, off_t off,
99 int count, int *eof, void *data) {
100 sprintf(page, "%s", proc_data);
101 return strlen(page);
102 }
103
104 /* We create a /proc/demo/file entry. When we write a "1" ino this file once
105 * the module is loaded, the file_write function() above is called and this
106 * sends a requesst on the shared ring to the Dom0. This way we test the
107 * event channel and shared ring routines.
108 */
109 int create_procfs_entry() {
110 int ret = 0;
111 proc_dir = proc_mkdir("demo", NULL);
112 if (!proc_dir) {
113 printk("\nxen:domU Could not create demo entry in procfs");
114 ret = -EAGAIN;
115 return ret;
116 }
117 proc_file = create_proc_entry("file", 0600, proc_dir);
118 if (proc_file) {
119 proc_file->read_proc = file_read;
120 proc_file->write_proc = file_write;
121 proc_file->owner = THIS_MODULE;
122 } else {
123 printk("\nxen:domU Could not create /proc/demo/file");
124 ret = -EAGAIN;
125 return ret;
126 }
127 return ret;
128 }
129 /* Our interrupt handler for event channel that we set up */
130 static irqreturn_t as_int (int irq, void *dev_id) {
131 struct as_response *ring_resp;
132 RING_IDX i, rp;
133
134 printk("\nxen:DomU: as_int called");
135 again:
136 rp = info.ring.sring->rsp_prod;
137 printk("\nxen:DomU: ring pointers %d to %d", info.ring.rsp_cons, rp);
138 for(i=info.ring.rsp_cons; i != rp; i++) {
139 unsigned long id;
140 // what did we get from Dom0
141 ring_resp = RING_GET_RESPONSE(&(info.ring), i);
142 printk("\nxen:DomU: Recvd in IDX-%d, with id=%d, op=%d, st=%d",
143 i, ring_resp->id, ring_resp->operation, ring_resp->status);
144 id = ring_resp->id;
145 switch(ring_resp->operation) {
146 case 0:
147 printk("\nxen:DomU: operation:0");
148 break;
149 default:
150 break;
151 }
152 }
153 info.ring.rsp_cons = i;
154 if (i != info.ring.req_prod_pvt) {
155 int more_to_do;
156 RING_FINAL_CHECK_FOR_RESPONSES(&info.ring, more_to_do);
157 if(more_to_do)
158 goto again;
159 } else
160 info.ring.sring->rsp_event = i+1;
161 return IRQ_HANDLED;
162 }
163 int init_module(void) {
164 int mfn;
165 int err;
166 struct as_sring *sring;
167 /* Allocates and returns a pointer to the first byte of a memory area
168 * that is several physically contiguous pages long, and doesn't zero
169 * out the area.
170 * GFP_KERNEL - process may sleep
171 */
172
173 page = __get_free_pages(GFP_KERNEL, 1);
174 if (page == 0) {
175 printk("\nxen:DomU: could not get free page");
176 return 0;
177 }
178 /* Put a shared ring structure on this page */
179 sring = (struct as_sring*) page;
180 SHARED_RING_INIT(sring);
181 /* info.ring is the front_ring structure */
182 FRONT_RING_INIT(&(info.ring), sring, PAGE_SIZE);
183 mfn = virt_to_mfn(page);
184 /* The following grant table func is in drivers/xen/grant-table.c
185 * For shared pages, used for synchronous data, advertise a page to
186 * be shared via the hypervisor function call gnttab_grant_foreign_access.
187 * This call notifies the hypervisor that other domains are allowed to
188 * access this page.
189 * gnttab_map() has been called earlier to setup gnttable_setup_table
190 * during init phase, with a call to HYPERVISOR_grant_table_op(
191 * GNTTAB_setup_table...) and
192 * "shared" pages have been malloc'ed. This "shared" page is then used
193 * below later during the actual grant of a ref by this DOM.
194 * gnttab_grant_foreign_access()
195 * => get_free_entries
196 * gnttab_free_head - points to the ref of the head
197 * gnttab_free_count- keeps number of free refs
198 * Get a ref id by calling gnttab_entry(head)
199 * gnttab_list[entry/RPP][entry%RPP]
200 * => gnttab_grat_foreign_access_ref
201 * => update_grant_entry
202 * shared[ref].frame/domid/flags are updated
203 * "shared" above is a pointer to struct grant_entry (flags/domid/frame)
204 */
205
206 info.gref = gnttab_grant_foreign_access(DOM0_ID, mfn, 0);
207 if (info.gref < 0) {
208 printk("\nxen: could not grant foreign access");
209 free_page((unsigned long)page);
210 return 0;
211 }
212 /* The following strcpy is commented out, but was used initally to test
213 * is the memory page is indeed shared with Dom0, when in Dom0, we do a
214 * sprintf of the same memory location and get the same characters.
215 */
216 // strcpy((char*)page, "aseem sethi");
217 /* TBD: Save gref to be sent via Xenstore to dom-0. As of now both the
218 * gref and the event channel port id is sent manually during insmod
219 * in the dom0 module.
220 */
221 printk("\n gref = %d", info.gref);
222 /* Setup an event channel to Dom0 */
223 err = bind_listening_port_to_irqhandler(DOM0_ID, as_int, 0, "xen-eg", &info);
224 if (err < 0) {
225 printk("\nxen:DomU failed to setup evtchn !");
226 gnttab_end_foreign_access(info.gref, 0, page);
227 return 0;
228 }
229 info.irq = err;
230 info.port = irq_to_evtchn_port(info.irq);
231 printk(" interupt = %d, local-port = %d", info.irq, info.port);
232 printk("....\n...");
233 create_procfs_entry();
234 return 0;
235 }
236 void cleanup_module(void) {
237 printk("\nCleanup grant ref:");
238 if (gnttab_query_foreign_access(info.gref) == 0) {
239 //Remove the grant to the page
240 printk("\n xen: No one has mapped this frame");
241 // If 3rd param is non NULL, page has to be freed
242 gnttab_end_foreign_access(info.gref, 0, page);
243 // free_pages(page,1);
244 } else {
245 printk("\n xen: Someone has mapped this frame");
246 // Guess, we still free the page, since we are rmmod-ed
247 gnttab_end_foreign_access(info.gref, 0, page);
248 }
249 /* Cleanup proc entry */
250 remove_proc_entry("file", proc_dir);
251 remove_proc_entry("demo", NULL);
252 printk("....\n...");
253 }
254 MODULE_LICENSE("GPL");
255
Dom0 Code:
1 /* This is the module in Dom0. Compile it using
2 * make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
3 * Change the grant_ref below in the code to the one seen in DomU before compilation.
4 * Insmod the module using "insmod dom0.ko gref=<value> port=<event channel port>"
5 * <value> is taken from the dmesg output in DomU when xen-eg.ko is insmod there.
6 * This will map the page that the DomU has shared with the Dom0.
7 */
8 #include <linux/module.h>
9 #include <linux/moduleparam.h>
10 #include <linux/kernel.h>
11 #include <xen/interface/grant_table.h>
12 #include <xen/interface/io/blkif.h> // for definition of blkif_sring_t
13 #include <xen/gnttab.h>
14 #include <linux/vmalloc.h>
15 #include <asm-x86/xen/hypervisor.h>
16 #include <xen/evtchn.h>
17 struct gnttab_map_grant_ref ops;
18 struct gnttab_unmap_grant_ref unmap_ops;
19 struct as_request {
20 unsigned int id; /* private guest value, echoed in resp */
21 unsigned int status;
22 unsigned int operation;
23 };
24 struct as_response {
25 unsigned int id; /* copied from request */
26 unsigned int status;
27 unsigned int operation; /* copied from request */
28 };
29 typedef struct as_request as_request_t;
30 typedef struct as_response as_response_t;
31 // From /include/xen/interface/io/ring.h
32 // The following makes the as_sring, as_back_ring, as_back_ring "types"
33 DEFINE_RING_TYPES(as, struct as_request, struct as_response);
34 struct info_t {
35 int irq;
36 int gref;
37 int remoteDomain;
38 int evtchn;
39 struct as_back_ring ring;
40 } info;
41 int gref;
42 int port;
43 module_param(gref, int, 0644);
44 module_param(port, int, 0644);
45 static irqreturn_t as_int (int irq, void *dev_id) {
46 RING_IDX rc, rp;
47 as_request_t req;
48 as_response_t resp;
49 int more_to_do, notify;
50 // dev_id is a pointer to the info structure
51 printk("\nxen:Dom0: as_int called with dev_id %x info=%x",
52 (unsigned int)dev_id, (unsigned int)&info);
53 rc = info.ring.req_cons;
54 rp = info.ring.sring->req_prod;
55 printk(" rc =%d rp =%d", rc, rp);
56 while(rc!=rp) {
57 if(RING_REQUEST_CONS_OVERFLOW(&info.ring, rc))
58 break;
59 // what did we get from the frontend at index rc
60 memcpy(&req, RING_GET_REQUEST(&info.ring, rc), sizeof(req));
61 resp.id = req.id;
62 resp.operation = req.operation;
63 resp.status = req.status+1; // Send back a status +1 of what was recvd
64 printk("\nxen:Dom0: Recvd at IDX-%d: id=%d, op=%d, status=%d",
65 rc, req.id, req.operation, req.status);
66 // update the req-consumer
67 info.ring.req_cons = ++rc;
68 barrier();
69 switch (req.operation) {
70 case 0:
71 printk("\nxen:Dom0: req.operation = 0");
72 break;
73 default:
74 printk("\nxen:Dom0: req.operation = %d", req.operation);
75 break;
76 }
77 memcpy(RING_GET_RESPONSE(&info.ring, info.ring.rsp_prod_pvt),
78 &resp, sizeof(resp));
79 info.ring.rsp_prod_pvt++;
80 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info.ring, notify);
81 if(info.ring.rsp_prod_pvt == info.ring.req_cons) {
82 RING_FINAL_CHECK_FOR_REQUESTS(&info.ring, more_to_do);
83 } else if (RING_HAS_UNCONSUMED_REQUESTS(&info.ring)) {
84 more_to_do = 1;
85 }
86 if(notify) {
87 printk("\nxen:Dom0: Send notify to DomU");
88 notify_remote_via_irq(info.irq);
89 }
90 }
91 return IRQ_HANDLED;
92 }
93 int init_module(void) {
94 struct vm_struct *v_start;
95 as_sring_t *sring;
96 int err;
97 info.gref = gref;
98 info.remoteDomain = 1;
99 info.evtchn = port;
100 printk("\nxen: dom0: init_module with gref = %d", info.gref);
101 // The following function reserves a range of kernel address space and
102 // allocates pagetables to map that range. No actual mappings are created.
103 v_start = alloc_vm_area(PAGE_SIZE);
104 if (v_start == 0) {
105 free_vm_area(v_start);
106 printk("\nxen: dom0: could not allocate page");
107 return -EFAULT;
108 }
109 /* ops struct in paramaeres
110 * host_addr, flags, ref
111 * ops struct out parameters
112 * status (zero if OK), handle (used to unmap later), dev_bus_addr
113 */
114 gnttab_set_map_op(&ops, (unsigned long)v_start->addr, GNTMAP_host_map,
115 info.gref, info.remoteDomain); /* flags, ref, domID */
116 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &ops, 1)) {
117 printk("\nxen: dom0: HYPERVISOR map grant ref failed");
118 return -EFAULT;
119 }
120 if (ops.status) {
121 printk("\nxen: dom0: HYPERVISOR map grant ref failed status = %d",
122 ops.status);
123 return -EFAULT;
124 }
125 printk("\nxen: dom0: shared_page = %x, handle = %x, status = %x",
126 (unsigned int)v_start->addr, ops.handle, ops.status);
127 // Used for unmapping
128 unmap_ops.host_addr = (unsigned long)(v_start->addr);
129 unmap_ops.handle = ops.handle;
130 /* printk("\nBytes in page ");
131 for(i=0;i<=10;i++) {
132 printk("%c", ((char*)(v_start->addr))[i]);
133 } */
134 sring = (as_sring_t*)v_start->addr;
135 BACK_RING_INIT(&info.ring, sring, PAGE_SIZE);
136 /* Seetup an event channel to the frontend */
137 err = bind_interdomain_evtchn_to_irqhandler(info.remoteDomain,
138 info.evtchn, as_int, 0, "dom0-backend", &info);
139 if (err < 0) {
140 printk("\nxen: dom0: init_module failed binding to evtchn !");
141 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
142 &unmap_ops, 1);
143 return -EFAULT;
144 }
145 info.irq = err;
146 printk("\nxen: dom0: end init_module: int = %d", info.irq);
147 return 0;
148 }
149 void cleanup_module(void) {
150 int ret;
151 printk("\nxen: dom0: cleanup_module");
152 // Unmap foreign frames
153 // ops.handle points to the pages that were initially mapped. Set in the
154 // __init() function
155 //ops.host_addr ponts to the heap where the pages were mapped
156 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_ops, 1);
157 if (ret == 0) {
158 printk(" cleanup_module: unmapped shared frame");
159 } else {
160 printk(" cleanup_module: unmapped shared frame failed");
161 }
162 printk("...\n");
163 }
164 MODULE_LICENSE("GPL");
165