This article uses small pieces of code to illustrate the following:

- Sharing a memory page between two domains, Domain0 and DomainU

- Setting up a shared ring in that page

- Setting up an event channel for the shared ring

- Passing some messages back and forth between Dom0 and DomU


Introduction

Virtual machines in Xen are called domains. Domain0 (Dom0) is special: it owns the device drivers that talk to the real hardware, such as the network card. These drivers are called backend drivers, and in our example this domain is referred to as the backend domain.

A user domain, called DomainU (DomU), has a corresponding frontend driver, which acts as the interface to the virtual device. To communicate with the real device, the frontend driver in DomU connects to the backend driver. In the example below, this DomU is referred to as the frontend domain.


Xen provides grant tables for sharing memory between domains, and the device drivers work on top of them. Each domain has its own grant table, which it shares with Xen. Entries in the table are identified by grant references; a grant reference is passed between domains and names the shared page that the grant table points to. The domains also set up a shared ring structure on such a page, which is used to exchange data between them efficiently.
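
To make the ring concrete, here is a minimal sketch of how the ring types are declared, using the same request/response layout as the modules below; DEFINE_RING_TYPES comes from xen/interface/io/ring.h:

/* Sketch: declaring the shared-ring types used throughout this article. */
#include <xen/interface/io/ring.h>

struct as_request {
    unsigned int id;         /* private guest value, echoed in the response */
    unsigned int status;
    unsigned int operation;
};

struct as_response {
    unsigned int id;         /* copied from the request */
    unsigned int status;
    unsigned int operation;  /* copied from the request */
};

/* Generates struct as_sring (the page-resident shared ring) as well as
 * struct as_front_ring and struct as_back_ring with their helper macros. */
DEFINE_RING_TYPES(as, struct as_request, struct as_response);

The frontend then places a struct as_sring on the granted page with SHARED_RING_INIT()/FRONT_RING_INIT(), and the backend attaches to it with BACK_RING_INIT(), exactly as the two modules below do.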


For split frontend/backend drivers, the frontend allocates a memory page for the shared communication ring, grants it to the backend domain, and places the grant reference in XenStore so that the backend can map the page. The page holding the shared ring acts as a master page through which further grant references are passed. Shared pages are used by block devices and other devices that receive data synchronously. Network devices, which receive data asynchronously, use the well-known page flipping technique, in which ownership of a page is transferred between domains.
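
The modules in this article skip the XenStore step and pass the values by hand at insmod time. In a real split driver the frontend would publish the grant reference and event-channel port through XenStore. A rough sketch, assuming the frontend is bound to xenbus and has a struct xenbus_device *dev from its probe callback (so dev->nodename is its XenStore directory; publish_ring_info is a hypothetical helper name), could look like this:

/* Hypothetical helper: publish the ring grant reference and event-channel
 * port in the frontend's XenStore directory so the backend can find them.
 * "ring-ref" and "event-channel" follow the naming used by blkfront/netfront. */
#include <xen/xenbus.h>
#include <xen/interface/grant_table.h>

static int publish_ring_info(struct xenbus_device *dev,
                             grant_ref_t gref, unsigned int evtchn)
{
    int err;

    err = xenbus_printf(XBT_NIL, dev->nodename, "ring-ref", "%u", gref);
    if (err)
        return err;

    return xenbus_printf(XBT_NIL, dev->nodename, "event-channel", "%u", evtchn);
}

The backend would read these two nodes back (and watch for them appearing) before mapping the grant and binding to the event channel.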


The figure above shows the shared ring and all of its public and private pointers. "Request Producer" and "Response Producer" are two public variables, visible to both parties sharing the page. "Response Consumer" is a pointer in the frontend ring structure (maintained by the frontend driver, i.e. DomU). "Request Consumer" is a pointer in the backend ring structure (maintained by the backend driver, i.e. Dom0).
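
The bookkeeping for these pointers is handled by the ring macros. Below is a condensed sketch of the frontend side; the full version is send_request_to_dom0() in the DomU module further down, and queue_one_request, front and ring_irq are hypothetical local names (same includes as the DomU module):

/* How the pointers above map onto the ring structures:
 *   sring->req_prod      - "Request Producer"  (public, advanced by the frontend)
 *   sring->rsp_prod      - "Response Producer" (public, advanced by the backend)
 *   front_ring.rsp_cons  - "Response Consumer" (private to the frontend, DomU)
 *   back_ring.req_cons   - "Request Consumer"  (private to the backend, Dom0)
 */
static void queue_one_request(struct as_front_ring *front, int ring_irq)
{
    struct as_request *req;
    int notify;

    /* Fill the slot at the private producer index, then advance it. */
    req = RING_GET_REQUEST(front, front->req_prod_pvt);
    req->id = 1;
    req->operation = 1;
    req->status = 0;
    front->req_prod_pvt++;

    /* Copy req_prod_pvt into the public req_prod ("Request Producer") and
     * decide whether the backend needs an event-channel notification. */
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(front, notify);
    if (notify)
        notify_remote_via_irq(ring_irq);
}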


API Usage


The frontend driver in the DomU kernel advertises a page to be shared through a hypervisor function call ("hypercall"), wrapped by gnttab_grant_foreign_access. The hypercall tells the hypervisor that another domain is allowed to access this page. DomU then passes the resulting reference ID to the remote domain that has been "granted" access; in our code, access is granted to Dom0. Once the remote domain is done with the page, the local domain should call gnttab_end_foreign_access to revoke the grant.
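
Condensed from the DomU module below, the grant and revoke calls look roughly like this. The helper names grant_page_to_dom0/ungrant_page are just for illustration; the gnttab calls and includes are the same ones the module uses:

/* Sketch: grant Dom0 read/write access to one page, and revoke it later. */
#include <xen/grant_table.h>
#include <xen/gnttab.h>

static int grant_page_to_dom0(unsigned long page, grant_ref_t *gref_out)
{
    int ref;

    /* Hypercall wrapper: allow domain 0 to access the frame backing "page". */
    ref = gnttab_grant_foreign_access(0 /* Dom0 */, virt_to_mfn(page), 0 /* rw */);
    if (ref < 0)
        return ref;

    *gref_out = ref;
    return 0;
}

static void ungrant_page(grant_ref_t gref, unsigned long page)
{
    /* Revoke the grant; a non-zero page argument also frees the page. */
    gnttab_end_foreign_access(gref, 0, page);
}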



Network devices and other devices that receive data asynchronously use the well-known page flipping technique instead. With page flipping, the driver in the local domain kernel advertises a page for transfer via the gnttab_grant_foreign_transfer call, which tells the hypervisor that the other domain may receive this page. The local domain that transfers the page to the remote domain then frees it, with the two sides coordinating via the producer/consumer ring.


DomU Code:

/* This file is run in the DomU kernel.
 * It grants a page with a shared ring structure on it to the Dom0.
 * The grant reference and event channel are passed manually; this should be
 * done via XenStore or some other out-of-band mechanism.
 * Compile the code using
 * make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
 * Run it as follows
 * insmod xen-eg.ko
 * Pick up the grant ref and event channel port that come out in
 * /var/log/messages and pass them as insmod parameters to the Dom0 module:
 * insmod dom0.ko gref=<gref> port=<port>
 * <gref> and <port> are the grant reference and event channel port printed
 * when xen-eg.ko is insmod-ed. The DomU domain id (as seen in "xm list") is
 * currently hardcoded to 1 in the Dom0 module.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <xen/interface/xen.h>
#include <xen/interface/io/ring.h>
#include <xen/grant_table.h>
#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
#include <xen/gnttab.h>
#include <xen/evtchn.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>

unsigned long page;

struct as_request {
    unsigned int id; /* private guest value echoed in resp */
    unsigned int status;
    unsigned int operation;
};

struct as_response {
    unsigned int id; /* copied from request */
    unsigned int status;
    unsigned int operation; /* copied from request */
};

// The following makes the as_sring, as_front_ring, as_back_ring "types"
DEFINE_RING_TYPES(as, struct as_request, struct as_response);

struct info_t {
    struct as_front_ring ring;
    grant_ref_t gref;
    int irq;
    int port;
} info;

#define DOM0_ID 0

// Related to the procfs entries
static struct proc_dir_entry *proc_dir = NULL;
static struct proc_dir_entry *proc_file = NULL;
char proc_data[20];

/* Send a request to Dom0 via the shared ring, followed by an interrupt */
int send_request_to_dom0(void)
{
    struct as_request *ring_req;
    int notify;
    static int reqid = 9;

    /* Write a request into the ring and update the req-prod pointer */
    ring_req = RING_GET_REQUEST(&(info.ring), info.ring.req_prod_pvt);
    ring_req->id = reqid;
    ring_req->operation = reqid;
    ring_req->status = reqid;
    printk("\nxen:DomU: Fill in IDX-%d, with id=%d, op=%d, st=%d",
           info.ring.req_prod_pvt, ring_req->id, ring_req->operation,
           ring_req->status);
    reqid++;
    info.ring.req_prod_pvt += 1;

    // Send a request to the backend, followed by an interrupt if needed
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&(info.ring), notify);
    if (notify) {
        printk("\nxen:DomU: Sent a req to Dom0");
        notify_remote_via_irq(info.irq);
    } else {
        printk("\nxen:DomU: No notify req to Dom0");
        notify_remote_via_irq(info.irq);
    }
    printk("...\n");
    return 0;
}

int file_write(struct file *filp, const char __user *buff,
               unsigned long len, void *data)
{
    int value;

    printk("\nxen:domU: file_write %lu bytes", len);
    if (len > sizeof(proc_data) - 1)
        return -EINVAL;
    if (copy_from_user(&proc_data[0], buff, len))
        return -EFAULT;
    proc_data[len] = '\x0';
    // printk(" ,%s", &proc_data[0]);
    value = simple_strtol(proc_data, 0, 10);
    switch (value) {
        case 1:
            send_request_to_dom0();
            printk(" ,value = %d", value);
            break;
        default:
            printk(" ,value not recognized !");
    }
    return len;
}

int file_read(char *page, char **start, off_t off,
              int count, int *eof, void *data)
{
    sprintf(page, "%s", proc_data);
    return strlen(page);
}

/* We create a /proc/demo/file entry. When we write a "1" into this file once
 * the module is loaded, the file_write() function above is called and this
 * sends a request on the shared ring to Dom0. This way we test the
 * event channel and shared ring routines.
 */
int create_procfs_entry(void)
{
    int ret = 0;

    proc_dir = proc_mkdir("demo", NULL);
    if (!proc_dir) {
        printk("\nxen:domU Could not create demo entry in procfs");
        ret = -EAGAIN;
        return ret;
    }
    proc_file = create_proc_entry("file", 0600, proc_dir);
    if (proc_file) {
        proc_file->read_proc = file_read;
        proc_file->write_proc = file_write;
        proc_file->owner = THIS_MODULE;
    } else {
        printk("\nxen:domU Could not create /proc/demo/file");
        ret = -EAGAIN;
        return ret;
    }
    return ret;
}

/* Our interrupt handler for the event channel that we set up */
static irqreturn_t as_int(int irq, void *dev_id)
{
    struct as_response *ring_resp;
    RING_IDX i, rp;

    printk("\nxen:DomU: as_int called");
again:
    rp = info.ring.sring->rsp_prod;
    printk("\nxen:DomU: ring pointers %d to %d", info.ring.rsp_cons, rp);
    for (i = info.ring.rsp_cons; i != rp; i++) {
        unsigned long id;

        // What did we get from Dom0
        ring_resp = RING_GET_RESPONSE(&(info.ring), i);
        printk("\nxen:DomU: Recvd in IDX-%d, with id=%d, op=%d, st=%d",
               i, ring_resp->id, ring_resp->operation, ring_resp->status);
        id = ring_resp->id;
        switch (ring_resp->operation) {
            case 0:
                printk("\nxen:DomU: operation:0");
                break;
            default:
                break;
        }
    }
    info.ring.rsp_cons = i;
    if (i != info.ring.req_prod_pvt) {
        int more_to_do;
        RING_FINAL_CHECK_FOR_RESPONSES(&info.ring, more_to_do);
        if (more_to_do)
            goto again;
    } else
        info.ring.sring->rsp_event = i + 1;
    return IRQ_HANDLED;
}

int init_module(void)
{
    unsigned long mfn;
    int err;
    struct as_sring *sring;

    /* Allocate a single free page for the shared ring. The area is not
     * zeroed; SHARED_RING_INIT() below initializes it.
     * GFP_KERNEL - process may sleep.
     */
    page = __get_free_page(GFP_KERNEL);
    if (page == 0) {
        printk("\nxen:DomU: could not get free page");
        return -ENOMEM;
    }

    /* Put a shared ring structure on this page */
    sring = (struct as_sring *)page;
    SHARED_RING_INIT(sring);
    /* info.ring is the front_ring structure */
    FRONT_RING_INIT(&(info.ring), sring, PAGE_SIZE);

    mfn = virt_to_mfn(page);
    /* The following grant table func is in drivers/xen/grant-table.c
     * For shared pages, used for synchronous data, advertise a page to
     * be shared via the hypervisor function call gnttab_grant_foreign_access.
     * This call notifies the hypervisor that other domains are allowed to
     * access this page.
     * gnttab_map() has been called earlier to set up the grant table
     * during the init phase, with a call to HYPERVISOR_grant_table_op(
     * GNTTABOP_setup_table...) and
     * "shared" pages have been malloc'ed. This "shared" page is then used
     * below later during the actual grant of a ref by this DOM.
     * gnttab_grant_foreign_access()
     * => get_free_entries
     *    gnttab_free_head  - points to the ref of the head
     *    gnttab_free_count - keeps the number of free refs
     *    Get a ref id by calling gnttab_entry(head)
     *    gnttab_list[entry/RPP][entry%RPP]
     * => gnttab_grant_foreign_access_ref
     * => update_grant_entry
     *    shared[ref].frame/domid/flags are updated
     *    "shared" above is a pointer to struct grant_entry (flags/domid/frame)
     */
    err = gnttab_grant_foreign_access(DOM0_ID, mfn, 0);
    if (err < 0) {
        printk("\nxen: could not grant foreign access");
        free_page(page);
        return err;
    }
    info.gref = err;

    /* The following strcpy is commented out, but was used initially to test
     * whether the memory page is indeed shared with Dom0: in Dom0 we print
     * the same memory location and get the same characters.
     */
    // strcpy((char*)page, "aseem sethi");

    /* TBD: Save gref to be sent via XenStore to dom-0. As of now both the
     * gref and the event channel port id are passed manually during insmod
     * of the dom0 module.
     */
    printk("\n gref = %d", info.gref);

    /* Set up an event channel to Dom0 */
    err = bind_listening_port_to_irqhandler(DOM0_ID, as_int, 0, "xen-eg", &info);
    if (err < 0) {
        printk("\nxen:DomU failed to setup evtchn !");
        gnttab_end_foreign_access(info.gref, 0, page);
        return err;
    }
    info.irq = err;
    info.port = irq_to_evtchn_port(info.irq);
    printk("   interrupt = %d, local-port = %d", info.irq, info.port);
    printk("....\n...");
    create_procfs_entry();
    return 0;
}

void cleanup_module(void)
{
    printk("\nCleanup grant ref:");
    if (gnttab_query_foreign_access(info.gref) == 0) {
        printk("\n xen: No one has mapped this frame");
        // Remove the grant to the page. If the 3rd param is non-zero, the
        // page is also freed, so no explicit free_page() is needed.
        gnttab_end_foreign_access(info.gref, 0, page);
    } else {
        printk("\n xen: Someone has mapped this frame");
        // We still end the access and free the page, since we are rmmod-ed
        gnttab_end_foreign_access(info.gref, 0, page);
    }
    /* Cleanup the proc entries */
    remove_proc_entry("file", proc_dir);
    remove_proc_entry("demo", NULL);
    printk("....\n...");
}

MODULE_LICENSE("GPL");

Dom0 Code:

/* This is the module in Dom0. Compile it using
 * make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
 * Insmod the module using "insmod dom0.ko gref=<value> port=<port>"
 * <value> and <port> are taken from the dmesg output in DomU when xen-eg.ko
 * is insmod-ed there.
 * This will map the page that the DomU has shared with the Dom0.
 * Note: the remote domain id is hardcoded to 1 below; change it to the
 * domain id of your DomU as seen in "xm list".
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>  // for definition of blkif_sring_t
#include <xen/gnttab.h>
#include <linux/vmalloc.h>
#include <asm-x86/xen/hypervisor.h>
#include <xen/evtchn.h>

struct gnttab_map_grant_ref   ops;
struct gnttab_unmap_grant_ref unmap_ops;

struct as_request {
    unsigned int id;        /* private guest value, echoed in resp */
    unsigned int status;
    unsigned int operation;
};

struct as_response {
    unsigned int id;        /* copied from request */
    unsigned int status;
    unsigned int operation; /* copied from request */
};

typedef struct as_request as_request_t;
typedef struct as_response as_response_t;

// From include/xen/interface/io/ring.h
// The following makes the as_sring, as_front_ring, as_back_ring "types"
DEFINE_RING_TYPES(as, struct as_request, struct as_response);

struct info_t {
    int irq;
    int gref;
    int remoteDomain;
    int evtchn;
    struct as_back_ring ring;
} info;

int gref;
int port;
module_param(gref, int, 0644);
module_param(port, int, 0644);

static irqreturn_t as_int(int irq, void *dev_id)
{
    RING_IDX rc, rp;
    as_request_t req;
    as_response_t resp;
    int more_to_do, notify;

    // dev_id is a pointer to the info structure
    printk("\nxen:Dom0: as_int called with dev_id %p info=%p", dev_id, &info);
    rc = info.ring.req_cons;
    rp = info.ring.sring->req_prod;
    printk("  rc =%d rp =%d", rc, rp);
    while (rc != rp) {
        if (RING_REQUEST_CONS_OVERFLOW(&info.ring, rc))
            break;
        // What did we get from the frontend at index rc
        memcpy(&req, RING_GET_REQUEST(&info.ring, rc), sizeof(req));
        resp.id = req.id;
        resp.operation = req.operation;
        resp.status = req.status + 1; // Send back a status +1 of what was recvd
        printk("\nxen:Dom0: Recvd at IDX-%d: id=%d, op=%d, status=%d",
               rc, req.id, req.operation, req.status);
        // Update the req-consumer
        info.ring.req_cons = ++rc;
        barrier();
        switch (req.operation) {
            case 0:
                printk("\nxen:Dom0: req.operation = 0");
                break;
            default:
                printk("\nxen:Dom0: req.operation = %d", req.operation);
                break;
        }
        memcpy(RING_GET_RESPONSE(&info.ring, info.ring.rsp_prod_pvt),
               &resp, sizeof(resp));
        info.ring.rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info.ring, notify);
        if (info.ring.rsp_prod_pvt == info.ring.req_cons) {
            RING_FINAL_CHECK_FOR_REQUESTS(&info.ring, more_to_do);
        } else if (RING_HAS_UNCONSUMED_REQUESTS(&info.ring)) {
            more_to_do = 1;
        }
        if (notify) {
            printk("\nxen:Dom0: Send notify to DomU");
            notify_remote_via_irq(info.irq);
        }
    }
    return IRQ_HANDLED;
}

int init_module(void)
{
    struct vm_struct *v_start;
    as_sring_t *sring;
    int err;

    info.gref = gref;
    info.remoteDomain = 1;  /* domain id of the DomU, as seen in "xm list" */
    info.evtchn = port;
    printk("\nxen: dom0: init_module with gref = %d", info.gref);

    // The following function reserves a range of kernel address space and
    // allocates pagetables to map that range. No actual mappings are created.
    v_start = alloc_vm_area(PAGE_SIZE);
    if (v_start == NULL) {
        printk("\nxen: dom0: could not allocate page");
        return -ENOMEM;
    }

    /* ops struct in parameters:
     *   host_addr, flags, ref
     * ops struct out parameters:
     *   status (zero if OK), handle (used to unmap later), dev_bus_addr
     */
    gnttab_set_map_op(&ops, (unsigned long)v_start->addr, GNTMAP_host_map,
                      info.gref, info.remoteDomain); /* flags, ref, domID */
    if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &ops, 1)) {
        printk("\nxen: dom0: HYPERVISOR map grant ref failed");
        return -EFAULT;
    }
    if (ops.status) {
        printk("\nxen: dom0: HYPERVISOR map grant ref failed status = %d",
               ops.status);
        return -EFAULT;
    }
    printk("\nxen: dom0: shared_page = %p, handle = %x, status = %x",
           v_start->addr, ops.handle, ops.status);

    // Used for unmapping
    unmap_ops.host_addr = (unsigned long)(v_start->addr);
    unmap_ops.handle = ops.handle;

    /* printk("\nBytes in page ");
       for(i=0;i<=10;i++) {
       printk("%c", ((char*)(v_start->addr))[i]);
       } */

    sring = (as_sring_t *)v_start->addr;
    BACK_RING_INIT(&info.ring, sring, PAGE_SIZE);

    /* Set up an event channel to the frontend */
    err = bind_interdomain_evtchn_to_irqhandler(info.remoteDomain,
                                                info.evtchn, as_int, 0,
                                                "dom0-backend", &info);
    if (err < 0) {
        printk("\nxen: dom0: init_module failed binding to evtchn !");
        err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                                        &unmap_ops, 1);
        return -EFAULT;
    }
    info.irq = err;
    printk("\nxen: dom0: end init_module: int = %d", info.irq);
    return 0;
}

void cleanup_module(void)
{
    int ret;

    printk("\nxen: dom0: cleanup_module");
    // Unmap the foreign frame.
    // unmap_ops.handle holds the handle of the mapping set up in init_module;
    // unmap_ops.host_addr points to where the page was mapped.
    ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_ops, 1);
    if (ret == 0) {
        printk(" cleanup_module: unmapped shared frame");
    } else {
        printk(" cleanup_module: unmapped shared frame failed");
    }
    printk("...\n");
}

MODULE_LICENSE("GPL");