/*
 * Copyright (C) 2006-2009, 2011 Free Software Foundation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with the program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <sys/types.h>
#include <mach/mig_errors.h>
#include <kern/kalloc.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <vm/vm_kern.h>
#include <vm/vm_user.h>
#include <device/device_types.h>
#include <device/device_port.h>
#include <device/disk_status.h>
#include <device/device_reply.user.h>
#include <device/device_emul.h>
#include <device/ds_routines.h>
#include <xen/public/io/blkif.h>
#include <xen/evt.h>
#include <string.h>
#include <util/atoi.h>
#include "store.h"
#include "block.h"
#include "grant.h"
#include "ring.h"
#include "xen.h"
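
/*
 * Xen virtual block device (VBD) frontend.  Each VBD advertised by the
 * hypervisor store is probed at boot and exposed through the Mach device
 * interface under a traditional device name such as "hd0" or "sd0s1".
 */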
/* Hypervisor part */

struct block_data {
  struct device device;
  char *name;                    /* traditional device name, e.g. "hd0s1" */
  int open_count;                /* -2: initialization failed, -1: not opened yet,
                                    otherwise number of opens */
  char *backend;                 /* store path to the backend driver */
  domid_t domid;                 /* domain id of the backend */
  char *vbd;                     /* name of the store entry for this VBD */
  int handle;                    /* VBD handle */
  unsigned info;                 /* VDISK_* flags advertised by the backend */
  dev_mode_t mode;               /* D_READ, possibly D_WRITE */
  unsigned sector_size;
  unsigned long nr_sectors;
  ipc_port_t port;               /* device port */
  blkif_front_ring_t ring;       /* shared request/response ring */
  evtchn_port_t evt;             /* event channel for ring notifications */
  simple_lock_data_t lock;       /* protects the ring */
  simple_lock_data_t pushlock;   /* serializes request pushes */
};

static int n_vbds;
static struct block_data *vbd_data;

struct device_emulation_ops hyp_block_emulation_ops;
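
/* Event channel interrupt handler: consume the pending responses on the
   shared ring, report any I/O error through the io_return_t that the
   request's id field points to, then wake up both the waiting requester
   and any thread waiting for a free ring slot.  */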
static void hyp_block_intr(int unit) {
  struct block_data *bd = &vbd_data[unit];
  blkif_response_t *rsp;
  int more;
  io_return_t *err;

  simple_lock(&bd->lock);
  more = RING_HAS_UNCONSUMED_RESPONSES(&bd->ring);
  while (more) {
    rmb(); /* make sure we see responses */
    rsp = RING_GET_RESPONSE(&bd->ring, bd->ring.rsp_cons++);
    err = (void *) (unsigned long) rsp->id;
    switch (rsp->status) {
    case BLKIF_RSP_ERROR:
      *err = D_IO_ERROR;
      break;
    case BLKIF_RSP_OKAY:
      break;
    default:
      printf("Unrecognized blkif status %d\n", rsp->status);
      goto drop;
    }
    thread_wakeup(err);
  drop:
    thread_wakeup_one(bd);
    RING_FINAL_CHECK_FOR_RESPONSES(&bd->ring, more);
  }
  simple_unlock(&bd->lock);
}
#define VBD_PATH "device/vbd"
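
/* Probe the store for VBDs and set up each frontend: decode the virtual
   device number into a traditional device name, share a ring page and an
   event channel with the backend, then wait for the backend to become
   Connected and read the disk geometry from it.  */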
void hyp_block_init(void) {
  char **vbds, **vbd;
  char *c;
  int i, disk, partition;
  int n;
  int grant;
  char port_name[10];
  char *prefix;
  char device_name[32];
  domid_t domid;
  evtchn_port_t evt;
  hyp_store_transaction_t t;
  vm_offset_t addr;
  struct block_data *bd;
  blkif_sring_t *ring;

  vbds = hyp_store_ls(0, 1, VBD_PATH);
  if (!vbds) {
    printf("hd: No block device (%s). Hoping you don't need any\n", hyp_store_error);
    n_vbds = 0;
    return;
  }

  n = 0;
  for (vbd = vbds; *vbd; vbd++)
    n++;

  vbd_data = (void*) kalloc(n * sizeof(*vbd_data));
  if (!vbd_data) {
    printf("hd: No memory for VBDs\n");
    n_vbds = 0;
    return;
  }
  n_vbds = n;

  for (n = 0; n < n_vbds; n++) {
    bd = &vbd_data[n];
    mach_atoi((u_char *) vbds[n], &bd->handle);
    if (bd->handle == MACH_ATOI_DEFAULT)
      continue;

    bd->open_count = -2;
    bd->vbd = vbds[n];
    /* Get virtual device number.  The encodings follow the Xen VBD
       convention, based on Linux device majors.  */
    i = hyp_store_read_int(0, 5, VBD_PATH, "/", vbds[n], "/", "virtual-device");
    if (i == -1)
      panic("hd: couldn't read virtual device of VBD %s\n", vbds[n]);
    if ((i >> 28) == 1) {
      /* xvd, new format */
      prefix = "xvd";
      disk = (i >> 8) & ((1 << 20) - 1);
      partition = i & ((1 << 8) - 1);
    } else if ((i >> 8) == 202) {
      /* xvd, old format */
      prefix = "xvd";
      disk = (i >> 4) & ((1 << 4) - 1);
      partition = i & ((1 << 4) - 1);
    } else if ((i >> 8) == 8) {
      /* SCSI */
      prefix = "sd";
      disk = (i >> 4) & ((1 << 4) - 1);
      partition = i & ((1 << 4) - 1);
    } else if ((i >> 8) == 3) {
      /* IDE primary */
      prefix = "hd";
      disk = (i >> 6) & ((1 << 2) - 1);
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 22) {
      /* IDE secondary */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 2;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 33) {
      /* IDE 3 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 4;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 34) {
      /* IDE 4 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 6;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 56) {
      /* IDE 5 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 8;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 57) {
      /* IDE 6 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 10;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 88) {
      /* IDE 7 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 12;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 89) {
      /* IDE 8 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 14;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 90) {
      /* IDE 9 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 16;
      partition = i & ((1 << 6) - 1);
    } else if ((i >> 8) == 91) {
      /* IDE 10 */
      prefix = "hd";
      disk = ((i >> 6) & ((1 << 2) - 1)) + 18;
      partition = i & ((1 << 6) - 1);
    } else {
      printf("unsupported VBD number %d\n", i);
      continue;
    }
    if (partition)
      sprintf(device_name, "%s%ds%d", prefix, disk, partition);
    else
      sprintf(device_name, "%s%d", prefix, disk);
    bd->name = (char*) kalloc(strlen(device_name) + 1);
    strcpy(bd->name, device_name);

    /* Get domain id of backend driver. */
    i = hyp_store_read_int(0, 5, VBD_PATH, "/", vbds[n], "/", "backend-id");
    if (i == -1)
      panic("%s: couldn't read backend domid (%s)", device_name, hyp_store_error);
    bd->domid = domid = i;
    do {
      t = hyp_store_transaction_start();

      /* Get a page for the shared ring. */
      if ((addr = vm_page_grab_phys_addr()) == -1)
        panic("%s: couldn't allocate space for shared ring\n", device_name);
      ring = (void*) phystokv(addr);
      SHARED_RING_INIT(ring);
      FRONT_RING_INIT(&bd->ring, ring, PAGE_SIZE);

      /* Grant the ring page to the backend and advertise the reference. */
      grant = hyp_grant_give(domid, atop(addr), 0);
      i = sprintf(port_name, "%d", grant);
      c = hyp_store_write(t, port_name, 5, VBD_PATH, "/", vbds[n], "/", "ring-ref");
      if (!c)
        panic("%s: couldn't store ring reference (%s)", device_name, hyp_store_error);
      kfree((vm_offset_t) c, strlen(c)+1);

      /* Allocate an event channel and give it to the backend. */
      bd->evt = evt = hyp_event_channel_alloc(domid);
      hyp_evt_handler(evt, hyp_block_intr, n, SPL7);
      i = sprintf(port_name, "%u", evt);
      c = hyp_store_write(t, port_name, 5, VBD_PATH, "/", vbds[n], "/", "event-channel");
      if (!c)
        panic("%s: couldn't store event channel (%s)", device_name, hyp_store_error);
      kfree((vm_offset_t) c, strlen(c)+1);

      c = hyp_store_write(t, hyp_store_state_initialized, 5, VBD_PATH, "/", vbds[n], "/", "state");
      if (!c)
        panic("%s: couldn't store state (%s)", device_name, hyp_store_error);
      kfree((vm_offset_t) c, strlen(c)+1);
    } while (!hyp_store_transaction_stop(t));
    /* TODO randomly wait? */
    c = hyp_store_read(0, 5, VBD_PATH, "/", vbds[n], "/", "backend");
    if (!c)
      panic("%s: couldn't get path to backend (%s)", device_name, hyp_store_error);
    bd->backend = c;

    /* Wait for the backend to reach the Connected state. */
    while (1) {
      i = hyp_store_read_int(0, 3, bd->backend, "/", "state");
      if (i == MACH_ATOI_DEFAULT)
        panic("can't read state from %s", bd->backend);
      if (i == XenbusStateConnected)
        break;
      hyp_yield();
    }
    i = hyp_store_read_int(0, 3, bd->backend, "/", "sectors");
    if (i == -1)
      panic("%s: couldn't get number of sectors (%s)", device_name, hyp_store_error);
    bd->nr_sectors = i;

    i = hyp_store_read_int(0, 3, bd->backend, "/", "sector-size");
    if (i == -1)
      panic("%s: couldn't get sector size (%s)", device_name, hyp_store_error);
    if (i & ~(2*(i-1)+1))
      panic("sector size %d is not a power of 2\n", i);
    if (i > PAGE_SIZE || PAGE_SIZE % i != 0)
      panic("%s: couldn't handle sector size %d with pages of size %d\n", device_name, i, PAGE_SIZE);
    bd->sector_size = i;

    i = hyp_store_read_int(0, 3, bd->backend, "/", "info");
    if (i == -1)
      panic("%s: couldn't get info (%s)", device_name, hyp_store_error);
    bd->info = i;

    c = hyp_store_read(0, 3, bd->backend, "/", "mode");
    if (!c)
      panic("%s: couldn't get backend's mode (%s)", device_name, hyp_store_error);
    if ((c[0] == 'w') && !(bd->info & VDISK_READONLY))
      bd->mode = D_READ|D_WRITE;
    else
      bd->mode = D_READ;

    c = hyp_store_read(0, 3, bd->backend, "/", "params");
    if (!c)
      panic("%s: couldn't get backend's real device (%s)", device_name, hyp_store_error);

    /* TODO: change suffix */
    printf("%s: dom%d's VBD %s (%s,%c%s) %ldMB\n", device_name, domid,
           vbds[n], c, bd->mode & D_WRITE ? 'w' : 'r',
           bd->info & VDISK_CDROM ? ", cdrom" : "",
           bd->nr_sectors / ((1<<20) / 512));
    kfree((vm_offset_t) c, strlen(c)+1);

    c = hyp_store_write(0, hyp_store_state_connected, 5, VBD_PATH, "/", bd->vbd, "/", "state");
    if (!c)
      panic("couldn't store state for %s (%s)", device_name, hyp_store_error);
    kfree((vm_offset_t) c, strlen(c)+1);

    bd->open_count = -1;
    bd->device.emul_ops = &hyp_block_emulation_ops;
    bd->device.emul_data = bd;
    simple_lock_init(&bd->lock);
    simple_lock_init(&bd->pushlock);
  }
}
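
/* Return a send right to the device port. */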
static ipc_port_t
dev_to_port(void *d)
{
  struct block_data *b = d;

  if (!d)
    return IP_NULL;
  return ipc_port_make_send(b->port);
}
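
/* Decrement the open count; on the last close, tear down the device port. */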
static int
device_close(void *devp)
{
  struct block_data *bd = devp;

  if (--bd->open_count < 0)
    panic("too many closes on %s", bd->name);
  printf("close, %s count %d\n", bd->name, bd->open_count);
  if (bd->open_count)
    return 0;
  ipc_kobject_set(bd->port, IKO_NULL, IKOT_NONE);
  ipc_port_dealloc_kernel(bd->port);
  return 0;
}
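
/* Look up NAME among the probed VBDs.  On the first open, create the
   kernel device port and arrange for a no-senders notification; reopens
   just bump the open count. */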
static io_return_t
device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
             dev_mode_t mode, char *name, device_t *devp /* out */)
{
  int i;
  ipc_port_t port, notify;
  struct block_data *bd;

  for (i = 0; i < n_vbds; i++)
    if (!strcmp(name, vbd_data[i].name))
      break;
  if (i == n_vbds)
    return D_NO_SUCH_DEVICE;

  bd = &vbd_data[i];
  if (bd->open_count == -2)
    /* couldn't be initialized */
    return D_NO_SUCH_DEVICE;

  if ((mode & D_WRITE) && !(bd->mode & D_WRITE))
    return D_READ_ONLY;

  if (bd->open_count >= 0) {
    *devp = &bd->device;
    bd->open_count++;
    printf("re-open, %s count %d\n", bd->name, bd->open_count);
    return D_SUCCESS;
  }

  bd->open_count = 1;
  printf("%s count %d\n", bd->name, bd->open_count);

  port = ipc_port_alloc_kernel();
  if (port == IP_NULL) {
    device_close(bd);
    return KERN_RESOURCE_SHORTAGE;
  }
  bd->port = port;
  *devp = &bd->device;
  ipc_kobject_set (port, (ipc_kobject_t) &bd->device, IKOT_DEVICE);

  notify = ipc_port_make_sonce (bd->port);
  ip_lock (bd->port);
  ipc_port_nsrequest (bd->port, 1, notify, &notify);
  assert (notify == IP_NULL);

  if (IP_VALID (reply_port))
    ds_device_open_reply (reply_port, reply_port_type, D_SUCCESS, port);
  else
    device_close(bd);
  return MIG_NO_REPLY;
}
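
/* Read COUNT bytes at sector BN: grab free pages, grant them to the
   backend, queue BLKIF_OP_READ requests of up to
   BLKIF_MAX_SEGMENTS_PER_REQUEST pages each, sleep until the interrupt
   handler wakes us, then hand the pages back wrapped in a vm_map_copy_t. */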
static io_return_t
device_read (void *d, ipc_port_t reply_port,
             mach_msg_type_name_t reply_port_type, dev_mode_t mode,
             recnum_t bn, int count, io_buf_ptr_t *data,
             unsigned *bytes_read)
{
  int resid, amt;
  io_return_t err = 0;
  vm_page_t pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  int nbpages;
  vm_map_copy_t copy;
  vm_offset_t offset, alloc_offset, o;
  vm_object_t object;
  vm_page_t m;
  vm_size_t len, size;
  struct block_data *bd = d;
  struct blkif_request *req;

  *data = 0;
  *bytes_read = 0;

  if (count < 0)
    return D_INVALID_SIZE;
  if (count == 0)
    return 0;

  /* Allocate an object to hold the data. */
  size = round_page (count);
  object = vm_object_allocate (size);
  if (! object)
    {
      err = D_NO_MEMORY;
      goto out;
    }
  alloc_offset = offset = 0;
  resid = count;
  while (resid && !err)
    {
      unsigned reqn;
      int i;
      int last_sect;

      nbpages = 0;

      /* Determine size of I/O this time around. */
      len = round_page(offset + resid) - offset;
      if (len > PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)
        len = PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST;

      /* Allocate pages. */
      while (alloc_offset < offset + len)
        {
          while ((m = vm_page_grab ()) == 0)
            VM_PAGE_WAIT (0);
          assert (! m->active && ! m->inactive);
          m->busy = TRUE;
          assert(nbpages < BLKIF_MAX_SEGMENTS_PER_REQUEST);
          pages[nbpages++] = m;
          alloc_offset += PAGE_SIZE;
        }

      /* Do the read. */
      amt = len;
      if (amt > resid)
        amt = resid;

      /* Allocate a request slot: wait until the ring has room, then take
         the push lock so the slot cannot be pushed before we fill it. */
      spl_t spl = splsched();
      while (1) {
        simple_lock(&bd->lock);
        if (!RING_FULL(&bd->ring))
          break;
        thread_sleep(bd, &bd->lock, FALSE);
      }
      mb();
      reqn = bd->ring.req_prod_pvt++;
      simple_lock(&bd->pushlock);
      simple_unlock(&bd->lock);
      (void) splx(spl);
      req = RING_GET_REQUEST(&bd->ring, reqn);
      req->operation = BLKIF_OP_READ;
      req->nr_segments = nbpages;
      req->handle = bd->handle;
      req->id = (uint64_t) (unsigned long) &err; /* pointer on the stack */
      req->sector_number = bn + offset / 512;
      for (i = 0; i < nbpages; i++) {
        req->seg[i].gref = gref[i] = hyp_grant_give(bd->domid, atop(pages[i]->phys_addr), 0);
        req->seg[i].first_sect = 0;
        req->seg[i].last_sect = PAGE_SIZE/512 - 1;
      }
      /* The last page may be partial: trim its last sector and zero the
         remainder of the page. */
      last_sect = ((amt - 1) & PAGE_MASK) / 512;
      req->seg[nbpages-1].last_sect = last_sect;
      memset((void*) phystokv(pages[nbpages-1]->phys_addr
                              + (last_sect + 1) * 512),
             0, PAGE_SIZE - (last_sect + 1) * 512);

      /* No need for a lock: as long as the request is not pushed, the event won't be triggered. */
      assert_wait((event_t) &err, FALSE);

      int notify;
      wmb(); /* make sure it sees requests */
      RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bd->ring, notify);
      if (notify)
        hyp_event_channel_send(bd->evt);
      simple_unlock(&bd->pushlock);

      thread_block(NULL);

      if (err)
        printf("error reading %d bytes at sector %ld\n", amt,
               bn + offset / 512);

      for (i = 0; i < nbpages; i++)
        hyp_grant_takeback(gref[i]);
      /* Compute number of pages to insert in object. */
      o = offset;
      resid -= amt;
      if (resid == 0)
        offset = o + len;
      else
        offset += amt;

      /* Add pages to the object. */
      vm_object_lock (object);
      for (i = 0; i < nbpages; i++)
        {
          m = pages[i];
          assert (m->busy);
          vm_page_lock_queues ();
          PAGE_WAKEUP_DONE (m);
          m->dirty = TRUE;
          vm_page_insert (m, object, o);
          vm_page_unlock_queues ();
          o += PAGE_SIZE;
        }
      vm_object_unlock (object);
    }

out:
  if (! err)
    err = vm_map_copyin_object (object, 0, round_page (count), &copy);
  if (! err)
    {
      *data = (io_buf_ptr_t) copy;
      *bytes_read = count - resid;
    }
  else
    vm_object_deallocate (object);
  return err;
}
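
/* Write COUNT bytes at sector BN: copy the caller's data into a bounce
   buffer in the kernel map, grant the buffer pages to the backend, and
   queue BLKIF_OP_WRITE requests of up to BLKIF_MAX_SEGMENTS_PER_REQUEST
   pages each, sleeping until each completes. */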
static io_return_t
device_write(void *d, ipc_port_t reply_port,
             mach_msg_type_name_t reply_port_type, dev_mode_t mode,
             recnum_t bn, io_buf_ptr_t data, unsigned int count,
             int *bytes_written)
{
  io_return_t err = 0;
  vm_map_copy_t copy = (vm_map_copy_t) data;
  vm_offset_t buffer = 0;
  char *map_data;
  vm_offset_t map_addr;
  vm_size_t map_size;
  unsigned copy_npages = atop(round_page(count));
  vm_offset_t phys_addrs[copy_npages];
  struct block_data *bd = d;
  blkif_request_t *req;
  grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  unsigned reqn, size;
  unsigned i, nbpages, j;
  kern_return_t kr;

  if (!(bd->mode & D_WRITE))
    return D_READ_ONLY;

  if (count == 0) {
    vm_map_copy_discard(copy);
    return 0;
  }

  if (count % bd->sector_size)
    return D_INVALID_SIZE;
  if (count > copy->size)
    return D_INVALID_SIZE;

  /* XXX The underlying physical pages of the mapping could be highmem,
     for which drivers require the use of a bounce buffer.  */
  kr = kmem_alloc(device_io_map, &buffer, count);
  if (kr != KERN_SUCCESS)
    return kr;

  kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
                           &map_addr, &map_size, copy, count);
  if (kr != KERN_SUCCESS) {
    kmem_free(device_io_map, buffer, count);
    return kr;
  }
  memcpy((void *)buffer, map_data, count);
  kmem_io_map_deallocate(device_io_map, map_addr, map_size);

  for (i = 0; i < copy_npages; i++)
    phys_addrs[i] = kvtophys(buffer + ptoa(i));
  for (i = 0; i < copy_npages; i += nbpages) {
    nbpages = BLKIF_MAX_SEGMENTS_PER_REQUEST;
    if (nbpages > copy_npages - i)
      nbpages = copy_npages - i;

    /* Allocate a request slot: wait until the ring has room, then take
       the push lock so the slot cannot be pushed before we fill it. */
    spl_t spl = splsched();
    while (1) {
      simple_lock(&bd->lock);
      if (!RING_FULL(&bd->ring))
        break;
      thread_sleep(bd, &bd->lock, FALSE);
    }
    mb();
    reqn = bd->ring.req_prod_pvt++;
    simple_lock(&bd->pushlock);
    simple_unlock(&bd->lock);
    (void) splx(spl);

    req = RING_GET_REQUEST(&bd->ring, reqn);
    req->operation = BLKIF_OP_WRITE;
    req->nr_segments = nbpages;
    req->handle = bd->handle;
    req->id = (uint64_t) (unsigned long) &err; /* pointer on the stack */
    req->sector_number = bn + i*PAGE_SIZE / 512;

    for (j = 0; j < nbpages; j++) {
      req->seg[j].gref = gref[j] = hyp_grant_give(bd->domid, atop(phys_addrs[i + j]), 1);
      req->seg[j].first_sect = 0;
      size = PAGE_SIZE;
      if ((i + j + 1) * PAGE_SIZE > count)
        size = count - (i + j) * PAGE_SIZE;
      req->seg[j].last_sect = size/512 - 1;
    }

    /* No need for a lock: as long as the request is not pushed, the event won't be triggered. */
    assert_wait((event_t) &err, FALSE);

    int notify;
    wmb(); /* make sure it sees requests */
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bd->ring, notify);
    if (notify)
      hyp_event_channel_send(bd->evt);
    simple_unlock(&bd->pushlock);

    thread_block(NULL);

    for (j = 0; j < nbpages; j++)
      hyp_grant_takeback(gref[j]);

    if (err) {
      printf("error writing %u bytes at sector %d\n", count, bn);
      break;
    }
  }

  if (buffer)
    kmem_free(device_io_map, buffer, count);

  vm_map_copy_discard (copy);

  if (!err)
    *bytes_written = count;

  if (IP_VALID(reply_port))
    ds_device_write_reply (reply_port, reply_port_type, err, count);

  return MIG_NO_REPLY;
}
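
/* Report device size and record geometry, computed from the backend's
   count of 512-byte sectors. */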
static io_return_t
device_get_status(void *d, dev_flavor_t flavor, dev_status_t status,
                  mach_msg_type_number_t *status_count)
{
  struct block_data *bd = d;

  switch (flavor)
    {
    case DEV_GET_SIZE:
      status[DEV_GET_SIZE_DEVICE_SIZE] = (unsigned long long) bd->nr_sectors * 512;
      status[DEV_GET_SIZE_RECORD_SIZE] = bd->sector_size;
      *status_count = DEV_GET_SIZE_COUNT;
      break;
    case DEV_GET_RECORDS:
      status[DEV_GET_RECORDS_DEVICE_RECORDS] = ((unsigned long long) bd->nr_sectors * 512) / bd->sector_size;
      status[DEV_GET_RECORDS_RECORD_SIZE] = bd->sector_size;
      *status_count = DEV_GET_RECORDS_COUNT;
      break;
    default:
      printf("TODO: block_%s(%d)\n", __func__, flavor);
      return D_INVALID_OPERATION;
    }
  return D_SUCCESS;
}
struct device_emulation_ops hyp_block_emulation_ops = {
  NULL,   /* dereference */
  NULL,   /* deallocate */
  dev_to_port,
  device_open,
  device_close,
  device_write,
  NULL,   /* write_inband */
  device_read,
  NULL,   /* read_inband */
  NULL,   /* set_status */
  device_get_status,
  NULL,   /* set_filter */
  NULL,   /* map */
  NULL,   /* no_senders */
  NULL,   /* write_trap */
  NULL,   /* writev_trap */
};