Line data Source code
1 : /*
2 : * Use the io_uring of Linux (>= 5.1)
3 : *
4 : * Copyright (C) Volker Lendecke 2008
5 : * Copyright (C) Jeremy Allison 2010
6 : * Copyright (C) Stefan Metzmacher 2019
7 : *
8 : * This program is free software; you can redistribute it and/or modify
9 : * it under the terms of the GNU General Public License as published by
10 : * the Free Software Foundation; either version 2 of the License, or
11 : * (at your option) any later version.
12 : *
13 : * This program is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : * GNU General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU General Public License
19 : * along with this program; if not, write to the Free Software
20 : * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 : */
22 :
23 : #include "replace.h"
24 :
25 : /*
26 : * liburing.h only needs a forward declaration
27 : * of struct open_how.
28 : *
29 : * If struct open_how is defined in liburing/compat.h
30 : * itself, hide it away in order to avoid conflicts
31 : * with including linux/openat2.h or defining 'struct open_how'
32 : * in libreplace.
33 : */
34 : struct open_how;
35 : #ifdef HAVE_STRUCT_OPEN_HOW_LIBURING_COMPAT_H
36 : #define open_how __ignore_liburing_compat_h_open_how
37 : #include <liburing/compat.h>
38 : #undef open_how
39 : #endif /* HAVE_STRUCT_OPEN_HOW_LIBURING_COMPAT_H */
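/*
 * A minimal sketch of the rename trick used above, with a
 * hypothetical header <somelib/compat.h> standing in for any header
 * that ships its own incompatible definition of 'struct open_how':
 *
 *   #define open_how __ignore_somelib_open_how
 *   #include <somelib/compat.h>     // its 'struct open_how' tag is mangled
 *   #undef open_how
 *   #include <linux/openat2.h>      // the real 'struct open_how'
 */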
40 :
41 : #include "includes.h"
42 : #include "system/filesys.h"
43 : #include "smbd/smbd.h"
44 : #include "smbd/globals.h"
45 : #include "lib/util/tevent_unix.h"
46 : #include "lib/util/sys_rw.h"
47 : #include "lib/util/iov_buf.h"
48 : #include "smbprofile.h"
49 : #include <liburing.h>
50 :
51 : struct vfs_io_uring_request;
52 :
53 : struct vfs_io_uring_config {
54 : struct io_uring uring;
55 : struct tevent_fd *fde;
56 : /* recursion guard. See comment above vfs_io_uring_queue_run() */
57 : bool busy;
58 : /* recursion guard. See comment above vfs_io_uring_queue_run() */
59 : bool need_retry;
60 : struct vfs_io_uring_request *queue;
61 : struct vfs_io_uring_request *pending;
62 : };
63 :
64 : struct vfs_io_uring_request {
65 : struct vfs_io_uring_request *prev, *next;
66 : struct vfs_io_uring_request **list_head;
67 : struct vfs_io_uring_config *config;
68 : struct tevent_req *req;
69 : void (*completion_fn)(struct vfs_io_uring_request *cur,
70 : const char *location);
71 : struct timespec start_time;
72 : struct timespec end_time;
73 : SMBPROFILE_BYTES_ASYNC_STATE(profile_bytes);
74 : struct io_uring_sqe sqe;
75 : struct io_uring_cqe cqe;
76 : };
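/*
 * Request lifecycle, as implemented below: a request is linked into
 * config->queue by vfs_io_uring_request_submit(), moved to
 * config->pending by _vfs_io_uring_queue_run() once its sqe has been
 * copied into the kernel submission queue, and unlinked again in
 * vfs_io_uring_finish_req() when its cqe arrives. 'list_head' always
 * points at whichever of the two lists the request currently sits on
 * (NULL once it has finished).
 */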
77 :
78 16862 : static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur,
79 : const struct io_uring_cqe *cqe,
80 : struct timespec end_time,
81 : const char *location)
82 : {
83 : struct tevent_req *req =
84 16862 : talloc_get_type_abort(cur->req,
85 : struct tevent_req);
86 16862 : void *state = _tevent_req_data(req);
87 :
88 16862 : talloc_set_destructor(state, NULL);
89 16862 : if (cur->list_head != NULL) {
90 16862 : DLIST_REMOVE((*cur->list_head), cur);
91 16862 : cur->list_head = NULL;
92 : }
93 16862 : cur->cqe = *cqe;
94 :
95 16862 : SMBPROFILE_BYTES_ASYNC_SET_IDLE(cur->profile_bytes);
96 16862 : cur->end_time = end_time;
97 :
98 : /*
99 : * We rely on being inside the _send() function
100 : * or tevent_req_defer_callback() being called
101 : * already.
102 : */
103 16862 : cur->completion_fn(cur, location);
104 16862 : }
105 :
106 180 : static void vfs_io_uring_config_destroy(struct vfs_io_uring_config *config,
107 : int ret,
108 : const char *location)
109 : {
110 180 : struct vfs_io_uring_request *cur = NULL, *next = NULL;
111 : struct timespec start_time;
112 : struct timespec end_time;
113 180 : struct io_uring_cqe err_cqe = {
114 : .res = ret,
115 : };
116 :
117 180 : PROFILE_TIMESTAMP(&start_time);
118 :
119 180 : if (config->uring.ring_fd != -1) {
120 : /* TODO: cancel queued and pending requests */
121 180 : TALLOC_FREE(config->fde);
122 180 : io_uring_queue_exit(&config->uring);
123 180 : config->uring.ring_fd = -1;
124 : }
125 :
126 180 : PROFILE_TIMESTAMP(&end_time);
127 :
128 180 : for (cur = config->pending; cur != NULL; cur = next) {
129 0 : next = cur->next;
130 0 : err_cqe.user_data = (uintptr_t)(void *)cur;
131 0 : vfs_io_uring_finish_req(cur, &err_cqe, end_time, location);
132 : }
133 :
134 180 : for (cur = config->queue; cur != NULL; cur = next) {
135 0 : next = cur->next;
136 0 : err_cqe.user_data = (uintptr_t)(void *)cur;
137 0 : cur->start_time = start_time;
138 0 : vfs_io_uring_finish_req(cur, &err_cqe, end_time, location);
139 : }
140 180 : }
141 :
142 180 : static int vfs_io_uring_config_destructor(struct vfs_io_uring_config *config)
143 : {
144 180 : vfs_io_uring_config_destroy(config, -EUCLEAN, __location__);
145 180 : return 0;
146 : }
147 :
148 0 : static int vfs_io_uring_request_state_deny_destructor(void *_state)
149 : {
150 : struct __vfs_io_uring_generic_state {
151 : struct vfs_io_uring_request ur;
152 0 : } *state = (struct __vfs_io_uring_generic_state *)_state;
153 0 : struct vfs_io_uring_request *cur = &state->ur;
154 :
155 : /* our parent is gone */
156 0 : cur->req = NULL;
157 :
158 : /* remove ourself from any list */
159 0 : DLIST_REMOVE((*cur->list_head), cur);
160 0 : cur->list_head = NULL;
161 :
162 : /*
163 : * Our state is about to go away,
164 : * all we can do is shut down the whole uring.
165 : * But that's ok as we're most likely called from exit_server()
166 : */
167 0 : vfs_io_uring_config_destroy(cur->config, -ESHUTDOWN, __location__);
168 0 : return 0;
169 : }
170 :
171 : static void vfs_io_uring_fd_handler(struct tevent_context *ev,
172 : struct tevent_fd *fde,
173 : uint16_t flags,
174 : void *private_data);
175 :
176 180 : static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
177 : const char *user)
178 : {
179 : int ret;
180 : struct vfs_io_uring_config *config;
181 : unsigned num_entries;
182 : bool sqpoll;
183 180 : unsigned flags = 0;
184 :
185 180 : config = talloc_zero(handle->conn, struct vfs_io_uring_config);
186 180 : if (config == NULL) {
187 0 : DEBUG(0, ("talloc_zero() failed\n"));
188 0 : return -1;
189 : }
190 :
191 180 : SMB_VFS_HANDLE_SET_DATA(handle, config,
192 : NULL, struct vfs_io_uring_config,
193 : return -1);
194 :
195 180 : ret = SMB_VFS_NEXT_CONNECT(handle, service, user);
196 180 : if (ret < 0) {
197 0 : return ret;
198 : }
199 :
200 180 : num_entries = lp_parm_ulong(SNUM(handle->conn),
201 : "io_uring",
202 : "num_entries",
203 : 128);
204 180 : num_entries = MAX(num_entries, 1);
205 :
206 180 : sqpoll = lp_parm_bool(SNUM(handle->conn),
207 : "io_uring",
208 : "sqpoll",
209 : false);
210 180 : if (sqpoll) {
211 0 : flags |= IORING_SETUP_SQPOLL;
212 : }
213 :
214 180 : ret = io_uring_queue_init(num_entries, &config->uring, flags);
215 180 : if (ret < 0) {
216 0 : SMB_VFS_NEXT_DISCONNECT(handle);
217 0 : errno = -ret;
218 0 : return -1;
219 : }
220 :
221 180 : talloc_set_destructor(config, vfs_io_uring_config_destructor);
222 :
223 : #ifdef HAVE_IO_URING_RING_DONTFORK
224 180 : ret = io_uring_ring_dontfork(&config->uring);
225 180 : if (ret < 0) {
226 0 : SMB_VFS_NEXT_DISCONNECT(handle);
227 0 : errno = -ret;
228 0 : return -1;
229 : }
230 : #endif /* HAVE_IO_URING_RING_DONTFORK */
231 :
232 180 : config->fde = tevent_add_fd(handle->conn->sconn->ev_ctx,
233 : config,
234 : config->uring.ring_fd,
235 : TEVENT_FD_READ,
236 : vfs_io_uring_fd_handler,
237 : handle);
238 180 : if (config->fde == NULL) {
239 0 : ret = errno;
240 0 : SMB_VFS_NEXT_DISCONNECT(handle);
241 0 : errno = ret;
242 0 : return -1;
243 : }
244 :
245 180 : return 0;
246 : }
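/*
 * The connect hook above reads two module parameters. A share that
 * loads this module and overrides both could look roughly like this
 * in smb.conf (illustrative values; num_entries defaults to 128 and
 * sqpoll to no):
 *
 *   [data]
 *       path = /srv/data
 *       vfs objects = io_uring
 *       io_uring:num_entries = 256
 *       io_uring:sqpoll = no
 */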
247 :
248 33514 : static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
249 : {
250 33514 : struct vfs_io_uring_request *cur = NULL, *next = NULL;
251 33514 : struct io_uring_cqe *cqe = NULL;
252 : unsigned cqhead;
253 33514 : unsigned nr = 0;
254 : struct timespec start_time;
255 : struct timespec end_time;
256 : int ret;
257 :
258 33514 : PROFILE_TIMESTAMP(&start_time);
259 :
260 33514 : if (config->uring.ring_fd == -1) {
261 0 : vfs_io_uring_config_destroy(config, -ESTALE, __location__);
262 0 : return;
263 : }
264 :
265 50376 : for (cur = config->queue; cur != NULL; cur = next) {
266 16862 : struct io_uring_sqe *sqe = NULL;
267 16862 : void *state = _tevent_req_data(cur->req);
268 :
269 16862 : next = cur->next;
270 :
271 16862 : sqe = io_uring_get_sqe(&config->uring);
272 16862 : if (sqe == NULL) {
273 0 : break;
274 : }
275 :
276 16862 : talloc_set_destructor(state,
277 : vfs_io_uring_request_state_deny_destructor);
278 16862 : DLIST_REMOVE(config->queue, cur);
279 16862 : *sqe = cur->sqe;
280 16862 : DLIST_ADD_END(config->pending, cur);
281 16862 : cur->list_head = &config->pending;
282 16862 : SMBPROFILE_BYTES_ASYNC_SET_BUSY(cur->profile_bytes);
283 :
284 16862 : cur->start_time = start_time;
285 : }
286 :
287 33514 : ret = io_uring_submit(&config->uring);
288 33514 : if (ret == -EAGAIN || ret == -EBUSY) {
289 : /* We just retry later */
290 33514 : } else if (ret < 0) {
291 0 : vfs_io_uring_config_destroy(config, ret, __location__);
292 0 : return;
293 : }
294 :
295 33514 : PROFILE_TIMESTAMP(&end_time);
296 :
297 50376 : io_uring_for_each_cqe(&config->uring, cqhead, cqe) {
298 16862 : cur = (struct vfs_io_uring_request *)io_uring_cqe_get_data(cqe);
299 16862 : vfs_io_uring_finish_req(cur, cqe, end_time, __location__);
300 16862 : nr++;
301 : }
302 :
303 33514 : io_uring_cq_advance(&config->uring, nr);
304 : }
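/*
 * For reference, the liburing calls used above (io_uring_get_sqe(),
 * io_uring_submit(), io_uring_for_each_cqe(), io_uring_cq_advance())
 * in their simplest blocking form, without the tevent/fd-readiness
 * integration this module does. Illustrative sketch only
 * (example_uring_pread() is not part of this module); it relies on
 * <liburing.h> already being included above.
 */
#if 0
static ssize_t example_uring_pread(int fd, void *buf, size_t n, off_t off)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe = NULL;
	struct io_uring_cqe *cqe = NULL;
	struct iovec iov = { .iov_base = buf, .iov_len = n };
	ssize_t result;
	int ret;

	ret = io_uring_queue_init(4, &ring, 0);	/* small ring, no flags */
	if (ret < 0) {
		return ret;
	}

	sqe = io_uring_get_sqe(&ring);		/* grab a free submission entry */
	if (sqe == NULL) {
		io_uring_queue_exit(&ring);
		return -EAGAIN;
	}
	io_uring_prep_readv(sqe, fd, &iov, 1, off);
	io_uring_sqe_set_data(sqe, &iov);	/* user_data travels to the cqe */

	io_uring_submit(&ring);			/* hand the sqe to the kernel */
	ret = io_uring_wait_cqe(&ring, &cqe);	/* block until a completion */
	if (ret < 0) {
		io_uring_queue_exit(&ring);
		return ret;
	}

	result = cqe->res;			/* bytes read, or -errno */
	io_uring_cqe_seen(&ring, cqe);		/* mark the cqe consumed */
	io_uring_queue_exit(&ring);
	return result;
}
#endif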
305 :
306 : /*
307 : * Wrapper function to prevent recursion which could happen
308 : * if we called _vfs_io_uring_queue_run() directly without
309 : * recursion checks.
310 : *
311 : * Looking at the pread call, we can have:
312 : *
313 : * vfs_io_uring_pread_send()
314 : * ->vfs_io_uring_pread_submit() <-----------------------------------
315 : * ->vfs_io_uring_request_submit() |
316 : * ->vfs_io_uring_queue_run() |
317 : * ->_vfs_io_uring_queue_run() |
318 : * |
319 : * But inside _vfs_io_uring_queue_run() looks like: |
320 : * |
321 : * _vfs_io_uring_queue_run() { |
322 : * if (THIS_IO_COMPLETED) { |
323 : * ->vfs_io_uring_finish_req() |
324 : * ->cur->completion_fn() |
325 : * } |
326 : * } |
327 : * |
328 : * cur->completion_fn() for pread is set to vfs_io_uring_pread_completion() |
329 : * |
330 : * vfs_io_uring_pread_completion() { |
331 : * if (READ_TERMINATED) { |
332 : * -> tevent_req_done() - We're done, go back up the stack. |
333 : * return; |
334 : * } |
335 : * |
336 : * We have a short read - adjust the io vectors |
337 : * |
338 : * ->vfs_io_uring_pread_submit() ---------------------------------------
339 : * }
340 : *
341 : * So before calling _vfs_io_uring_queue_run() we bracket it by setting
342 : * a flag config->busy, and unset it once _vfs_io_uring_queue_run() finally
343 : * exits the retry loop.
344 : *
345 : * If we end up back in vfs_io_uring_queue_run() we notice we've done so
346 : * as config->busy is set and don't recurse into _vfs_io_uring_queue_run().
347 : *
348 : * We set the second flag config->need_retry that tells us to loop in the
349 : * vfs_io_uring_queue_run() call above us in the stack and return.
350 : *
351 : * When the outer call to _vfs_io_uring_queue_run() returns we are in
352 : * a loop checking if config->need_retry was set. That happens if
353 : * the short read case occurs and _vfs_io_uring_queue_run() ended up
354 : * recursing into vfs_io_uring_queue_run().
355 : *
356 : * Once vfs_io_uring_pread_completion() finishes without a short
357 : * read (the READ_TERMINATED case, tevent_req_done() is called)
358 : * then config->need_retry is left as false, we exit the loop,
359 : * set config->busy to false so the next top level call into
360 : * vfs_io_uring_queue_run() won't think it's a recursed call
361 : * and return.
362 : *
363 : */
364 :
365 33514 : static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
366 : {
367 33514 : if (config->busy) {
368 : /*
369 : * We've recursed due to short read/write.
370 : * Set need_retry to ensure we retry the
371 : * io_uring_submit().
372 : */
373 0 : config->need_retry = true;
374 0 : return;
375 : }
376 :
377 : /*
378 : * Bracket the loop calling _vfs_io_uring_queue_run()
379 : * with busy = true / busy = false.
380 : * so we can detect recursion above.
381 : */
382 :
383 33514 : config->busy = true;
384 :
385 : do {
386 33514 : config->need_retry = false;
387 33514 : _vfs_io_uring_queue_run(config);
388 33514 : } while (config->need_retry);
389 :
390 33514 : config->busy = false;
391 : }
392 :
393 16862 : static void vfs_io_uring_request_submit(struct vfs_io_uring_request *cur)
394 : {
395 16862 : struct vfs_io_uring_config *config = cur->config;
396 :
397 16862 : io_uring_sqe_set_data(&cur->sqe, cur);
398 16862 : DLIST_ADD_END(config->queue, cur);
399 16862 : cur->list_head = &config->queue;
400 :
401 16862 : vfs_io_uring_queue_run(config);
402 16862 : }
403 :
404 16652 : static void vfs_io_uring_fd_handler(struct tevent_context *ev,
405 : struct tevent_fd *fde,
406 : uint16_t flags,
407 : void *private_data)
408 : {
409 16652 : vfs_handle_struct *handle = (vfs_handle_struct *)private_data;
410 16652 : struct vfs_io_uring_config *config = NULL;
411 :
412 16652 : SMB_VFS_HANDLE_GET_DATA(handle, config,
413 : struct vfs_io_uring_config,
414 : smb_panic(__location__));
415 :
416 16652 : vfs_io_uring_queue_run(config);
417 16652 : }
418 :
419 : struct vfs_io_uring_pread_state {
420 : struct files_struct *fsp;
421 : off_t offset;
422 : struct iovec iov;
423 : size_t nread;
424 : struct vfs_io_uring_request ur;
425 : };
426 :
427 : static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state);
428 : static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
429 : const char *location);
430 :
431 208 : static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *handle,
432 : TALLOC_CTX *mem_ctx,
433 : struct tevent_context *ev,
434 : struct files_struct *fsp,
435 : void *data,
436 : size_t n, off_t offset)
437 : {
438 208 : struct tevent_req *req = NULL;
439 208 : struct vfs_io_uring_pread_state *state = NULL;
440 208 : struct vfs_io_uring_config *config = NULL;
441 : bool ok;
442 :
443 208 : SMB_VFS_HANDLE_GET_DATA(handle, config,
444 : struct vfs_io_uring_config,
445 : smb_panic(__location__));
446 :
447 208 : req = tevent_req_create(mem_ctx, &state,
448 : struct vfs_io_uring_pread_state);
449 208 : if (req == NULL) {
450 0 : return NULL;
451 : }
452 208 : state->ur.config = config;
453 208 : state->ur.req = req;
454 208 : state->ur.completion_fn = vfs_io_uring_pread_completion;
455 :
456 208 : SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p,
457 : state->ur.profile_bytes, n);
458 208 : SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes);
459 :
460 208 : ok = sys_valid_io_range(offset, n);
461 208 : if (!ok) {
462 0 : tevent_req_error(req, EINVAL);
463 0 : return tevent_req_post(req, ev);
464 : }
465 :
466 208 : state->fsp = fsp;
467 208 : state->offset = offset;
468 208 : state->iov.iov_base = (void *)data;
469 208 : state->iov.iov_len = n;
470 208 : vfs_io_uring_pread_submit(state);
471 :
472 208 : if (!tevent_req_is_in_progress(req)) {
473 206 : return tevent_req_post(req, ev);
474 : }
475 :
476 2 : tevent_req_defer_callback(req, ev);
477 2 : return req;
478 : }
479 :
480 208 : static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state)
481 : {
482 208 : io_uring_prep_readv(&state->ur.sqe,
483 208 : fsp_get_io_fd(state->fsp),
484 208 : &state->iov, 1,
485 208 : state->offset);
486 208 : vfs_io_uring_request_submit(&state->ur);
487 208 : }
488 :
489 208 : static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
490 : const char *location)
491 : {
492 208 : struct vfs_io_uring_pread_state *state = tevent_req_data(
493 : cur->req, struct vfs_io_uring_pread_state);
494 208 : struct iovec *iov = &state->iov;
495 208 : int num_iov = 1;
496 : bool ok;
497 :
498 : /*
499 : * We rely on being inside the _send() function
500 : * or tevent_req_defer_callback() being called
501 : * already.
502 : */
503 :
504 208 : if (cur->cqe.res < 0) {
505 0 : int err = -cur->cqe.res;
506 0 : _tevent_req_error(cur->req, err, location);
507 208 : return;
508 : }
509 :
510 208 : if (cur->cqe.res == 0) {
511 : /*
512 : * We reached EOF, we're done
513 : */
514 4 : tevent_req_done(cur->req);
515 4 : return;
516 : }
517 :
518 204 : ok = iov_advance(&iov, &num_iov, cur->cqe.res);
519 204 : if (!ok) {
520 : /* This is not expected! */
521 0 : DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n",
522 : (int)cur->cqe.res,
523 : (int)state->iov.iov_len);
524 0 : tevent_req_error(cur->req, EIO);
525 0 : return;
526 : }
527 :
528 : /* sys_valid_io_range() already checked the boundaries */
529 204 : state->nread += state->ur.cqe.res;
530 204 : if (num_iov == 0) {
531 : /* We're done */
532 204 : tevent_req_done(cur->req);
533 204 : return;
534 : }
535 :
536 : /*
537 : * sys_valid_io_range() already checked the boundaries
538 : * now try to get the rest.
539 : */
540 0 : state->offset += state->ur.cqe.res;
541 0 : vfs_io_uring_pread_submit(state);
542 : }
543 :
544 208 : static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
545 : struct vfs_aio_state *vfs_aio_state)
546 : {
547 208 : struct vfs_io_uring_pread_state *state = tevent_req_data(
548 : req, struct vfs_io_uring_pread_state);
549 : ssize_t ret;
550 :
551 208 : SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes);
552 416 : vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time,
553 208 : &state->ur.start_time);
554 :
555 208 : if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
556 0 : tevent_req_received(req);
557 0 : return -1;
558 : }
559 :
560 208 : vfs_aio_state->error = 0;
561 208 : ret = state->nread;
562 :
563 208 : tevent_req_received(req);
564 208 : return ret;
565 : }
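/*
 * In-tree callers reach the pair above through SMB_VFS_PREAD_SEND()
 * and SMB_VFS_PREAD_RECV() rather than by calling these static
 * functions directly, but the tevent pattern is the same. A rough
 * sketch, waiting synchronously purely for illustration (handle, ev,
 * fsp and buf are assumed to exist in the caller):
 *
 *	struct vfs_aio_state aio_state = { .error = 0 };
 *	struct tevent_req *subreq = NULL;
 *	ssize_t nread;
 *
 *	subreq = vfs_io_uring_pread_send(handle, talloc_tos(), ev,
 *					 fsp, buf, sizeof(buf), 0);
 *	if (subreq == NULL) {
 *		return ENOMEM;
 *	}
 *	if (!tevent_req_poll(subreq, ev)) {
 *		return errno;
 *	}
 *	nread = vfs_io_uring_pread_recv(subreq, &aio_state);
 *	if (nread == -1) {
 *		return aio_state.error;
 *	}
 */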
566 :
567 : struct vfs_io_uring_pwrite_state {
568 : struct files_struct *fsp;
569 : off_t offset;
570 : struct iovec iov;
571 : size_t nwritten;
572 : struct vfs_io_uring_request ur;
573 : };
574 :
575 : static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state);
576 : static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
577 : const char *location);
578 :
579 16652 : static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *handle,
580 : TALLOC_CTX *mem_ctx,
581 : struct tevent_context *ev,
582 : struct files_struct *fsp,
583 : const void *data,
584 : size_t n, off_t offset)
585 : {
586 16652 : struct tevent_req *req = NULL;
587 16652 : struct vfs_io_uring_pwrite_state *state = NULL;
588 16652 : struct vfs_io_uring_config *config = NULL;
589 : bool ok;
590 :
591 16652 : SMB_VFS_HANDLE_GET_DATA(handle, config,
592 : struct vfs_io_uring_config,
593 : smb_panic(__location__));
594 :
595 16652 : req = tevent_req_create(mem_ctx, &state,
596 : struct vfs_io_uring_pwrite_state);
597 16652 : if (req == NULL) {
598 0 : return NULL;
599 : }
600 16652 : state->ur.config = config;
601 16652 : state->ur.req = req;
602 16652 : state->ur.completion_fn = vfs_io_uring_pwrite_completion;
603 :
604 16652 : SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p,
605 : state->ur.profile_bytes, n);
606 16652 : SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes);
607 :
608 16652 : ok = sys_valid_io_range(offset, n);
609 16652 : if (!ok) {
610 0 : tevent_req_error(req, EINVAL);
611 0 : return tevent_req_post(req, ev);
612 : }
613 :
614 16652 : state->fsp = fsp;
615 16652 : state->offset = offset;
616 16652 : state->iov.iov_base = discard_const(data);
617 16652 : state->iov.iov_len = n;
618 16652 : vfs_io_uring_pwrite_submit(state);
619 :
620 16652 : if (!tevent_req_is_in_progress(req)) {
621 4 : return tevent_req_post(req, ev);
622 : }
623 :
624 16648 : tevent_req_defer_callback(req, ev);
625 16648 : return req;
626 : }
627 :
628 16652 : static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state)
629 : {
630 16652 : io_uring_prep_writev(&state->ur.sqe,
631 16652 : fsp_get_io_fd(state->fsp),
632 16652 : &state->iov, 1,
633 16652 : state->offset);
634 16652 : vfs_io_uring_request_submit(&state->ur);
635 16652 : }
636 :
637 16652 : static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
638 : const char *location)
639 : {
640 16652 : struct vfs_io_uring_pwrite_state *state = tevent_req_data(
641 : cur->req, struct vfs_io_uring_pwrite_state);
642 16652 : struct iovec *iov = &state->iov;
643 16652 : int num_iov = 1;
644 : bool ok;
645 :
646 : /*
647 : * We rely on being inside the _send() function
648 : * or tevent_req_defer_callback() being called
649 : * already.
650 : */
651 :
652 16652 : if (cur->cqe.res < 0) {
653 0 : int err = -cur->cqe.res;
654 0 : _tevent_req_error(cur->req, err, location);
655 16652 : return;
656 : }
657 :
658 16652 : if (cur->cqe.res == 0) {
659 : /*
660 : * Ensure we can never spin.
661 : */
662 0 : tevent_req_error(cur->req, ENOSPC);
663 0 : return;
664 : }
665 :
666 16652 : ok = iov_advance(&iov, &num_iov, cur->cqe.res);
667 16652 : if (!ok) {
668 : /* This is not expected! */
669 0 : DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n",
670 : (int)cur->cqe.res,
671 : (int)state->iov.iov_len);
672 0 : tevent_req_error(cur->req, EIO);
673 0 : return;
674 : }
675 :
676 : /* sys_valid_io_range() already checked the boundaries */
677 16652 : state->nwritten += state->ur.cqe.res;
678 16652 : if (num_iov == 0) {
679 : /* We're done */
680 16652 : tevent_req_done(cur->req);
681 16652 : return;
682 : }
683 :
684 : /*
685 : * sys_valid_io_range() already checked the boundaries
686 : * now try to write the rest.
687 : */
688 0 : state->offset += state->ur.cqe.res;
689 0 : vfs_io_uring_pwrite_submit(state);
690 : }
691 :
692 16652 : static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req,
693 : struct vfs_aio_state *vfs_aio_state)
694 : {
695 16652 : struct vfs_io_uring_pwrite_state *state = tevent_req_data(
696 : req, struct vfs_io_uring_pwrite_state);
697 : ssize_t ret;
698 :
699 16652 : SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes);
700 33304 : vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time,
701 16652 : &state->ur.start_time);
702 :
703 16652 : if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
704 0 : tevent_req_received(req);
705 0 : return -1;
706 : }
707 :
708 16652 : vfs_aio_state->error = 0;
709 16652 : ret = state->nwritten;
710 :
711 16652 : tevent_req_received(req);
712 16652 : return ret;
713 : }
714 :
715 : struct vfs_io_uring_fsync_state {
716 : struct vfs_io_uring_request ur;
717 : };
718 :
719 : static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
720 : const char *location);
721 :
722 2 : static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *handle,
723 : TALLOC_CTX *mem_ctx,
724 : struct tevent_context *ev,
725 : struct files_struct *fsp)
726 : {
727 2 : struct tevent_req *req = NULL;
728 2 : struct vfs_io_uring_fsync_state *state = NULL;
729 2 : struct vfs_io_uring_config *config = NULL;
730 :
731 2 : SMB_VFS_HANDLE_GET_DATA(handle, config,
732 : struct vfs_io_uring_config,
733 : smb_panic(__location__));
734 :
735 2 : req = tevent_req_create(mem_ctx, &state,
736 : struct vfs_io_uring_fsync_state);
737 2 : if (req == NULL) {
738 0 : return NULL;
739 : }
740 2 : state->ur.config = config;
741 2 : state->ur.req = req;
742 2 : state->ur.completion_fn = vfs_io_uring_fsync_completion;
743 :
744 2 : SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p,
745 : state->ur.profile_bytes, 0);
746 2 : SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes);
747 :
748 2 : io_uring_prep_fsync(&state->ur.sqe,
749 : fsp_get_io_fd(fsp),
750 : 0); /* fsync_flags */
751 2 : vfs_io_uring_request_submit(&state->ur);
752 :
753 2 : if (!tevent_req_is_in_progress(req)) {
754 0 : return tevent_req_post(req, ev);
755 : }
756 :
757 2 : tevent_req_defer_callback(req, ev);
758 2 : return req;
759 : }
760 :
761 2 : static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
762 : const char *location)
763 : {
764 : /*
765 : * We rely on being inside the _send() function
766 : * or tevent_req_defer_callback() being called
767 : * already.
768 : */
769 :
770 2 : if (cur->cqe.res < 0) {
771 0 : int err = -cur->cqe.res;
772 0 : _tevent_req_error(cur->req, err, location);
773 0 : return;
774 : }
775 :
776 2 : if (cur->cqe.res > 0) {
777 : /* This is not expected! */
778 0 : DBG_ERR("got cur->cqe.res=%d\n", (int)cur->cqe.res);
779 0 : tevent_req_error(cur->req, EIO);
780 0 : return;
781 : }
782 :
783 2 : tevent_req_done(cur->req);
784 : }
785 :
786 2 : static int vfs_io_uring_fsync_recv(struct tevent_req *req,
787 : struct vfs_aio_state *vfs_aio_state)
788 : {
789 2 : struct vfs_io_uring_fsync_state *state = tevent_req_data(
790 : req, struct vfs_io_uring_fsync_state);
791 :
792 2 : SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes);
793 4 : vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time,
794 2 : &state->ur.start_time);
795 :
796 2 : if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
797 0 : tevent_req_received(req);
798 0 : return -1;
799 : }
800 :
801 2 : vfs_aio_state->error = 0;
802 :
803 2 : tevent_req_received(req);
804 2 : return 0;
805 : }
806 :
807 : static struct vfs_fn_pointers vfs_io_uring_fns = {
808 : .connect_fn = vfs_io_uring_connect,
809 : .pread_send_fn = vfs_io_uring_pread_send,
810 : .pread_recv_fn = vfs_io_uring_pread_recv,
811 : .pwrite_send_fn = vfs_io_uring_pwrite_send,
812 : .pwrite_recv_fn = vfs_io_uring_pwrite_recv,
813 : .fsync_send_fn = vfs_io_uring_fsync_send,
814 : .fsync_recv_fn = vfs_io_uring_fsync_recv,
815 : };
816 :
817 : static_decl_vfs;
818 201 : NTSTATUS vfs_io_uring_init(TALLOC_CTX *ctx)
819 : {
820 201 : return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
821 : "io_uring", &vfs_io_uring_fns);
822 : }
|