/*
 * Simulate Posix AIO using pthreads.
 *
 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
 *
 * Copyright (C) Volker Lendecke 2008
 * Copyright (C) Jeremy Allison 2012
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "includes.h"
#include "system/filesys.h"
#include "system/shmem.h"
#include "smbd/smbd.h"
#include "smbd/globals.h"
#include "../lib/pthreadpool/pthreadpool_tevent.h"
#ifdef HAVE_LINUX_FALLOC_H
#include <linux/falloc.h>
#endif

#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)

/*
 * We must have openat() to do any thread-based
 * asynchronous opens. We also must be using
 * thread-specific credentials (Linux-only
 * for now).
 */

struct aio_open_private_data {
	struct aio_open_private_data *prev, *next;
	/* Inputs. */
	int dir_fd;
	bool opened_dir_fd;
	int flags;
	mode_t mode;
	uint64_t mid;
	bool in_progress;
	struct smb_filename *fsp_name;
	struct smb_filename *smb_fname;
	connection_struct *conn;
	struct smbXsrv_connection *xconn;
	const struct security_unix_token *ux_tok;
	uint64_t initial_allocation_size;
	/* Returns. */
	int ret_fd;
	int ret_errno;
};

/* List of outstanding requests we have. */
static struct aio_open_private_data *open_pd_list;

static void aio_open_do(struct aio_open_private_data *opd);
static void opd_free(struct aio_open_private_data *opd);

/************************************************************************
 Find the open private data by mid.
***********************************************************************/

static struct aio_open_private_data *find_open_private_data_by_mid(uint64_t mid)
{
	struct aio_open_private_data *opd;

	for (opd = open_pd_list; opd != NULL; opd = opd->next) {
		if (opd->mid == mid) {
			return opd;
		}
	}

	return NULL;
}

/************************************************************************
 Callback when an open completes.
***********************************************************************/

static void aio_open_handle_completion(struct tevent_req *subreq)
{
	struct aio_open_private_data *opd =
		tevent_req_callback_data(subreq,
		struct aio_open_private_data);
	int ret;

	ret = pthreadpool_tevent_job_recv(subreq);
	TALLOC_FREE(subreq);

	/*
	 * We're no longer in flight. Remove the
	 * destructor used to preserve opd so
	 * a talloc_free actually removes it.
	 */
	talloc_set_destructor(opd, NULL);

	if (opd->conn == NULL) {
		/*
		 * We were shut down or the connection was closed
		 * while the open was in flight. No one wants the
		 * result, and state has been reparented to the
		 * NULL context, so just free it so we don't leak
		 * memory.
		 */
		DBG_NOTICE("aio open request for %s abandoned in flight\n",
			opd->fsp_name->base_name);
		if (opd->ret_fd != -1) {
			close(opd->ret_fd);
			opd->ret_fd = -1;
		}
		/*
		 * Find the outstanding event and reschedule so the
		 * client gets an error returned from the open.
		 */
		schedule_deferred_open_message_smb(opd->xconn, opd->mid);
		opd_free(opd);
		return;
	}

	if (ret != 0) {
		bool ok;

		if (ret != EAGAIN) {
			smb_panic("aio_open_handle_completion");
			/* notreached. */
			return;
		}
		/*
		 * Make sure we run as the user again.
		 */
		ok = change_to_user_and_service(opd->conn, opd->conn->vuid);
		if (!ok) {
			smb_panic("Can't change to user");
			return;
		}
		/*
		 * If we get EAGAIN from pthreadpool_tevent_job_recv() this
		 * means the lower-level pthreadpool failed to create a new
		 * thread. Fall back to sync processing in that case to allow
		 * some progress for the client.
		 */
		aio_open_do(opd);
	}

	DEBUG(10,("aio_open_handle_completion: mid %llu "
		"for file %s completed\n",
		(unsigned long long)opd->mid,
		opd->fsp_name->base_name));

	opd->in_progress = false;

	/* Find outstanding event and reschedule. */
	if (!schedule_deferred_open_message_smb(opd->xconn, opd->mid)) {
		/*
		 * Outstanding event didn't exist or was
		 * cancelled. Free up the fd and throw
		 * away the result.
		 */
		if (opd->ret_fd != -1) {
			close(opd->ret_fd);
			opd->ret_fd = -1;
		}
		opd_free(opd);
	}
}

/*****************************************************************
 The core of the async open code - the worker function. Note we
 use the openat() system call to avoid any problems with
 current working directory changes, and we change credentials
 on the thread to prevent any security race conditions.
*****************************************************************/

static void aio_open_worker(void *private_data)
{
	struct aio_open_private_data *opd =
		(struct aio_open_private_data *)private_data;

	/* Become the correct credential on this thread. */
	if (set_thread_credentials(opd->ux_tok->uid,
				opd->ux_tok->gid,
				(size_t)opd->ux_tok->ngroups,
				opd->ux_tok->groups) != 0) {
		opd->ret_fd = -1;
		opd->ret_errno = errno;
		return;
	}

	aio_open_do(opd);
}

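/*
 * Do the blocking openat() plus the optional initial allocation.
 * Called from the worker thread, or directly on the main thread
 * as a synchronous fallback when no worker thread could be created.
 */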
static void aio_open_do(struct aio_open_private_data *opd)
{
	opd->ret_fd = openat(opd->dir_fd,
			opd->smb_fname->base_name,
			opd->flags,
			opd->mode);

	if (opd->ret_fd == -1) {
		opd->ret_errno = errno;
	} else {
		/* Create was successful. */
		opd->ret_errno = 0;

#if defined(HAVE_LINUX_FALLOCATE)
		/*
		 * See if we can set the initial
		 * allocation size. We don't record
		 * the return for this as it's an
		 * optimization - the upper layer
		 * will also do this for us once
		 * the open returns.
		 */
		if (opd->initial_allocation_size) {
			(void)fallocate(opd->ret_fd,
					FALLOC_FL_KEEP_SIZE,
					0,
					(off_t)opd->initial_allocation_size);
		}
#endif
	}
}

/************************************************************************
 Open private data teardown.
***********************************************************************/

static void opd_free(struct aio_open_private_data *opd)
{
	if (opd->opened_dir_fd && opd->dir_fd != -1) {
		close(opd->dir_fd);
	}
	DLIST_REMOVE(open_pd_list, opd);
	TALLOC_FREE(opd);
}

/************************************************************************
 Create and initialize a private data struct for async open.
***********************************************************************/

static struct aio_open_private_data *create_private_open_data(
	TALLOC_CTX *ctx,
	const struct files_struct *dirfsp,
	const struct smb_filename *smb_fname,
	const files_struct *fsp,
	int flags,
	mode_t mode)
{
	struct aio_open_private_data *opd = talloc_zero(ctx,
					struct aio_open_private_data);

	if (!opd) {
		return NULL;
	}

	*opd = (struct aio_open_private_data) {
		.dir_fd = -1,
		.ret_fd = -1,
		.ret_errno = EINPROGRESS,
		.flags = flags,
		.mode = mode,
		.mid = fsp->mid,
		.in_progress = true,
		.conn = fsp->conn,
		/*
		 * TODO: In future we need a proper algorithm
		 * to find the correct connection for an fsp.
		 * For now we only have one connection, so this is correct...
		 */
		.xconn = fsp->conn->sconn->client->connections,
		.initial_allocation_size = fsp->initial_allocation_size,
	};

	/* Copy our current credentials. */
	opd->ux_tok = copy_unix_token(opd, get_current_utok(fsp->conn));
	if (opd->ux_tok == NULL) {
		opd_free(opd);
		return NULL;
	}

	/*
	 * Copy the full fsp_name, and smb_fname (which is the basename).
	 */
	opd->smb_fname = cp_smb_filename(opd, smb_fname);
	if (opd->smb_fname == NULL) {
		opd_free(opd);
		return NULL;
	}

	opd->fsp_name = cp_smb_filename(opd, fsp->fsp_name);
	if (opd->fsp_name == NULL) {
		opd_free(opd);
		return NULL;
	}

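	/*
	 * Pick the directory fd to pass to openat(). If dirfsp already
	 * carries a real fd, reuse it; otherwise open "." ourselves so
	 * the worker thread is independent of later changes to the
	 * current working directory. opened_dir_fd records whether we
	 * own the fd and must close it in opd_free().
	 */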
	if (fsp_get_pathref_fd(dirfsp) != AT_FDCWD) {
		opd->dir_fd = fsp_get_pathref_fd(dirfsp);
	} else {
#if defined(O_DIRECTORY)
		opd->dir_fd = open(".", O_RDONLY|O_DIRECTORY);
#else
		opd->dir_fd = open(".", O_RDONLY);
#endif
		opd->opened_dir_fd = true;
	}
	if (opd->dir_fd == -1) {
		opd_free(opd);
		return NULL;
	}

	DLIST_ADD_END(open_pd_list, opd);
	return opd;
}

static int opd_inflight_destructor(struct aio_open_private_data *opd)
{
	/*
	 * Setting conn to NULL allows us to
	 * discover that the connection was torn
	 * down, which kills the fsp that owns
	 * opd.
	 */
	DBG_NOTICE("aio open request for %s cancelled\n",
		opd->fsp_name->base_name);
	opd->conn = NULL;
	/* Don't let opd go away. */
	return -1;
}

/*****************************************************************
 Set up an async open.
*****************************************************************/

static int open_async(const struct files_struct *dirfsp,
			const struct smb_filename *smb_fname,
			const files_struct *fsp,
			int flags,
			mode_t mode)
{
	struct aio_open_private_data *opd = NULL;
	struct tevent_req *subreq = NULL;

	/*
	 * Allocate off fsp->conn, not NULL or fsp. As we're going
	 * async, fsp will get talloc_free'd when we return
	 * EINPROGRESS/NT_STATUS_MORE_PROCESSING_REQUIRED. A new fsp
	 * pointer gets allocated on every re-run of the
	 * open code path. Allocating on fsp->conn instead
	 * of NULL allows us to get notified via destructor
	 * if the conn is force-closed or we shut down.
	 * opd is always safely freed in all codepaths, so no
	 * memory leaks.
	 */
	opd = create_private_open_data(fsp->conn,
				dirfsp,
				smb_fname,
				fsp,
				flags,
				mode);
	if (opd == NULL) {
		DEBUG(10, ("open_async: Could not create private data.\n"));
		return -1;
	}

	subreq = pthreadpool_tevent_job_send(opd,
					fsp->conn->sconn->ev_ctx,
					fsp->conn->sconn->pool,
					aio_open_worker, opd);
	if (subreq == NULL) {
		opd_free(opd);
		return -1;
	}
	tevent_req_set_callback(subreq, aio_open_handle_completion, opd);

	DEBUG(5,("open_async: mid %llu created for file %s\n",
		(unsigned long long)opd->mid,
		opd->fsp_name->base_name));

	/*
	 * Add a destructor to protect us from connection
	 * teardown whilst the open thread is in flight.
	 */
	talloc_set_destructor(opd, opd_inflight_destructor);

	/* Cause the calling code to reschedule us. */
	errno = EINPROGRESS; /* Maps to NT_STATUS_MORE_PROCESSING_REQUIRED. */
	return -1;
}

/*****************************************************************
 Look for a matching SMB2 mid. If we find one we have been
 rescheduled, so just return the completed open.
*****************************************************************/

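/*
 * Flow of a successful async open: aio_pthread_openat_fn() returns
 * -1 with errno = EINPROGRESS, the open is deferred, and when the
 * worker finishes aio_open_handle_completion() calls
 * schedule_deferred_open_message_smb() to re-issue it. On that
 * second pass the lookup below finds the stored result.
 */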
static bool find_completed_open(files_struct *fsp,
				int *p_fd,
				int *p_errno)
{
	struct aio_open_private_data *opd;

	opd = find_open_private_data_by_mid(fsp->mid);
	if (!opd) {
		return false;
	}

	if (opd->in_progress) {
		DEBUG(0,("find_completed_open: mid %llu "
			"still in progress for "
			"file %s. PANIC !\n",
			(unsigned long long)opd->mid,
			opd->fsp_name->base_name));
		/* Disaster ! This is an open timeout. Just panic. */
		smb_panic("find_completed_open - in_progress\n");
		/* notreached. */
		return false;
	}

	*p_fd = opd->ret_fd;
	*p_errno = opd->ret_errno;

	DEBUG(5,("find_completed_open: mid %llu returning "
		"fd = %d, errno = %d (%s) "
		"for file %s\n",
		(unsigned long long)opd->mid,
		opd->ret_fd,
		opd->ret_errno,
		strerror(opd->ret_errno),
		smb_fname_str_dbg(fsp->fsp_name)));

	/* Now we can free the opd. */
	opd_free(opd);
	return true;
}

/*****************************************************************
 The core open function. Only go async on O_CREAT|O_EXCL
 opens to prevent any race conditions.
*****************************************************************/

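/*
 * Async opens are disabled by default and must be switched on per
 * share. A minimal smb.conf sketch follows; the share name and path
 * are illustrative assumptions, only the module and parameter names
 * come from this file:
 *
 *	[data]
 *		path = /srv/data
 *		vfs objects = aio_pthread
 *		aio_pthread:aio open = yes
 *
 * "aio_pthread:aio open" corresponds to the lp_parm_bool() check
 * below; "aio_pthread" is the name registered in
 * vfs_aio_pthread_init().
 */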
static int aio_pthread_openat_fn(vfs_handle_struct *handle,
				 const struct files_struct *dirfsp,
				 const struct smb_filename *smb_fname,
				 struct files_struct *fsp,
				 const struct vfs_open_how *how)
{
	int my_errno = 0;
	int fd = -1;
	bool aio_allow_open = lp_parm_bool(
		SNUM(handle->conn), "aio_pthread", "aio open", false);

	if (how->resolve != 0) {
		errno = ENOSYS;
		return -1;
	}

	if (is_named_stream(smb_fname)) {
		/* Don't handle stream opens. */
		errno = ENOENT;
		return -1;
	}

	if (fsp->conn->sconn->pool == NULL) {
		/*
		 * A threadpool is required for async support.
		 */
		aio_allow_open = false;
	}

	if (fsp->conn->sconn->client != NULL &&
	    fsp->conn->sconn->client->server_multi_channel_enabled) {
		/*
		 * This module is not compatible with multi channel yet.
		 */
		aio_allow_open = false;
	}

	if (fsp->fsp_flags.is_pathref) {
		/* Use SMB_VFS_NEXT_OPENAT() to call openat() with O_PATH. */
		aio_allow_open = false;
	}

	if (!(how->flags & O_CREAT)) {
		/* Only creates matter. */
		aio_allow_open = false;
	}

	if (!(how->flags & O_EXCL)) {
		/* Only creates with O_EXCL matter. */
		aio_allow_open = false;
	}

	if (!aio_allow_open) {
		/* aio opens turned off. */
		return SMB_VFS_NEXT_OPENAT(handle,
					dirfsp,
					smb_fname,
					fsp,
					how);
	}

	/*
	 * See if this is a reentrant call - i.e. is this a
	 * restart of an existing open that just completed.
	 */

	if (find_completed_open(fsp,
				&fd,
				&my_errno)) {
		errno = my_errno;
		return fd;
	}

	/* Ok, it's a create exclusive call - pass it to a thread helper. */
	return open_async(dirfsp, smb_fname, fsp, how->flags, how->mode);
}
#endif

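/*
 * Only the openat hook is overridden; every other VFS operation
 * falls through to the next module in the chain.
 */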
static struct vfs_fn_pointers vfs_aio_pthread_fns = {
#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
	.openat_fn = aio_pthread_openat_fn,
#endif
};

static_decl_vfs;
NTSTATUS vfs_aio_pthread_init(TALLOC_CTX *ctx)
{
	return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
				"aio_pthread", &vfs_aio_pthread_fns);
}