/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Volker Lendecke 2012,2013
   Copyright (C) Stefan Metzmacher 2013,2014
   Copyright (C) Michael Adam 2014

   ** NOTE! The following LGPL license applies to the tdb
   ** library. This does NOT imply that all of Samba is released
   ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
#include "system/threads.h"

#ifdef USE_TDB_MUTEX_LOCKING

/*
 * If we run with mutexes, we store the "struct tdb_mutexes" at the
 * beginning of the file. We store an additional tdb_header right
 * beyond the mutex area, page aligned. All the offsets within the tdb
 * are relative to the area behind the mutex area. tdb->map_ptr points
 * behind the mutex area as well, so the read and write path in the
 * mutex case can remain unchanged.
 *
 * Early in the mutex development the mutexes were placed between the hash
 * chain pointers and the real tdb data. This had two drawbacks: First, it
 * made pointer calculations more complex. Second, we had to mmap the mutex
 * area twice. One mapping was the normal map_ptr in the tdb, which is
 * frequently remapped from within tdb_oob. At least the Linux glibc robust
 * mutex code assumes that mutexes stay at constant addresses in memory, so
 * a constantly changing mmap area corrupts the robust mutex list. We
 * therefore had to mmap the first bytes of the file with a second mmap
 * call. With that scheme, very weird errors happened that went away once
 * the mutexes were moved into a separate file: mapping the same file range
 * twice did not reliably access the same physical page, and looking at the
 * mutexes in gdb, stale data showed up after some re-mapping. To avoid a
 * separate mutex file, the code now puts the real content of the tdb file
 * after the mutex area. This way the mmap areas do not overlap: the mutex
 * area is mmapped once and never changed, while the tdb data area's mmap
 * is constantly changed but never overlaps it.
 */
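
/*
 * Illustrative sketch only (variable names are hypothetical, this is
 * not part of the build): under the layout described above, the mutex
 * area is mapped once at file offset 0 and never moves, while the data
 * area is (re)mapped behind it as the file grows. Roughly:
 */
#if 0
        size_t mutex_len = tdb_mutex_size(tdb); /* page aligned */
        struct tdb_mutexes *mutexes = mmap(NULL, mutex_len,
                                           PROT_READ|PROT_WRITE,
                                           MAP_SHARED, tdb->fd, 0);
        void *data = mmap(NULL, data_len, PROT_READ|PROT_WRITE,
                          MAP_SHARED, tdb->fd, mutex_len);
        /* tdb->map_ptr == data; all tdb offsets are relative to data */
#endif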

struct tdb_mutexes {
        struct tdb_header hdr;

        /* protect allrecord_lock */
        pthread_mutex_t allrecord_mutex;

        /*
         * F_UNLCK: free,
         * F_RDLCK: shared,
         * F_WRLCK: exclusive
         */
        short int allrecord_lock;

        /*
         * Index 0 is the freelist mutex, followed by
         * one mutex per hashchain.
         */
        pthread_mutex_t hashchains[1];
};

bool tdb_have_mutexes(struct tdb_context *tdb)
{
        return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}

size_t tdb_mutex_size(struct tdb_context *tdb)
{
        size_t mutex_size;

        if (!tdb_have_mutexes(tdb)) {
                return 0;
        }

        mutex_size = sizeof(struct tdb_mutexes);
        mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);

        return TDB_ALIGN(mutex_size, tdb->page_size);
}
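
/*
 * Worked example (the sizes are platform assumptions, e.g. glibc on
 * x86_64 where sizeof(pthread_mutex_t) == 40): with hash_size == 10000
 * the area holds 10001 chain mutexes (freelist plus one per chain)
 * inside hashchains[], plus the allrecord mutex and the embedded
 * tdb_header, i.e. a bit over 400,000 bytes, which TDB_ALIGN() then
 * rounds up to a multiple of page_size.
 */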

/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
                            unsigned *idx)
{
        /*
         * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
         * the 4 bytes of the freelist start and the hash chain that is about
         * to be locked. See lock_offset() where the freelist is -1 vs the
         * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
         * the tdb file itself as data, we need to adjust the offset here.
         */
        const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

        if (!tdb_have_mutexes(tdb)) {
                return false;
        }
        if (len != 1) {
                /* Possibly the allrecord lock */
                return false;
        }
        if (off < freelist_lock_ofs) {
                /* One of the special locks */
                return false;
        }
        if (tdb->hash_size == 0) {
                /* tdb not initialized yet, called from tdb_open_ex() */
                return false;
        }
        if (off >= TDB_DATA_START(tdb->hash_size)) {
                /* Single record lock from traverses */
                return false;
        }

        /*
         * Now we know it's a freelist or hash chain lock. Those are always 4
         * byte aligned. Paranoia check.
         */
        if ((off % sizeof(tdb_off_t)) != 0) {
                abort();
        }

        /*
         * Re-index the fcntl offset into an offset into the mutex array
         */
        off -= freelist_lock_ofs; /* rebase to index 0 */
        off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */

        *idx = off;
        return true;
}
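
/*
 * Worked example of the re-indexing above (tdb_off_t is 4 bytes): the
 * freelist fcntl lock sits at off == FREELIST_TOP - 4 and maps to
 * idx 0, the freelist mutex. off == FREELIST_TOP maps to idx 1, i.e.
 * hash chain 0, and in general chain h is locked at
 * FREELIST_TOP + 4*h, which rebases to idx h + 1, matching the "+1"
 * in TDB_HASH_TOP().
 */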

static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
        int i;

        for (i=0; i < tdb->num_lockrecs; i++) {
                bool ret;
                unsigned idx;

                ret = tdb_mutex_index(tdb,
                                      tdb->lockrecs[i].off,
                                      tdb->lockrecs[i].count,
                                      &idx);
                if (!ret) {
                        continue;
                }

                if (idx == 0) {
                        /* this is the freelist mutex */
                        continue;
                }

                return true;
        }

        return false;
}

static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
        int ret;

        if (waitflag) {
                ret = pthread_mutex_lock(m);
        } else {
                ret = pthread_mutex_trylock(m);
        }
        if (ret != EOWNERDEAD) {
                return ret;
        }

        /*
         * For chainlocks, we don't do any cleanup (yet?)
         */
        return pthread_mutex_consistent(m);
}
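
/*
 * Background note on the EOWNERDEAD handling above: with
 * PTHREAD_MUTEX_ROBUST, pthread_mutex_lock() returns EOWNERDEAD when
 * the previous owner died while holding the mutex. The caller does
 * hold the lock at that point, but the protected state may be
 * inconsistent; pthread_mutex_consistent() must be called before
 * unlocking, otherwise the mutex becomes permanently unusable. For
 * chain locks no record-level cleanup is attempted here: the lock is
 * simply marked consistent and reported as acquired.
 */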

static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
        int ret;

        if (waitflag) {
                ret = pthread_mutex_lock(&m->allrecord_mutex);
        } else {
                ret = pthread_mutex_trylock(&m->allrecord_mutex);
        }
        if (ret != EOWNERDEAD) {
                return ret;
        }

        /*
         * The allrecord lock holder died. We need to reset the allrecord_lock
         * to F_UNLCK. This should also be the indication for
         * tdb_needs_recovery.
         */
        m->allrecord_lock = F_UNLCK;

        return pthread_mutex_consistent(&m->allrecord_mutex);
}

bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
                    bool waitflag, int *pret)
{
        struct tdb_mutexes *m = tdb->mutexes;
        pthread_mutex_t *chain;
        int ret;
        unsigned idx;
        bool allrecord_ok;

        if (!tdb_mutex_index(tdb, off, len, &idx)) {
                return false;
        }
        chain = &m->hashchains[idx];

again:
        ret = chain_mutex_lock(chain, waitflag);
        if (ret == EBUSY) {
                ret = EAGAIN;
        }
        if (ret != 0) {
                errno = ret;
                goto fail;
        }

        if (idx == 0) {
                /*
                 * This is a freelist lock, which is independent of
                 * the allrecord lock. So we're done once we've got the
                 * freelist mutex.
                 */
                *pret = 0;
                return true;
        }

        if (tdb_have_mutex_chainlocks(tdb)) {
                /*
                 * We can only check the allrecord lock once. If we do it with
                 * one chain mutex locked, we will deadlock with the allrecord
                 * locker process in the following way: We lock the first hash
                 * chain, we check for the allrecord lock. We keep the hash
                 * chain locked. Then the allrecord locker locks the
                 * allrecord_mutex. It walks the list of chain mutexes,
                 * locking them all in sequence. Meanwhile, we have the chain
                 * mutex locked, so the allrecord locker blocks trying to lock
                 * our chain mutex. Then we come in and try to lock the second
                 * chain lock, which in most cases will be the freelist. We
                 * see that the allrecord lock is locked and put ourselves on
                 * the allrecord_mutex. This will never be signalled though
                 * because the allrecord locker waits for us to give up the
                 * chain lock.
                 */

                *pret = 0;
                return true;
        }

        /*
         * Check if someone has the allrecord lock: queue if so.
         */

        allrecord_ok = false;

        if (m->allrecord_lock == F_UNLCK) {
                /*
                 * allrecord lock not taken
                 */
                allrecord_ok = true;
        }

        if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
                /*
                 * allrecord shared lock taken, but we only want to read
                 */
                allrecord_ok = true;
        }

        if (allrecord_ok) {
                *pret = 0;
                return true;
        }

        ret = pthread_mutex_unlock(chain);
        if (ret != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(chain_mutex) failed: %s\n", strerror(ret)));
                errno = ret;
                goto fail;
        }
        ret = allrecord_mutex_lock(m, waitflag);
        if (ret == EBUSY) {
                ret = EAGAIN;
        }
        if (ret != 0) {
                if (waitflag || (ret != EAGAIN)) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
                                 "(allrecord_mutex) failed: %s\n",
                                 waitflag ? "" : "try_", strerror(ret)));
                }
                errno = ret;
                goto fail;
        }
        ret = pthread_mutex_unlock(&m->allrecord_mutex);
        if (ret != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(allrecord_mutex) failed: %s\n", strerror(ret)));
                errno = ret;
                goto fail;
        }
        goto again;

fail:
        *pret = -1;
        return true;
}
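
/*
 * Usage sketch (hypothetical caller; fcntl_brlock is a made-up name):
 * a brlock-style wrapper tries the mutex path first and only falls
 * back to fcntl when the requested range is not covered by a mutex.
 */
#if 0
        int ret;

        if (tdb_mutex_lock(tdb, rw_type, offset, len, waitflag, &ret)) {
                return ret; /* handled by a freelist/chain mutex */
        }
        /* not a freelist/chain lock: fall back to fcntl locking */
        return fcntl_brlock(tdb, rw_type, offset, len, waitflag);
#endif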

bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
                      int *pret)
{
        struct tdb_mutexes *m = tdb->mutexes;
        pthread_mutex_t *chain;
        int ret;
        unsigned idx;

        if (!tdb_mutex_index(tdb, off, len, &idx)) {
                return false;
        }
        chain = &m->hashchains[idx];

        ret = pthread_mutex_unlock(chain);
        if (ret == 0) {
                *pret = 0;
                return true;
        }
        errno = ret;
        *pret = -1;
        return true;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
                             enum tdb_lock_flags flags)
{
        struct tdb_mutexes *m = tdb->mutexes;
        int ret;
        uint32_t i;
        bool waitflag = (flags & TDB_LOCK_WAIT);
        int saved_errno;

        if (tdb->flags & TDB_NOLOCK) {
                return 0;
        }

        if (flags & TDB_LOCK_MARK_ONLY) {
                return 0;
        }

        ret = allrecord_mutex_lock(m, waitflag);
        if (!waitflag && (ret == EBUSY)) {
                errno = EAGAIN;
                tdb->ecode = TDB_ERR_LOCK;
                return -1;
        }
        if (ret != 0) {
                if (!(flags & TDB_LOCK_PROBE)) {
                        TDB_LOG((tdb, TDB_DEBUG_TRACE,
                                 "allrecord_mutex_lock() failed: %s\n",
                                 strerror(ret)));
                }
                tdb->ecode = TDB_ERR_LOCK;
                return -1;
        }

        if (m->allrecord_lock != F_UNLCK) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                goto fail_unlock_allrecord_mutex;
        }
        m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;

        for (i=0; i<tdb->hash_size; i++) {

                /* ignore hashchains[0], the freelist */
                pthread_mutex_t *chain = &m->hashchains[i+1];

                ret = chain_mutex_lock(chain, waitflag);
                if (!waitflag && (ret == EBUSY)) {
                        errno = EAGAIN;
                        goto fail_unroll_allrecord_lock;
                }
                if (ret != 0) {
                        if (!(flags & TDB_LOCK_PROBE)) {
                                TDB_LOG((tdb, TDB_DEBUG_TRACE,
                                         "chain_mutex_lock() failed: %s\n",
                                         strerror(ret)));
                        }
                        errno = ret;
                        goto fail_unroll_allrecord_lock;
                }

                ret = pthread_mutex_unlock(chain);
                if (ret != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                                 "(chainlock) failed: %s\n", strerror(ret)));
                        errno = ret;
                        goto fail_unroll_allrecord_lock;
                }
        }
        /*
         * We leave this routine with m->allrecord_mutex locked
         */
        return 0;

fail_unroll_allrecord_lock:
        m->allrecord_lock = F_UNLCK;

fail_unlock_allrecord_mutex:
        saved_errno = errno;
        ret = pthread_mutex_unlock(&m->allrecord_mutex);
        if (ret != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(allrecord_mutex) failed: %s\n", strerror(ret)));
        }
        errno = saved_errno;
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
        struct tdb_mutexes *m = tdb->mutexes;
        int ret;
        uint32_t i;

        if (tdb->flags & TDB_NOLOCK) {
                return 0;
        }

        /*
         * Our only caller tdb_allrecord_upgrade()
         * guarantees that we already own the allrecord lock.
         *
         * Which means m->allrecord_mutex is still locked by us.
         */

        if (m->allrecord_lock != F_RDLCK) {
                tdb->ecode = TDB_ERR_LOCK;
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                return -1;
        }

        m->allrecord_lock = F_WRLCK;

        for (i=0; i<tdb->hash_size; i++) {

                /* ignore hashchains[0], the freelist */
                pthread_mutex_t *chain = &m->hashchains[i+1];

                ret = chain_mutex_lock(chain, true);
                if (ret != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
                                 "(chainlock) failed: %s\n", strerror(ret)));
                        goto fail_unroll_allrecord_lock;
                }

                ret = pthread_mutex_unlock(chain);
                if (ret != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                                 "(chainlock) failed: %s\n", strerror(ret)));
                        goto fail_unroll_allrecord_lock;
                }
        }

        return 0;

fail_unroll_allrecord_lock:
        m->allrecord_lock = F_RDLCK;
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
        struct tdb_mutexes *m = tdb->mutexes;

        /*
         * Our only caller tdb_allrecord_upgrade() (in the error case)
         * guarantees that we already own the allrecord lock.
         *
         * Which means m->allrecord_mutex is still locked by us.
         */

        if (m->allrecord_lock != F_WRLCK) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                return;
        }

        m->allrecord_lock = F_RDLCK;
        return;
}


int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
        struct tdb_mutexes *m = tdb->mutexes;
        short old;
        int ret;

        if (tdb->flags & TDB_NOLOCK) {
                return 0;
        }

        /*
         * Our only callers tdb_allrecord_unlock() and
         * tdb_allrecord_lock() (in the error path)
         * guarantee that we already own the allrecord lock.
         *
         * Which means m->allrecord_mutex is still locked by us.
         */

        if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                return -1;
        }

        old = m->allrecord_lock;
        m->allrecord_lock = F_UNLCK;

        ret = pthread_mutex_unlock(&m->allrecord_mutex);
        if (ret != 0) {
                m->allrecord_lock = old;
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(allrecord_mutex) failed: %s\n", strerror(ret)));
                return -1;
        }
        return 0;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
        struct tdb_mutexes *m;
        pthread_mutexattr_t ma;
        uint32_t i;
        int ret;

        ret = tdb_mutex_mmap(tdb);
        if (ret == -1) {
                return -1;
        }
        m = tdb->mutexes;

        ret = pthread_mutexattr_init(&ma);
        if (ret != 0) {
                goto fail_munmap;
        }
        ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        if (ret != 0) {
                goto fail;
        }
        ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        if (ret != 0) {
                goto fail;
        }
        ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        if (ret != 0) {
                goto fail;
        }

        for (i=0; i<tdb->hash_size+1; i++) {
                pthread_mutex_t *chain = &m->hashchains[i];

                ret = pthread_mutex_init(chain, &ma);
                if (ret != 0) {
                        goto fail;
                }
        }

        m->allrecord_lock = F_UNLCK;

        ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
        if (ret != 0) {
                goto fail;
        }
        ret = 0;
fail:
        pthread_mutexattr_destroy(&ma);
fail_munmap:

        if (ret == 0) {
                return 0;
        }

        tdb_mutex_munmap(tdb);

        errno = ret;
        return -1;
}
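
/*
 * The attribute recipe used above is the standard one for mutexes
 * shared between processes through a file-backed mapping. A minimal
 * standalone sketch (error handling elided, shared_mapping is a
 * hypothetical pointer into an mmap'ed region):
 */
#if 0
        pthread_mutexattr_t ma;
        pthread_mutex_t *m = shared_mapping; /* lives in the mmap'ed file */

        pthread_mutexattr_init(&ma);
        pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        pthread_mutex_init(m, &ma);
        pthread_mutexattr_destroy(&ma);
#endif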

int tdb_mutex_mmap(struct tdb_context *tdb)
{
        size_t len;
        void *ptr;

        len = tdb_mutex_size(tdb);
        if (len == 0) {
                return 0;
        }

        if (tdb->mutexes != NULL) {
                return 0;
        }

        ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
                   tdb->fd, 0);
        if (ptr == MAP_FAILED) {
                return -1;
        }
        tdb->mutexes = (struct tdb_mutexes *)ptr;

        return 0;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
        size_t len;
        int ret;

        len = tdb_mutex_size(tdb);
        if (len == 0) {
                return 0;
        }

        ret = munmap(tdb->mutexes, len);
        if (ret == -1) {
                return -1;
        }
        tdb->mutexes = NULL;

        return 0;
}

static bool tdb_mutex_locking_cached;

static bool tdb_mutex_locking_supported(void)
{
        pthread_mutexattr_t ma;
        pthread_mutex_t m;
        int ret;
        static bool initialized;

        if (initialized) {
                return tdb_mutex_locking_cached;
        }

        initialized = true;

        ret = pthread_mutexattr_init(&ma);
        if (ret != 0) {
                return false;
        }
        ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutex_init(&m, &ma);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutex_lock(&m);
        if (ret != 0) {
                goto cleanup_m;
        }
        /*
         * This makes sure we have real mutexes
         * from a threading library instead of just
         * stubs from libc.
         */
        ret = pthread_mutex_lock(&m);
        if (ret != EDEADLK) {
                goto cleanup_lock;
        }
        ret = pthread_mutex_unlock(&m);
        if (ret != 0) {
                goto cleanup_m;
        }

        tdb_mutex_locking_cached = true;
        goto cleanup_m;

cleanup_lock:
        pthread_mutex_unlock(&m);
cleanup_m:
        pthread_mutex_destroy(&m);
cleanup_ma:
        pthread_mutexattr_destroy(&ma);
        return tdb_mutex_locking_cached;
}

static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;

static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
                                            void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
        struct sigaction act;
        struct sigaction oldact;

        memset(&act, '\0', sizeof(act));

        act.sa_handler = handler;
#ifdef SA_RESTART
        act.sa_flags = SA_RESTART;
#endif
        sigemptyset(&act.sa_mask);
        sigaddset(&act.sa_mask, SIGCHLD);
        sigaction(SIGCHLD, &act, &oldact);
        if (p_old_handler) {
                *p_old_handler = oldact.sa_handler;
        }
        return true;
#else /* !HAVE_SIGACTION */
        return false;
#endif
}

static void tdb_robust_mutex_handler(int sig)
{
        pid_t child_pid = tdb_robust_mutex_pid;

        if (child_pid != -1) {
                pid_t pid;

                pid = waitpid(child_pid, NULL, WNOHANG);
                if (pid == -1) {
                        switch (errno) {
                        case ECHILD:
                                tdb_robust_mutex_pid = -1;
                                return;

                        default:
                                return;
                        }
                }
                if (pid == child_pid) {
                        tdb_robust_mutex_pid = -1;
                        return;
                }
        }

        if (tdb_robust_mutext_old_handler == SIG_DFL) {
                return;
        }
        if (tdb_robust_mutext_old_handler == SIG_IGN) {
                return;
        }
        if (tdb_robust_mutext_old_handler == SIG_ERR) {
                return;
        }

        tdb_robust_mutext_old_handler(sig);
}

static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
{
        int options = WNOHANG;

        if (*child_pid == -1) {
                return;
        }

        while (tdb_robust_mutex_pid > 0) {
                pid_t pid;

                /*
                 * First we try with WNOHANG, as the process might not exist
                 * anymore. Once we've sent SIGKILL we block waiting for the
                 * exit.
                 */
                pid = waitpid(*child_pid, NULL, options);
                if (pid == -1) {
                        if (errno == EINTR) {
                                continue;
                        } else if (errno == ECHILD) {
                                break;
                        } else {
                                abort();
                        }
                }
                if (pid == *child_pid) {
                        break;
                }

                kill(*child_pid, SIGKILL);
                options = 0;
        }

        tdb_robust_mutex_pid = -1;
        *child_pid = -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
        void *ptr = NULL;
        pthread_mutex_t *m = NULL;
        pthread_mutexattr_t ma;
        int ret = 1;
        int pipe_down[2] = { -1, -1 };
        int pipe_up[2] = { -1, -1 };
        ssize_t nread;
        char c = 0;
        bool ok;
        static bool initialized;
        pid_t saved_child_pid = -1;
        bool cleanup_ma = false;

        if (initialized) {
                return tdb_mutex_locking_cached;
        }

        initialized = true;

        ok = tdb_mutex_locking_supported();
        if (!ok) {
                return false;
        }

        tdb_mutex_locking_cached = false;

        ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
                   MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
        if (ptr == MAP_FAILED) {
                return false;
        }

        ret = pipe(pipe_down);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pipe(pipe_up);
        if (ret != 0) {
                goto cleanup;
        }

        ret = pthread_mutexattr_init(&ma);
        if (ret != 0) {
                goto cleanup;
        }
        cleanup_ma = true;
        ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pthread_mutex_init(ptr, &ma);
        if (ret != 0) {
                goto cleanup;
        }
        m = (pthread_mutex_t *)ptr;

        if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
                        &tdb_robust_mutext_old_handler) == false) {
                goto cleanup;
        }

        tdb_robust_mutex_pid = fork();
        saved_child_pid = tdb_robust_mutex_pid;
        if (tdb_robust_mutex_pid == 0) {
                size_t nwritten;
                close(pipe_down[1]);
                close(pipe_up[0]);
                ret = pthread_mutex_lock(m);
                nwritten = write(pipe_up[1], &ret, sizeof(ret));
                if (nwritten != sizeof(ret)) {
                        _exit(1);
                }
                if (ret != 0) {
                        _exit(1);
                }
                nread = read(pipe_down[0], &c, 1);
                if (nread != 1) {
                        _exit(1);
                }
                /* leave locked */
                _exit(0);
        }
        if (tdb_robust_mutex_pid == -1) {
                goto cleanup;
        }
        close(pipe_down[0]);
        pipe_down[0] = -1;
        close(pipe_up[1]);
        pipe_up[1] = -1;

        nread = read(pipe_up[0], &ret, sizeof(ret));
        if (nread != sizeof(ret)) {
                goto cleanup;
        }

        ret = pthread_mutex_trylock(m);
        if (ret != EBUSY) {
                if (ret == 0) {
                        pthread_mutex_unlock(m);
                }
                goto cleanup;
        }

        if (write(pipe_down[1], &c, 1) != 1) {
                goto cleanup;
        }

        nread = read(pipe_up[0], &c, 1);
        if (nread != 0) {
                goto cleanup;
        }

        tdb_robust_mutex_wait_for_child(&saved_child_pid);

        ret = pthread_mutex_trylock(m);
        if (ret != EOWNERDEAD) {
                if (ret == 0) {
                        pthread_mutex_unlock(m);
                }
                goto cleanup;
        }

        ret = pthread_mutex_consistent(m);
        if (ret != 0) {
                goto cleanup;
        }

        ret = pthread_mutex_trylock(m);
        if (ret != EDEADLK && ret != EBUSY) {
                pthread_mutex_unlock(m);
                goto cleanup;
        }

        ret = pthread_mutex_unlock(m);
        if (ret != 0) {
                goto cleanup;
        }

        tdb_mutex_locking_cached = true;

cleanup:
        /*
         * Note that we don't reset the signal handler; we just reset
         * tdb_robust_mutex_pid to -1. This is ok as this code path is only
         * called once per process.
         *
         * Leaving our signal handler installed avoids races with other
         * threads potentially setting up their own SIGCHLD handlers.
         *
         * The worst thing that can happen is that the other, newer signal
         * handler will get the SIGCHLD signal for our child and/or reap the
         * child with a wait() function. tdb_robust_mutex_wait_for_child()
         * handles the case where waitpid returns ECHILD.
         */
        tdb_robust_mutex_wait_for_child(&saved_child_pid);

        if (m != NULL) {
                pthread_mutex_destroy(m);
        }
        if (cleanup_ma) {
                pthread_mutexattr_destroy(&ma);
        }
        if (pipe_down[0] != -1) {
                close(pipe_down[0]);
        }
        if (pipe_down[1] != -1) {
                close(pipe_down[1]);
        }
        if (pipe_up[0] != -1) {
                close(pipe_up[0]);
        }
        if (pipe_up[1] != -1) {
                close(pipe_up[1]);
        }
        if (ptr != NULL) {
                munmap(ptr, sizeof(pthread_mutex_t));
        }

        return tdb_mutex_locking_cached;
}
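
/*
 * Usage sketch (hypothetical caller): an application should only
 * request mutex locking when this runtime check succeeds, e.g.:
 */
#if 0
        int tdb_flags = TDB_DEFAULT;

        if (tdb_runtime_check_for_robust_mutexes()) {
                tdb_flags |= TDB_MUTEX_LOCKING;
        }
        tdb = tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode,
                          &log_ctx, NULL);
#endif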

#else

size_t tdb_mutex_size(struct tdb_context *tdb)
{
        return 0;
}

bool tdb_have_mutexes(struct tdb_context *tdb)
{
        return false;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
                             enum tdb_lock_flags flags)
{
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
        return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
        return;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
        errno = ENOSYS;
        return -1;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
        errno = ENOSYS;
        return -1;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
        errno = ENOSYS;
        return -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
        return false;
}

#endif
|