LCOV - code coverage report
Current view: top level - lib/tdb/common - mutex.c (source / functions) Hit Total Coverage
Test: coverage report for master 2f515e9b Lines: 364 501 72.7 %
Date: 2024-04-21 15:09:00 Functions: 19 20 95.0 %

          Line data    Source code
       1             : /*
       2             :    Unix SMB/CIFS implementation.
       3             : 
       4             :    trivial database library
       5             : 
       6             :    Copyright (C) Volker Lendecke 2012,2013
       7             :    Copyright (C) Stefan Metzmacher 2013,2014
       8             :    Copyright (C) Michael Adam 2014
       9             : 
      10             :      ** NOTE! The following LGPL license applies to the tdb
      11             :      ** library. This does NOT imply that all of Samba is released
      12             :      ** under the LGPL
      13             : 
      14             :    This library is free software; you can redistribute it and/or
      15             :    modify it under the terms of the GNU Lesser General Public
      16             :    License as published by the Free Software Foundation; either
      17             :    version 3 of the License, or (at your option) any later version.
      18             : 
      19             :    This library is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      22             :    Lesser General Public License for more details.
      23             : 
      24             :    You should have received a copy of the GNU Lesser General Public
      25             :    License along with this library; if not, see <http://www.gnu.org/licenses/>.
      26             : */
      27             : #include "tdb_private.h"
      28             : #include "system/threads.h"
      29             : 
      30             : #ifdef USE_TDB_MUTEX_LOCKING
      31             : 
      32             : /*
      33             :  * If we run with mutexes, we store the "struct tdb_mutexes" at the
      34             :  * beginning of the file. We store an additional tdb_header right
      35             :  * beyond the mutex area, page aligned. All the offsets within the tdb
      36             :  * are relative to the area behind the mutex area. tdb->map_ptr points
      37             :  * behind the mutex area as well, so the read and write path in the
      38             :  * mutex case can remain unchanged.
      39             :  *
      40             :  * Early in the mutex development the mutexes were placed between the hash
      41             :  * chain pointers and the real tdb data. This had two drawbacks: First, it
      42             :  * made pointer calculations more complex. Second, we had to mmap the mutex
      43             :  * area twice. One was the normal map_ptr in the tdb. This frequently changed
      44             :  * from within tdb_oob. At least the Linux glibc robust mutex code assumes
      45             :  * constant pointers in memory, so a constantly changing mmap area destroys
      46             :  * the mutex list. So we had to mmap the first bytes of the file with a second
      47             :  * mmap call. With that scheme, very weird errors happened that could be
      48             :  * easily fixed by doing the mutex mmap in a second file. It seemed that
      49             :  * mapping the same memory area twice does not end up in accessing the same
      50             :  * physical page, looking at the mutexes in gdb it seemed that old data showed
      51             :  * up after some re-mapping. To avoid a separate mutex file, the code now puts
      52             :  * the real content of the tdb file after the mutex area. This way we do not
      53             :  * have overlapping mmap areas, the mutex area is mmapped once and not
      54             :  * changed, the tdb data area's mmap is constantly changed but does not
      55             :  * overlap.
      56             :  */
      57             : 
      58             : struct tdb_mutexes {
      59             :         struct tdb_header hdr;
      60             : 
      61             :         /* protect allrecord_lock */
      62             :         pthread_mutex_t allrecord_mutex;
      63             : 
      64             :         /*
      65             :          * F_UNLCK: free,
      66             :          * F_RDLCK: shared,
      67             :          * F_WRLCK: exclusive
      68             :          */
      69             :         short int allrecord_lock;
      70             : 
      71             :         /*
      72             :          * Index 0 is the freelist mutex, followed by
      73             :          * one mutex per hashchain.
      74             :          */
      75             :         pthread_mutex_t hashchains[1];
      76             : };
      77             : 
      78   984115810 : bool tdb_have_mutexes(struct tdb_context *tdb)
      79             : {
      80   984115810 :         return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
      81             : }
      82             : 
      83     8106667 : size_t tdb_mutex_size(struct tdb_context *tdb)
      84             : {
      85      222290 :         size_t mutex_size;
      86             : 
      87     7902130 :         if (!tdb_have_mutexes(tdb)) {
      88     6485488 :                 return 0;
      89             :         }
      90             : 
      91     1436952 :         mutex_size = sizeof(struct tdb_mutexes);
      92     1436952 :         mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);
      93             : 
      94     1416642 :         return TDB_ALIGN(mutex_size, tdb->page_size);
      95             : }
      96             : 
      97             : /*
      98             :  * Get the index for a chain mutex
      99             :  */
     100   756311673 : static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
     101             :                             unsigned *idx)
     102             : {
     103             :         /*
     104             :          * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
     105             :          * the 4 bytes of the freelist start and the hash chain that is about
     106             :          * to be locked. See lock_offset() where the freelist is -1 vs the
     107             :          * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
     108             :          * the tdb file itself as data, we need to adjust the offset here.
     109             :          */
     110   756311673 :         const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);
     111             : 
     112   756311673 :         if (!tdb_have_mutexes(tdb)) {
     113   521412941 :                 return false;
     114             :         }
     115   218955628 :         if (len != 1) {
     116             :                 /* Possibly the allrecord lock */
     117      309964 :                 return false;
     118             :         }
     119   218637222 :         if (off < freelist_lock_ofs) {
     120             :                 /* One of the special locks */
     121    35756743 :                 return false;
     122             :         }
     123   181569500 :         if (tdb->hash_size == 0) {
     124             :                 /* tdb not initialized yet, called from tdb_open_ex() */
     125           0 :                 return false;
     126             :         }
     127   181569500 :         if (off >= TDB_DATA_START(tdb->hash_size)) {
     128             :                 /* Single record lock from traverses */
     129      313680 :                 return false;
     130             :         }
     131             : 
     132             :         /*
     133             :          * Now we know it's a freelist or hash chain lock. Those are always 4
     134             :          * byte aligned. Paranoia check.
     135             :          */
     136   181252940 :         if ((off % sizeof(tdb_off_t)) != 0) {
     137           0 :                 abort();
     138             :         }
     139             : 
     140             :         /*
     141             :          * Re-index the fcntl offset into an offset into the mutex array
     142             :          */
     143   181252940 :         off -= freelist_lock_ofs; /* rebase to index 0 */
     144   181252940 :         off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */
     145             : 
     146   181252940 :         *idx = off;
     147   181252940 :         return true;
     148             : }
     149             : 
     150    56459047 : static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
     151             : {
     152     1306453 :         int i;
     153             : 
     154    92412754 :         for (i=0; i < tdb->num_lockrecs; i++) {
     155     1281384 :                 bool ret;
     156     1281384 :                 unsigned idx;
     157             : 
     158    37235091 :                 ret = tdb_mutex_index(tdb,
     159    35953707 :                                       tdb->lockrecs[i].off,
     160    35953707 :                                       tdb->lockrecs[i].count,
     161             :                                       &idx);
     162    35953707 :                 if (!ret) {
     163    35953707 :                         continue;
     164             :                 }
     165             : 
     166           0 :                 if (idx == 0) {
     167             :                         /* this is the freelist mutex */
     168           0 :                         continue;
     169             :                 }
     170             : 
     171           0 :                 return true;
     172             :         }
     173             : 
     174    55152594 :         return false;
     175             : }
     176             : 
     177    91628129 : static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
     178             : {
     179     1581151 :         int ret;
     180             : 
     181    91628129 :         if (waitflag) {
     182    91247696 :                 ret = pthread_mutex_lock(m);
     183             :         } else {
     184      380433 :                 ret = pthread_mutex_trylock(m);
     185             :         }
     186    91628129 :         if (ret != EOWNERDEAD) {
     187    90046977 :                 return ret;
     188             :         }
     189             : 
     190             :         /*
     191             :          * For chainlocks, we don't do any cleanup (yet?)
     192             :          */
     193           2 :         return pthread_mutex_consistent(m);
     194             : }
     195             : 
     196          18 : static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
     197             : {
     198           0 :         int ret;
     199             : 
     200          18 :         if (waitflag) {
     201          13 :                 ret = pthread_mutex_lock(&m->allrecord_mutex);
     202             :         } else {
     203           5 :                 ret = pthread_mutex_trylock(&m->allrecord_mutex);
     204             :         }
     205          18 :         if (ret != EOWNERDEAD) {
     206          17 :                 return ret;
     207             :         }
     208             : 
     209             :         /*
     210             :          * The allrecord lock holder died. We need to reset the allrecord_lock
     211             :          * to F_UNLCK. This should also be the indication for
     212             :          * tdb_needs_recovery.
     213             :          */
     214           1 :         m->allrecord_lock = F_UNLCK;
     215             : 
     216           1 :         return pthread_mutex_consistent(&m->allrecord_mutex);
     217             : }
     218             : 
     219   415677364 : bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
     220             :                     bool waitflag, int *pret)
     221             : {
     222   415677364 :         struct tdb_mutexes *m = tdb->mutexes;
     223    10855065 :         pthread_mutex_t *chain;
     224    10855065 :         int ret;
     225    10855065 :         unsigned idx;
     226    10855065 :         bool allrecord_ok;
     227             : 
     228   415677364 :         if (!tdb_mutex_index(tdb, off, len, &idx)) {
     229   315775614 :                 return false;
     230             :         }
     231    90627836 :         chain = &m->hashchains[idx];
     232             : 
     233    90627839 : again:
     234    90627839 :         ret = chain_mutex_lock(chain, waitflag);
     235    90627839 :         if (ret == EBUSY) {
     236        2723 :                 ret = EAGAIN;
     237             :         }
     238    90627835 :         if (ret != 0) {
     239        2727 :                 errno = ret;
     240        2727 :                 goto fail;
     241             :         }
     242             : 
     243    90625112 :         if (idx == 0) {
     244             :                 /*
     245             :                  * This is a freelist lock, which is independent of
     246             :                  * the allrecord lock. So we're done once we got the
     247             :                  * freelist mutex.
     248             :                  */
     249    34166065 :                 *pret = 0;
     250    34166065 :                 return true;
     251             :         }
     252             : 
     253    56459047 :         if (tdb_have_mutex_chainlocks(tdb)) {
     254             :                 /*
     255             :                  * We can only check the allrecord lock once. If we do it with
     256             :                  * one chain mutex locked, we will deadlock with the allrecord
     257             :                  * locker process in the following way: We lock the first hash
     258             :                  * chain, we check for the allrecord lock. We keep the hash
     259             :                  * chain locked. Then the allrecord locker locks the
     260             :                  * allrecord_mutex. It walks the list of chain mutexes,
     261             :                  * locking them all in sequence. Meanwhile, we have the chain
     262             :                  * mutex locked, so the allrecord locker blocks trying to lock
     263             :                  * our chain mutex. Then we come in and try to lock the second
     264             :                  * chain lock, which in most cases will be the freelist. We
     265             :                  * see that the allrecord lock is locked and put ourselves on
     266             :                  * the allrecord_mutex. This will never be signalled though
     267             :                  * because the allrecord locker waits for us to give up the
     268             :                  * chain lock.
     269             :                  */
     270             : 
     271           0 :                 *pret = 0;
     272           0 :                 return true;
     273             :         }
     274             : 
     275             :         /*
     276             :          * Check if someone has the allrecord lock: queue if so.
     277             :          */
     278             : 
     279    56459047 :         allrecord_ok = false;
     280             : 
     281    56459047 :         if (m->allrecord_lock == F_UNLCK) {
     282             :                 /*
     283             :                  * allrecord lock not taken
     284             :                  */
     285    56459035 :                 allrecord_ok = true;
     286             :         }
     287             : 
     288    56459047 :         if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
     289             :                 /*
     290             :                  * allrecord shared lock taken, but we only want to read
     291             :                  */
     292           6 :                 allrecord_ok = true;
     293             :         }
     294             : 
     295    56459047 :         if (allrecord_ok) {
     296    56459041 :                 *pret = 0;
     297    56459041 :                 return true;
     298             :         }
     299             : 
     300           6 :         ret = pthread_mutex_unlock(chain);
     301           6 :         if (ret != 0) {
     302           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     303             :                          "(chain_mutex) failed: %s\n", strerror(ret)));
     304           0 :                 errno = ret;
     305           0 :                 goto fail;
     306             :         }
     307           6 :         ret = allrecord_mutex_lock(m, waitflag);
     308           6 :         if (ret == EBUSY) {
     309           3 :                 ret = EAGAIN;
     310             :         }
     311           6 :         if (ret != 0) {
     312           3 :                 if (waitflag || (ret != EAGAIN)) {
     313           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
     314             :                                  "(allrecord_mutex) failed: %s\n",
     315             :                                  waitflag ? "" : "try_",  strerror(ret)));
     316             :                 }
     317           3 :                 errno = ret;
     318           3 :                 goto fail;
     319             :         }
     320           3 :         ret = pthread_mutex_unlock(&m->allrecord_mutex);
     321           3 :         if (ret != 0) {
     322           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     323             :                          "(allrecord_mutex) failed: %s\n", strerror(ret)));
     324           0 :                 errno = ret;
     325           0 :                 goto fail;
     326             :         }
     327           3 :         goto again;
     328             : 
     329        2730 : fail:
     330        2730 :         *pret = -1;
     331        2730 :         return true;
     332             : }
     333             : 
     334   304680602 : bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
     335             :                       int *pret)
     336             : {
     337   304680602 :         struct tdb_mutexes *m = tdb->mutexes;
     338     8291253 :         pthread_mutex_t *chain;
     339     8291253 :         int ret;
     340     8291253 :         unsigned idx;
     341             : 
     342   304680602 :         if (!tdb_mutex_index(tdb, off, len, &idx)) {
     343   207345391 :                 return false;
     344             :         }
     345    90625104 :         chain = &m->hashchains[idx];
     346             : 
     347    90625104 :         ret = pthread_mutex_unlock(chain);
     348    90625104 :         if (ret == 0) {
     349    90625104 :                 *pret = 0;
     350    90625104 :                 return true;
     351             :         }
     352           0 :         errno = ret;
     353           0 :         *pret = -1;
     354           0 :         return true;
     355             : }
     356             : 
     357          13 : int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
     358             :                              enum tdb_lock_flags flags)
     359             : {
     360          13 :         struct tdb_mutexes *m = tdb->mutexes;
     361           0 :         int ret;
     362           0 :         uint32_t i;
     363          13 :         bool waitflag = (flags & TDB_LOCK_WAIT);
     364           0 :         int saved_errno;
     365             : 
     366          13 :         if (tdb->flags & TDB_NOLOCK) {
     367           0 :                 return 0;
     368             :         }
     369             : 
     370          13 :         if (flags & TDB_LOCK_MARK_ONLY) {
     371           1 :                 return 0;
     372             :         }
     373             : 
     374          12 :         ret = allrecord_mutex_lock(m, waitflag);
     375          12 :         if (!waitflag && (ret == EBUSY)) {
     376           1 :                 errno = EAGAIN;
     377           1 :                 tdb->ecode = TDB_ERR_LOCK;
     378           1 :                 return -1;
     379             :         }
     380          11 :         if (ret != 0) {
     381           0 :                 if (!(flags & TDB_LOCK_PROBE)) {
     382           0 :                         TDB_LOG((tdb, TDB_DEBUG_TRACE,
     383             :                                  "allrecord_mutex_lock() failed: %s\n",
     384             :                                  strerror(ret)));
     385             :                 }
     386           0 :                 tdb->ecode = TDB_ERR_LOCK;
     387           0 :                 return -1;
     388             :         }
     389             : 
     390          11 :         if (m->allrecord_lock != F_UNLCK) {
     391           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     392             :                          (int)m->allrecord_lock));
     393           0 :                 goto fail_unlock_allrecord_mutex;
     394             :         }
     395          11 :         m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;
     396             : 
     397     1000294 :         for (i=0; i<tdb->hash_size; i++) {
     398             : 
     399             :                 /* ignore hashchains[0], the freelist */
     400     1000284 :                 pthread_mutex_t *chain = &m->hashchains[i+1];
     401             : 
     402     1000284 :                 ret = chain_mutex_lock(chain, waitflag);
     403     1000284 :                 if (!waitflag && (ret == EBUSY)) {
     404           1 :                         errno = EAGAIN;
     405           1 :                         goto fail_unroll_allrecord_lock;
     406             :                 }
     407     1000283 :                 if (ret != 0) {
     408           0 :                         if (!(flags & TDB_LOCK_PROBE)) {
     409           0 :                                 TDB_LOG((tdb, TDB_DEBUG_TRACE,
     410             :                                          "chain_mutex_lock() failed: %s\n",
     411             :                                          strerror(ret)));
     412             :                         }
     413           0 :                         errno = ret;
     414           0 :                         goto fail_unroll_allrecord_lock;
     415             :                 }
     416             : 
     417     1000283 :                 ret = pthread_mutex_unlock(chain);
     418     1000283 :                 if (ret != 0) {
     419           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     420             :                                  "(chainlock) failed: %s\n", strerror(ret)));
     421           0 :                         errno = ret;
     422           0 :                         goto fail_unroll_allrecord_lock;
     423             :                 }
     424             :         }
     425             :         /*
     426             :          * We leave this routine with m->allrecord_mutex locked
     427             :          */
     428          10 :         return 0;
     429             : 
     430           1 : fail_unroll_allrecord_lock:
     431           1 :         m->allrecord_lock = F_UNLCK;
     432             : 
     433           1 : fail_unlock_allrecord_mutex:
     434           1 :         saved_errno = errno;
     435           1 :         ret = pthread_mutex_unlock(&m->allrecord_mutex);
     436           1 :         if (ret != 0) {
     437           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     438             :                          "(allrecord_mutex) failed: %s\n", strerror(ret)));
     439             :         }
     440           1 :         errno = saved_errno;
     441           1 :         tdb->ecode = TDB_ERR_LOCK;
     442           1 :         return -1;
     443             : }
     444             : 
     445           2 : int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
     446             : {
     447           2 :         struct tdb_mutexes *m = tdb->mutexes;
     448           0 :         int ret;
     449           0 :         uint32_t i;
     450             : 
     451           2 :         if (tdb->flags & TDB_NOLOCK) {
     452           0 :                 return 0;
     453             :         }
     454             : 
     455             :         /*
     456             :          * Our only caller tdb_allrecord_upgrade()
     457             :          * guarantees that we already own the allrecord lock.
     458             :          *
     459             :          * Which means m->allrecord_mutex is still locked by us.
     460             :          */
     461             : 
     462           2 :         if (m->allrecord_lock != F_RDLCK) {
     463           0 :                 tdb->ecode = TDB_ERR_LOCK;
     464           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     465             :                          (int)m->allrecord_lock));
     466           0 :                 return -1;
     467             :         }
     468             : 
     469           2 :         m->allrecord_lock = F_WRLCK;
     470             : 
     471           8 :         for (i=0; i<tdb->hash_size; i++) {
     472             : 
     473             :                 /* ignore hashchains[0], the freelist */
     474           6 :                 pthread_mutex_t *chain = &m->hashchains[i+1];
     475             : 
     476           6 :                 ret = chain_mutex_lock(chain, true);
     477           6 :                 if (ret != 0) {
     478           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
     479             :                                  "(chainlock) failed: %s\n", strerror(ret)));
     480           0 :                         goto fail_unroll_allrecord_lock;
     481             :                 }
     482             : 
     483           6 :                 ret = pthread_mutex_unlock(chain);
     484           6 :                 if (ret != 0) {
     485           0 :                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     486             :                                  "(chainlock) failed: %s\n", strerror(ret)));
     487           0 :                         goto fail_unroll_allrecord_lock;
     488             :                 }
     489             :         }
     490             : 
     491           2 :         return 0;
     492             : 
     493           0 : fail_unroll_allrecord_lock:
     494           0 :         m->allrecord_lock = F_RDLCK;
     495           0 :         tdb->ecode = TDB_ERR_LOCK;
     496           0 :         return -1;
     497             : }
     498             : 
     499           0 : void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
     500             : {
     501           0 :         struct tdb_mutexes *m = tdb->mutexes;
     502             : 
     503             :         /*
     504             :          * Our only caller tdb_allrecord_upgrade() (in the error case)
     505             :          * guarantees that we already own the allrecord lock.
     506             :          *
     507             :          * Which means m->allrecord_mutex is still locked by us.
     508             :          */
     509             : 
     510           0 :         if (m->allrecord_lock != F_WRLCK) {
     511           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     512             :                          (int)m->allrecord_lock));
     513           0 :                 return;
     514             :         }
     515             : 
     516           0 :         m->allrecord_lock = F_RDLCK;
     517           0 :         return;
     518             : }
     519             : 
     520             : 
     521           7 : int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
     522             : {
     523           7 :         struct tdb_mutexes *m = tdb->mutexes;
     524           0 :         short old;
     525           0 :         int ret;
     526             : 
     527           7 :         if (tdb->flags & TDB_NOLOCK) {
     528           0 :                 return 0;
     529             :         }
     530             : 
     531             :         /*
     532             :          * Our only callers tdb_allrecord_unlock() and
     533             :          * tdb_allrecord_lock() (in the error path)
     534             :          * guarantee that we already own the allrecord lock.
     535             :          *
     536             :          * Which means m->allrecord_mutex is still locked by us.
     537             :          */
     538             : 
     539           7 :         if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
     540           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
     541             :                          (int)m->allrecord_lock));
     542           0 :                 return -1;
     543             :         }
     544             : 
     545           7 :         old = m->allrecord_lock;
     546           7 :         m->allrecord_lock = F_UNLCK;
     547             : 
     548           7 :         ret = pthread_mutex_unlock(&m->allrecord_mutex);
     549           7 :         if (ret != 0) {
     550           0 :                 m->allrecord_lock = old;
     551           0 :                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
     552             :                          "(allrecord_mutex) failed: %s\n", strerror(ret)));
     553           0 :                 return -1;
     554             :         }
     555           7 :         return 0;
     556             : }
     557             : 
     558      343939 : int tdb_mutex_init(struct tdb_context *tdb)
     559             : {
                                /*
                                 * Initialise all mutexes held in tdb->mutexes:
                                 * tdb->hash_size+1 entries of hashchains[] plus the
                                 * allrecord mutex. Every mutex is created from one
                                 * attribute object configured as error-checking,
                                 * process-shared and robust. allrecord_lock is reset
                                 * to F_UNLCK.
                                 *
                                 * Returns 0 on success. If tdb_mutex_mmap() fails,
                                 * -1 is returned straight away. On any later failure
                                 * the mapping is released with tdb_mutex_munmap(),
                                 * errno is set to the pthread error code and -1 is
                                 * returned.
                                 */
     560        9304 :         struct tdb_mutexes *m;
     561        9304 :         pthread_mutexattr_t ma;
     562        9304 :         uint32_t i;
     563        9304 :         int ret;
     564             : 
     565      343939 :         ret = tdb_mutex_mmap(tdb);
     566      343939 :         if (ret == -1) {
     567           0 :                 return -1;
     568             :         }
                                /*
                                 * NOTE(review): tdb_mutex_mmap() also returns 0 without
                                 * mapping anything when tdb_mutex_size() is 0, in which
                                 * case tdb->mutexes may still be NULL here. Presumably
                                 * callers only get here for mutex-enabled databases;
                                 * confirm against the callers.
                                 */
     569      343939 :         m = tdb->mutexes;
     570             : 
     571      343939 :         ret = pthread_mutexattr_init(&ma);
     572      343939 :         if (ret != 0) {
                                        /* No attribute object exists yet, so skip its destruction. */
     573           0 :                 goto fail_munmap;
     574             :         }
     575      343939 :         ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
     576      343939 :         if (ret != 0) {
     577           0 :                 goto fail;
     578             :         }
     579      343939 :         ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
     580      343939 :         if (ret != 0) {
     581           0 :                 goto fail;
     582             :         }
     583      343939 :         ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
     584      343939 :         if (ret != 0) {
     585           0 :                 goto fail;
     586             :         }
     587             : 
                                /*
                                 * NOTE(review): if initialisation fails part-way through
                                 * this loop, the mutexes already initialised are not
                                 * destroyed before the mapping is dropped.
                                 */
     588  2195568396 :         for (i=0; i<tdb->hash_size+1; i++) {
     589  2195224457 :                 pthread_mutex_t *chain = &m->hashchains[i];
     590             : 
     591  2195224457 :                 ret = pthread_mutex_init(chain, &ma);
     592  2195224457 :                 if (ret != 0) {
     593           0 :                         goto fail;
     594             :                 }
     595             :         }
     596             : 
     597      343939 :         m->allrecord_lock = F_UNLCK;
     598             : 
     599      343939 :         ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
     600      343939 :         if (ret != 0) {
     601           0 :                 goto fail;
     602             :         }
     603      334635 :         ret = 0;
                                /*
                                 * Despite its name, the success path also falls through
                                 * the "fail" label; ret tells the two cases apart below.
                                 */
     604      343939 : fail:
     605      343939 :         pthread_mutexattr_destroy(&ma);
     606      343939 : fail_munmap:
     607             : 
     608      343939 :         if (ret == 0) {
     609      334635 :                 return 0;
     610             :         }
     611             : 
     612           0 :         tdb_mutex_munmap(tdb);
     613             : 
                                /* ret holds the error number returned by the failing pthread call. */
     614           0 :         errno = ret;
     615           0 :         return -1;
     616             : }
     617             : 
     618      700644 : int tdb_mutex_mmap(struct tdb_context *tdb)
     619             : {
                                /*
                                 * Map the first tdb_mutex_size(tdb) bytes of tdb->fd as a
                                 * shared, read/write mapping and store it in tdb->mutexes.
                                 *
                                 * Returns 0 when the mutex area size is 0 (nothing is
                                 * mapped), when tdb->mutexes is already set, or when the
                                 * mapping succeeds. Returns -1 if mmap() fails.
                                 */
     620       18608 :         size_t len;
     621       18608 :         void *ptr;
     622             : 
     623      700644 :         len = tdb_mutex_size(tdb);
     624      700644 :         if (len == 0) {
     625           0 :                 return 0;
     626             :         }
     627             : 
                                /* Already mapped: keep the existing mapping. */
     628      700644 :         if (tdb->mutexes != NULL) {
     629      334635 :                 return 0;
     630             :         }
     631             : 
     632      356705 :         ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
     633             :                    tdb->fd, 0);
     634      356705 :         if (ptr == MAP_FAILED) {
     635           0 :                 return -1;
     636             :         }
     637      356705 :         tdb->mutexes = (struct tdb_mutexes *)ptr;
     638             : 
     639      356705 :         return 0;
     640             : }
     641             : 
     642     6733157 : int tdb_mutex_munmap(struct tdb_context *tdb)
     643             : {
                                /*
                                 * Unmap tdb->mutexes, using tdb_mutex_size(tdb) as the
                                 * length, and reset the pointer to NULL.
                                 *
                                 * Returns 0 when the mutex area size is 0 or when the
                                 * unmap succeeds. Returns -1 if munmap() fails, in which
                                 * case tdb->mutexes is left unchanged.
                                 *
                                 * NOTE(review): tdb->mutexes is handed to munmap() without
                                 * a NULL check; confirm that callers can rely on this.
                                 */
     644      185929 :         size_t len;
     645      185929 :         int ret;
     646             : 
     647     6733157 :         len = tdb_mutex_size(tdb);
     648     6548930 :         if (len == 0) {
     649     6485488 :                 return 0;
     650             :         }
     651             : 
     652       63442 :         ret = munmap(tdb->mutexes, len);
     653       63442 :         if (ret == -1) {
     654           0 :                 return -1;
     655             :         }
     656       63442 :         tdb->mutexes = NULL;
     657             : 
     658       63442 :         return 0;
     659             : }
     660             : 
     661             : static bool tdb_mutex_locking_cached;
                        /*
                         * Cached verdict, written by tdb_mutex_locking_supported() and
                         * tdb_runtime_check_for_robust_mutexes() and returned by both.
                         */
     662             : 
     663       40642 : static bool tdb_mutex_locking_supported(void)
     664             : {
                                /*
                                 * Probe, once per process, whether a mutex with the
                                 * error-checking, process-shared and robust attributes
                                 * can be created and behaves like a real mutex (a second
                                 * lock by the owner must fail with EDEADLK).
                                 *
                                 * The result is stored in tdb_mutex_locking_cached; every
                                 * later call returns that cached value without probing
                                 * again.
                                 */
     665         880 :         pthread_mutexattr_t ma;
     666         880 :         pthread_mutex_t m;
     667         880 :         int ret;
     668         880 :         static bool initialized;
     669             : 
     670       40642 :         if (initialized) {
     671           0 :                 return tdb_mutex_locking_cached;
     672             :         }
     673             : 
                                /* Set before probing, so a failed probe is not retried either. */
     674       40642 :         initialized = true;
     675             : 
     676       40642 :         ret = pthread_mutexattr_init(&ma);
     677       40642 :         if (ret != 0) {
     678           0 :                 return false;
     679             :         }
     680       40642 :         ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
     681       40642 :         if (ret != 0) {
     682           0 :                 goto cleanup_ma;
     683             :         }
     684       40642 :         ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
     685       40642 :         if (ret != 0) {
     686           0 :                 goto cleanup_ma;
     687             :         }
     688       40642 :         ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
     689       40642 :         if (ret != 0) {
     690           0 :                 goto cleanup_ma;
     691             :         }
     692       40642 :         ret = pthread_mutex_init(&m, &ma);
     693       40642 :         if (ret != 0) {
     694           0 :                 goto cleanup_ma;
     695             :         }
     696       40642 :         ret = pthread_mutex_lock(&m);
     697       40642 :         if (ret != 0) {
     698           0 :                 goto cleanup_m;
     699             :         }
     700             :         /*
     701             :          * This makes sure we have real mutexes
     702             :          * from a threading library instead of just
     703             :          * stubs from libc.
     704             :          */
     705       40642 :         ret = pthread_mutex_lock(&m);
     706       40642 :         if (ret != EDEADLK) {
     707           0 :                 goto cleanup_lock;
     708             :         }
     709       40642 :         ret = pthread_mutex_unlock(&m);
     710       40642 :         if (ret != 0) {
     711           0 :                 goto cleanup_m;
     712             :         }
     713             : 
                                /* All checks passed; the success path shares the cleanup below. */
     714       40642 :         tdb_mutex_locking_cached = true;
     715       40642 :         goto cleanup_m;
     716             : 
                                /* Reached only when the second lock did not return EDEADLK. */
     717           0 : cleanup_lock:
     718           0 :         pthread_mutex_unlock(&m);
     719       40642 : cleanup_m:
     720       40642 :         pthread_mutex_destroy(&m);
     721       40642 : cleanup_ma:
     722       40642 :         pthread_mutexattr_destroy(&ma);
     723       40642 :         return tdb_mutex_locking_cached;
     724             : }
     725             : 
     726             : static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
                        /*
                         * SIGCHLD handler that was installed before ours; stays SIG_ERR
                         * until tdb_runtime_check_for_robust_mutexes() stores one here.
                         */
     727             : static pid_t tdb_robust_mutex_pid = -1;
                        /*
                         * pid of the probe child forked by
                         * tdb_runtime_check_for_robust_mutexes(); -1 when there is none.
                         */
     728             : 
     729       40642 : static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
     730             :                         void (**p_old_handler)(int))
     731             : {
                                /*
                                 * Install `handler` for SIGCHLD using sigaction(), with
                                 * SA_RESTART where available and SIGCHLD added to the
                                 * handler's signal mask. If p_old_handler is non-NULL the
                                 * previous sa_handler is stored through it.
                                 *
                                 * Returns true when built with HAVE_SIGACTION, false
                                 * otherwise (nothing is installed in that case).
                                 *
                                 * NOTE(review): the return value of sigaction() is not
                                 * checked, so oldact is read even if the call failed.
                                 */
     732             : #ifdef HAVE_SIGACTION
     733         880 :         struct sigaction act;
     734         880 :         struct sigaction oldact;
     735             : 
     736       40642 :         memset(&act, '\0', sizeof(act));
     737             : 
     738       40642 :         act.sa_handler = handler;
     739             : #ifdef SA_RESTART
     740       40642 :         act.sa_flags = SA_RESTART;
     741             : #endif
     742       40642 :         sigemptyset(&act.sa_mask);
     743       40642 :         sigaddset(&act.sa_mask, SIGCHLD);
     744       40642 :         sigaction(SIGCHLD, &act, &oldact);
     745       40642 :         if (p_old_handler) {
     746       40642 :                 *p_old_handler = oldact.sa_handler;
     747             :         }
     748       40642 :         return true;
     749             : #else /* !HAVE_SIGACTION */
     750             :         return false;
     751             : #endif
     752             : }
     753             : 
     754       42331 : static void tdb_robust_mutex_handler(int sig)
     755             : {
                                /*
                                 * SIGCHLD handler used during the robust mutex probe.
                                 *
                                 * While a probe child is recorded in tdb_robust_mutex_pid,
                                 * try to reap it without blocking. If it was reaped, or
                                 * waitpid() reports ECHILD, clear tdb_robust_mutex_pid and
                                 * return. Any other waitpid() error returns without
                                 * touching the pid.
                                 *
                                 * In all remaining cases (no probe child recorded, or
                                 * waitpid() returned neither -1 nor the child's pid) the
                                 * signal is passed on to the previously installed handler,
                                 * unless that is SIG_DFL, SIG_IGN or SIG_ERR.
                                 *
                                 * NOTE(review): errno is not saved and restored around
                                 * waitpid(), so the interrupted code may see it changed.
                                 */
     756       42331 :         pid_t child_pid = tdb_robust_mutex_pid;
     757             : 
     758       42331 :         if (child_pid != -1) {
     759         878 :                 pid_t pid;
     760             : 
     761       40640 :                 pid = waitpid(child_pid, NULL, WNOHANG);
     762       40640 :                 if (pid == -1) {
     763       19028 :                         switch (errno) {
     764       19028 :                         case ECHILD:
     765       19028 :                                 tdb_robust_mutex_pid = -1;
     766       19028 :                                 return;
     767             : 
     768           0 :                         default:
     769           0 :                                 return;
     770             :                         }
     771             :                 }
     772       21612 :                 if (pid == child_pid) {
     773       21612 :                         tdb_robust_mutex_pid = -1;
     774       21612 :                         return;
     775             :                 }
     776             :         }
     777             : 
                                /* Not our child: chain to the old handler if it is a real function. */
     778        1691 :         if (tdb_robust_mutext_old_handler == SIG_DFL) {
     779        1660 :                 return;
     780             :         }
     781          12 :         if (tdb_robust_mutext_old_handler == SIG_IGN) {
     782           0 :                 return;
     783             :         }
     784          12 :         if (tdb_robust_mutext_old_handler == SIG_ERR) {
     785           0 :                 return;
     786             :         }
     787             : 
     788          12 :         tdb_robust_mutext_old_handler(sig);
     789             : }
     790             : 
     791       81284 : static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
     792             : {
                                /*
                                 * Make sure the probe child identified by *child_pid is
                                 * gone. Does nothing when *child_pid is already -1.
                                 *
                                 * The loop runs while the global tdb_robust_mutex_pid is
                                 * still positive; tdb_robust_mutex_handler() may set it to
                                 * -1 once it has reaped the child. A waitpid() failure
                                 * with EINTR retries, ECHILD ends the loop, and any other
                                 * error calls abort().
                                 *
                                 * On return both tdb_robust_mutex_pid and *child_pid are
                                 * set to -1.
                                 */
     793       81284 :         int options = WNOHANG;
     794             : 
     795       81284 :         if (*child_pid == -1) {
     796       39762 :                 return;
     797             :         }
     798             : 
     799       59655 :         while (tdb_robust_mutex_pid > 0) {
     800        1741 :                 pid_t pid;
     801             : 
     802             :                 /*
     803             :                  * First we try with WNOHANG, as the process might not exist
     804             :                  * anymore. Once we've sent SIGKILL we block waiting for the
     805             :                  * exit.
     806             :                  */
     807       38043 :                 pid = waitpid(*child_pid, NULL, options);
     808       38043 :                 if (pid == -1) {
     809           0 :                         if (errno == EINTR) {
     810           0 :                                 continue;
     811           0 :                         } else if (errno == ECHILD) {
     812           0 :                                 break;
     813             :                         } else {
     814           0 :                                 abort();
     815             :                         }
     816             :                 }
     817       38043 :                 if (pid == *child_pid) {
     818       18159 :                         break;
     819             :                 }
     820             : 
                                        /* Child not reaped yet: kill it and block in the next waitpid(). */
     821       19013 :                 kill(*child_pid, SIGKILL);
     822       19013 :                 options = 0;
     823             :         }
     824             : 
     825       40642 :         tdb_robust_mutex_pid = -1;
     826       40642 :         *child_pid = -1;
     827             : }
     828             : 
     829      966659 : _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
     830             : {
                                /*
                                 * One-time runtime probe for working robust, process-shared
                                 * mutexes. The verdict is kept in tdb_mutex_locking_cached
                                 * and returned by this and every later call.
                                 *
                                 * Outline of the probe as coded below:
                                 *  - create a mutex in anonymous shared memory,
                                 *  - fork a child that locks it and exits without
                                 *    unlocking,
                                 *  - check that the parent's trylock gives EBUSY while the
                                 *    child holds the lock and EOWNERDEAD once the child is
                                 *    gone,
                                 *  - mark the mutex consistent and unlock it.
                                 * Any deviation leaves the cached verdict at false.
                                 */
     831      966659 :         void *ptr = NULL;
     832      966659 :         pthread_mutex_t *m = NULL;
     833       25378 :         pthread_mutexattr_t ma;
     834      966659 :         int ret = 1;
     835      966659 :         int pipe_down[2] = { -1, -1 };
     836      966659 :         int pipe_up[2] = { -1, -1 };
     837       25378 :         ssize_t nread;
     838      966659 :         char c = 0;
     839       25378 :         bool ok;
     840       25378 :         static bool initialized;
     841      966659 :         pid_t saved_child_pid = -1;
     842      966659 :         bool cleanup_ma = false;
     843             : 
     844      966659 :         if (initialized) {
     845      926017 :                 return tdb_mutex_locking_cached;
     846             :         }
     847             : 
     848       40642 :         initialized = true;
     849             : 
     850       40642 :         ok = tdb_mutex_locking_supported();
     851       40642 :         if (!ok) {
     852           0 :                 return false;
     853             :         }
     854             : 
                                /*
                                 * tdb_mutex_locking_supported() left the cache at true;
                                 * reset it so only a fully successful probe reports true.
                                 */
     855       40642 :         tdb_mutex_locking_cached = false;
     856             : 
     857       40642 :         ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
     858             :                    MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
     859       40642 :         if (ptr == MAP_FAILED) {
     860           0 :                 return false;
     861             :         }
     862             : 
                                /* pipe_down carries parent-to-child data, pipe_up child-to-parent. */
     863       40642 :         ret = pipe(pipe_down);
     864       40642 :         if (ret != 0) {
     865           0 :                 goto cleanup;
     866             :         }
     867       40642 :         ret = pipe(pipe_up);
     868       40642 :         if (ret != 0) {
     869           0 :                 goto cleanup;
     870             :         }
     871             : 
     872       40642 :         ret = pthread_mutexattr_init(&ma);
     873       40642 :         if (ret != 0) {
     874           0 :                 goto cleanup;
     875             :         }
                                /* From here on the cleanup path must destroy the attribute object. */
     876       40642 :         cleanup_ma = true;
     877       40642 :         ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
     878       40642 :         if (ret != 0) {
     879           0 :                 goto cleanup;
     880             :         }
     881       40642 :         ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
     882       40642 :         if (ret != 0) {
     883           0 :                 goto cleanup;
     884             :         }
     885       40642 :         ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
     886       40642 :         if (ret != 0) {
     887           0 :                 goto cleanup;
     888             :         }
     889       40642 :         ret = pthread_mutex_init(ptr, &ma);
     890       40642 :         if (ret != 0) {
     891           0 :                 goto cleanup;
     892             :         }
                                /* m is set only after a successful init, so cleanup destroys it only then. */
     893       40642 :         m = (pthread_mutex_t *)ptr;
     894             : 
     895       40642 :         if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
     896       39762 :                         &tdb_robust_mutext_old_handler) == false) {
     897           0 :                 goto cleanup;
     898             :         }
     899             : 
                                /*
                                 * The pid is kept twice: the global may be reset to -1 by
                                 * the SIGCHLD handler, the local copy is what the
                                 * wait-for-child helper is given.
                                 */
     900       40642 :         tdb_robust_mutex_pid = fork();
     901       40642 :         saved_child_pid = tdb_robust_mutex_pid;
     902       40642 :         if (tdb_robust_mutex_pid == 0) {
                                        /*
                                         * Child: take the lock, report the result of
                                         * pthread_mutex_lock() to the parent, wait for one
                                         * byte on pipe_down, then exit with the mutex still
                                         * held.
                                         */
     903           0 :                 size_t nwritten;
     904           0 :                 close(pipe_down[1]);
     905           0 :                 close(pipe_up[0]);
     906           0 :                 ret = pthread_mutex_lock(m);
     907           0 :                 nwritten = write(pipe_up[1], &ret, sizeof(ret));
     908           0 :                 if (nwritten != sizeof(ret)) {
     909           0 :                         _exit(1);
     910             :                 }
     911           0 :                 if (ret != 0) {
     912           0 :                         _exit(1);
     913             :                 }
     914           0 :                 nread = read(pipe_down[0], &c, 1);
     915           0 :                 if (nread != 1) {
     916           0 :                         _exit(1);
     917             :                 }
     918             :                 /* leave locked */
     919           0 :                 _exit(0);
     920             :         }
     921       40642 :         if (tdb_robust_mutex_pid == -1) {
     922           0 :                 goto cleanup;
     923             :         }
                                /* Parent: close the pipe ends that belong to the child. */
     924       40642 :         close(pipe_down[0]);
     925       40642 :         pipe_down[0] = -1;
     926       40642 :         close(pipe_up[1]);
     927       40642 :         pipe_up[1] = -1;
     928             : 
                                /*
                                 * Wait for the child's lock report. Only the byte count is
                                 * checked here; ret is overwritten by the trylock below.
                                 */
     929       40642 :         nread = read(pipe_up[0], &ret, sizeof(ret));
     930       40642 :         if (nread != sizeof(ret)) {
     931           0 :                 goto cleanup;
     932             :         }
     933             : 
                                /* The child should be holding the mutex now. */
     934       40642 :         ret = pthread_mutex_trylock(m);
     935       40642 :         if (ret != EBUSY) {
     936           0 :                 if (ret == 0) {
     937           0 :                         pthread_mutex_unlock(m);
     938             :                 }
     939           0 :                 goto cleanup;
     940             :         }
     941             : 
                                /* Send the byte the child is waiting for so that it exits. */
     942       40642 :         if (write(pipe_down[1], &c, 1) != 1) {
     943           0 :                 goto cleanup;
     944             :         }
     945             : 
                                /* Expect 0 bytes (end-of-file) on pipe_up; data or an error fails the probe. */
     946       40642 :         nread = read(pipe_up[0], &c, 1);
     947       40642 :         if (nread != 0) {
     948           0 :                 goto cleanup;
     949             :         }
     950             : 
     951       40642 :         tdb_robust_mutex_wait_for_child(&saved_child_pid);
     952             : 
                                /* With the owner gone, the robust mutex is expected to report EOWNERDEAD. */
     953       40642 :         ret = pthread_mutex_trylock(m);
     954       40642 :         if (ret != EOWNERDEAD) {
     955           0 :                 if (ret == 0) {
     956           0 :                         pthread_mutex_unlock(m);
     957             :                 }
     958           0 :                 goto cleanup;
     959             :         }
     960             : 
     961       40642 :         ret = pthread_mutex_consistent(m);
     962       40642 :         if (ret != 0) {
     963           0 :                 goto cleanup;
     964             :         }
     965             : 
                                /* A further trylock must be refused with EDEADLK or EBUSY. */
     966       40642 :         ret = pthread_mutex_trylock(m);
     967       40642 :         if (ret != EDEADLK && ret != EBUSY) {
     968           0 :                 pthread_mutex_unlock(m);
     969           0 :                 goto cleanup;
     970             :         }
     971             : 
     972       40642 :         ret = pthread_mutex_unlock(m);
     973       40642 :         if (ret != 0) {
     974           0 :                 goto cleanup;
     975             :         }
     976             : 
     977       40642 :         tdb_mutex_locking_cached = true;
     978             : 
     979       40642 : cleanup:
     980             :         /*
     981             :          * Note that we don't reset the signal handler we just reset
     982             :          * tdb_robust_mutex_pid to -1. This is ok as this code path is only
     983             :          * called once per process.
     984             :          *
     985             :          * Leaving our signal handler avoids races with other threads potentially
     986             :          * setting up their SIGCHLD handlers.
     987             :          *
     988             :          * The worst thing that can happen is that the other newer signal
     989             :          * handler will get the SIGCHLD signal for our child and/or reap the
     990             :          * child with a wait() function. tdb_robust_mutex_wait_for_child()
     991             :          * handles the case where waitpid returns ECHILD.
     992             :          */
     993       40642 :         tdb_robust_mutex_wait_for_child(&saved_child_pid);
     994             : 
     995       40642 :         if (m != NULL) {
     996       40642 :                 pthread_mutex_destroy(m);
     997             :         }
     998       40642 :         if (cleanup_ma) {
     999       40642 :                 pthread_mutexattr_destroy(&ma);
    1000             :         }
                                /* Pipe ends already closed above were set to -1 and are skipped here. */
    1001       40642 :         if (pipe_down[0] != -1) {
    1002           0 :                 close(pipe_down[0]);
    1003             :         }
    1004       40642 :         if (pipe_down[1] != -1) {
    1005       40642 :                 close(pipe_down[1]);
    1006             :         }
    1007       40642 :         if (pipe_up[0] != -1) {
    1008       40642 :                 close(pipe_up[0]);
    1009             :         }
    1010       40642 :         if (pipe_up[1] != -1) {
    1011           0 :                 close(pipe_up[1]);
    1012             :         }
    1013       40642 :         if (ptr != NULL) {
    1014       40642 :                 munmap(ptr, sizeof(pthread_mutex_t));
    1015             :         }
    1016             : 
    1017       40642 :         return tdb_mutex_locking_cached;
    1018             : }
    1019             : 
    1020             : #else
    1021             : 
    1022             : size_t tdb_mutex_size(struct tdb_context *tdb)
    1023             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: there is no mutex area, so its size is 0. */
    1024             :         return 0;
    1025             : }
    1026             : 
    1027             : bool tdb_have_mutexes(struct tdb_context *tdb)
    1028             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: always reports false. */
    1029             :         return false;
    1030             : }
    1031             : 
    1032             : int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
    1033             :                              enum tdb_lock_flags flags)
    1034             : {
                                /*
                                 * Stub for builds without USE_TDB_MUTEX_LOCKING: always
                                 * fails with -1 and sets tdb->ecode to TDB_ERR_LOCK;
                                 * ltype and flags are ignored.
                                 */
    1035             :         tdb->ecode = TDB_ERR_LOCK;
    1036             :         return -1;
    1037             : }
    1038             : 
    1039             : int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
    1040             : {
                                /*
                                 * Stub for builds without USE_TDB_MUTEX_LOCKING: always
                                 * returns -1. Unlike the lock and upgrade stubs it does
                                 * not set tdb->ecode.
                                 */
    1041             :         return -1;
    1042             : }
    1043             : 
    1044             : int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
    1045             : {
                                /*
                                 * Stub for builds without USE_TDB_MUTEX_LOCKING: always
                                 * fails with -1 and sets tdb->ecode to TDB_ERR_LOCK.
                                 */
    1046             :         tdb->ecode = TDB_ERR_LOCK;
    1047             :         return -1;
    1048             : }
    1049             : 
    1050             : void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
    1051             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: does nothing. */
    1052             :         return;
    1053             : }
    1054             : 
    1055             : int tdb_mutex_mmap(struct tdb_context *tdb)
    1056             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: always fails with errno = ENOSYS. */
    1057             :         errno = ENOSYS;
    1058             :         return -1;
    1059             : }
    1060             : 
    1061             : int tdb_mutex_munmap(struct tdb_context *tdb)
    1062             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: always fails with errno = ENOSYS. */
    1063             :         errno = ENOSYS;
    1064             :         return -1;
    1065             : }
    1066             : 
    1067             : int tdb_mutex_init(struct tdb_context *tdb)
    1068             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: always fails with errno = ENOSYS. */
    1069             :         errno = ENOSYS;
    1070             :         return -1;
    1071             : }
    1072             : 
    1073             : _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
    1074             : {
                                /* Stub for builds without USE_TDB_MUTEX_LOCKING: robust mutexes are never reported as available. */
    1075             :         return false;
    1076             : }
    1077             : 
    1078             : #endif

Generated by: LCOV version 1.14