Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : trivial database library
5 :
6 : Copyright (C) Andrew Tridgell 1999-2005
7 : Copyright (C) Paul `Rusty' Russell 2000
8 : Copyright (C) Jeremy Allison 2000-2003
9 :
10 : ** NOTE! The following LGPL license applies to the tdb
11 : ** library. This does NOT imply that all of Samba is released
12 : ** under the LGPL
13 :
14 : This library is free software; you can redistribute it and/or
15 : modify it under the terms of the GNU Lesser General Public
16 : License as published by the Free Software Foundation; either
17 : version 3 of the License, or (at your option) any later version.
18 :
19 : This library is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 : Lesser General Public License for more details.
23 :
24 : You should have received a copy of the GNU Lesser General Public
25 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 : */
27 :
28 : #include "tdb_private.h"
29 :
30 : #define TDB_NEXT_LOCK_ERR ((tdb_off_t)-1)
31 :
32 : /* Uses traverse lock: 0 = finish, TDB_NEXT_LOCK_ERR = error,
33 : other = record offset */
34 575345327 : static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock,
35 : struct tdb_record *rec)
36 : {
37 575345327 : int want_next = (tlock->off != 0);
38 :
39 : /* Lock each chain from the start one. */
40 836825342 : for (; tlock->list < tdb->hash_size; tlock->list++) {
41 830699438 : if (!tlock->off && tlock->list != 0) {
42 : /* this is an optimisation for the common case where
43 : the hash chain is empty, which is particularly
44 : common for the use of tdb with ldb, where large
45 : hashes are used. In that case we spend most of our
46 : time in tdb_brlock(), locking empty hash chains.
47 :
48 : To avoid this, we do an unlocked pre-check to see
49 : if the hash chain is empty before starting to look
50 : inside it. If it is empty then we can avoid that
51 : hash chain. If it isn't empty then we can't believe
52 : the value we get back, as we read it without a
53 : lock, so instead we get the lock and re-fetch the
54 : value below.
55 :
56 : Notice that not doing this optimisation on the
57 : first hash chain is critical. We must guarantee
58 : that we have done at least one fcntl lock at the
59 : start of a search to guarantee that memory is
60 : coherent on SMP systems. If records are added by
61 : others during the search then that's OK, and we
62 : could possibly miss those with this trick, but we
63 : could miss them anyway without this trick, so the
64 : semantics don't change.
65 :
66 : With a non-indexed ldb search this trick gains us a
67 : factor of around 80 in speed on a linux 2.6.x
68 : system (testing using ldbtest).
69 : */
70 255354111 : tdb->methods->next_hash_chain(tdb, &tlock->list);
71 255354111 : if (tlock->list == tdb->hash_size) {
72 5538731 : continue;
73 : }
74 : }
75 :
76 825160707 : if (tdb_lock(tdb, tlock->list, tlock->lock_rw) == -1)
77 0 : return TDB_NEXT_LOCK_ERR;
78 :
79 : /* No previous record? Start at top of chain. */
80 825160707 : if (!tlock->off) {
81 255946130 : if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->list),
82 255946130 : &tlock->off) == -1)
83 0 : goto fail;
84 : } else {
85 : /* Otherwise unlock the previous record. */
86 569214577 : if (tdb_unlock_record(tdb, tlock->off) != 0)
87 0 : goto fail;
88 : }
89 :
90 825160707 : if (want_next) {
91 : /* We have offset of old record: grab next */
92 569214577 : if (tdb_rec_read(tdb, tlock->off, rec) == -1)
93 0 : goto fail;
94 569214577 : tlock->off = rec->next;
95 : }
96 :
97 : /* Iterate through chain */
98 841227941 : while( tlock->off) {
99 585286657 : if (tdb_rec_read(tdb, tlock->off, rec) == -1)
100 0 : goto fail;
101 :
102 : /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
103 585286657 : if (tlock->off == rec->next) {
104 0 : tdb->ecode = TDB_ERR_CORRUPT;
105 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: loop detected.\n"));
106 0 : goto fail;
107 : }
108 :
109 585286657 : if (!TDB_DEAD(rec)) {
110 : /* Woohoo: we found one! */
111 569219423 : if (tdb_lock_record(tdb, tlock->off) != 0)
112 0 : goto fail;
113 569219423 : return tlock->off;
114 : }
115 :
116 16067234 : tlock->off = rec->next;
117 : }
118 255941284 : tdb_unlock(tdb, tlock->list, tlock->lock_rw);
119 255941284 : want_next = 0;
120 : }
121 : /* We finished iteration without finding anything */
122 6125904 : tdb->ecode = TDB_SUCCESS;
123 6125904 : return 0;
124 :
125 0 : fail:
126 0 : tlock->off = 0;
127 0 : if (tdb_unlock(tdb, tlock->list, tlock->lock_rw) != 0)
128 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n"));
129 0 : return TDB_NEXT_LOCK_ERR;
130 : }
131 :
132 : /* traverse the entire database - calling fn(tdb, key, data) on each element.
133 : return -1 on error or the record count traversed
134 : if fn is NULL then it is not called
135 : a non-zero return value from fn() indicates that the traversal should stop
136 : */
137 6130716 : static int tdb_traverse_internal(struct tdb_context *tdb,
138 : tdb_traverse_func fn, void *private_data,
139 : struct tdb_traverse_lock *tl)
140 : {
141 165762 : TDB_DATA key, dbuf;
142 165762 : struct tdb_record rec;
143 6130716 : int ret = 0, count = 0;
144 165762 : tdb_off_t off;
145 165762 : size_t recbuf_len;
146 :
147 6130716 : recbuf_len = 4096;
148 6130716 : key.dptr = malloc(recbuf_len);
149 6130716 : if (key.dptr == NULL) {
150 0 : return -1;
151 : }
152 :
153 : /* This was in the initialization, above, but the IRIX compiler
154 : * did not like it. crh
155 : */
156 6130716 : tl->next = tdb->travlocks.next;
157 :
158 : /* fcntl locks don't stack: beware traverse inside traverse */
159 6130716 : tdb->travlocks.next = tl;
160 :
161 : /* tdb_next_lock places locks on the record returned, and its chain */
162 575345095 : while ((off = tdb_next_lock(tdb, tl, &rec)) != 0) {
163 5461897 : tdb_len_t full_len;
164 5461897 : int nread;
165 :
166 569219225 : if (off == TDB_NEXT_LOCK_ERR) {
167 0 : ret = -1;
168 0 : goto out;
169 : }
170 :
171 569219225 : full_len = rec.key_len + rec.data_len;
172 :
173 569219225 : if (full_len > recbuf_len) {
174 260786 : recbuf_len = full_len;
175 :
176 : /*
177 : * No realloc, we don't need the old data and thus can
178 : * do without the memcpy
179 : */
180 260786 : free(key.dptr);
181 260786 : key.dptr = malloc(recbuf_len);
182 :
183 260786 : if (key.dptr == NULL) {
184 0 : ret = -1;
185 0 : if (tdb_unlock(tdb, tl->list, tl->lock_rw)
186 : != 0) {
187 0 : goto out;
188 : }
189 0 : if (tdb_unlock_record(tdb, tl->off) != 0) {
190 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL,
191 : "tdb_traverse: malloc "
192 : "failed and unlock_record "
193 : "failed!\n"));
194 : }
195 0 : goto out;
196 : }
197 : }
198 :
199 569219225 : count++;
200 : /* now read the full record */
201 569219225 : nread = tdb->methods->tdb_read(tdb, tl->off + sizeof(rec),
202 563757328 : key.dptr, full_len, 0);
203 569219225 : if (nread == -1) {
204 0 : ret = -1;
205 0 : if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0)
206 0 : goto out;
207 0 : if (tdb_unlock_record(tdb, tl->off) != 0)
208 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
209 0 : goto out;
210 : }
211 569219225 : key.dsize = rec.key_len;
212 569219225 : dbuf.dptr = key.dptr + rec.key_len;
213 569219225 : dbuf.dsize = rec.data_len;
214 :
215 5461897 : tdb_trace_1rec_retrec(tdb, "traverse", key, dbuf);
216 :
217 : /* Drop chain lock, call out */
218 569219225 : if (tdb_unlock(tdb, tl->list, tl->lock_rw) != 0) {
219 0 : ret = -1;
220 0 : goto out;
221 : }
222 569219225 : if (fn && fn(tdb, key, dbuf, private_data)) {
223 : /* They want us to terminate traversal */
224 9 : tdb_trace_ret(tdb, "tdb_traverse_end", count);
225 4842 : if (tdb_unlock_record(tdb, tl->off) != 0) {
226 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_traverse: unlock_record failed!\n"));;
227 0 : ret = -1;
228 : }
229 4842 : goto out;
230 : }
231 : }
232 165758 : tdb_trace(tdb, "tdb_traverse_end");
233 6125870 : out:
234 6130712 : SAFE_FREE(key.dptr);
235 6130712 : tdb->travlocks.next = tl->next;
236 6130712 : if (ret < 0)
237 0 : return -1;
238 : else
239 6130712 : return count;
240 : }
241 :
242 :
243 : /*
244 : a read style traverse - temporarily marks each record read only
245 : */
246 411326 : _PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb,
247 : tdb_traverse_func fn, void *private_data)
248 : {
249 411326 : struct tdb_traverse_lock tl = { NULL, 0, 0, F_RDLCK };
250 1239 : int ret;
251 :
252 411326 : tdb->traverse_read++;
253 1239 : tdb_trace(tdb, "tdb_traverse_read_start");
254 411326 : ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
255 411322 : tdb->traverse_read--;
256 :
257 411322 : return ret;
258 : }
259 :
260 : /*
261 : a write style traverse - needs to get the transaction lock to
262 : prevent deadlocks
263 :
264 : WARNING: The data buffer given to the callback fn does NOT meet the
265 : alignment guarantees malloc gives you.
266 : */
267 5719963 : _PUBLIC_ int tdb_traverse(struct tdb_context *tdb,
268 : tdb_traverse_func fn, void *private_data)
269 : {
270 5719963 : struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK };
271 164574 : enum tdb_lock_flags lock_flags;
272 164574 : int ret;
273 :
274 5719963 : if (tdb->read_only || tdb->traverse_read) {
275 571 : return tdb_traverse_read(tdb, fn, private_data);
276 : }
277 :
278 5719392 : lock_flags = TDB_LOCK_WAIT;
279 :
280 5719392 : if (tdb->allrecord_lock.count != 0) {
281 : /*
282 : * This avoids a deadlock between tdb_lockall() and
283 : * tdb_traverse(). See
284 : * https://bugzilla.samba.org/show_bug.cgi?id=11381
285 : */
286 87904 : lock_flags = TDB_LOCK_NOWAIT;
287 : }
288 :
289 5719392 : if (tdb_transaction_lock(tdb, F_WRLCK, lock_flags)) {
290 2 : return -1;
291 : }
292 :
293 5719390 : tdb->traverse_write++;
294 164523 : tdb_trace(tdb, "tdb_traverse_start");
295 5719390 : ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
296 5719390 : tdb->traverse_write--;
297 :
298 5719390 : tdb_transaction_unlock(tdb, F_WRLCK);
299 :
300 5719390 : return ret;
301 : }
302 :
303 :
304 : /* find the first entry in the database and return its key */
305 34 : _PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
306 : {
307 22 : TDB_DATA key;
308 22 : struct tdb_record rec;
309 22 : tdb_off_t off;
310 :
311 : /* release any old lock */
312 34 : if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0)
313 0 : return tdb_null;
314 34 : tdb->travlocks.off = tdb->travlocks.list = 0;
315 34 : tdb->travlocks.lock_rw = F_RDLCK;
316 :
317 : /* Grab first record: locks chain and returned record. */
318 34 : off = tdb_next_lock(tdb, &tdb->travlocks, &rec);
319 34 : if (off == 0 || off == TDB_NEXT_LOCK_ERR) {
320 4 : tdb_trace_retrec(tdb, "tdb_firstkey", tdb_null);
321 8 : return tdb_null;
322 : }
323 : /* now read the key */
324 26 : key.dsize = rec.key_len;
325 26 : key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
326 :
327 18 : tdb_trace_retrec(tdb, "tdb_firstkey", key);
328 :
329 : /* Unlock the hash chain of the record we just read. */
330 26 : if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0)
331 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
332 26 : return key;
333 : }
334 :
335 : /* find the next entry in the database, returning its key */
336 198 : _PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
337 : {
338 184 : uint32_t oldlist;
339 198 : TDB_DATA key = tdb_null;
340 184 : struct tdb_record rec;
341 198 : unsigned char *k = NULL;
342 184 : tdb_off_t off;
343 :
344 : /* Is locked key the old key? If so, traverse will be reliable. */
345 198 : if (tdb->travlocks.off) {
346 198 : if (tdb_lock(tdb,tdb->travlocks.list,tdb->travlocks.lock_rw))
347 0 : return tdb_null;
348 198 : if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1
349 198 : || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
350 : rec.key_len))
351 198 : || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
352 : /* No, it wasn't: unlock it and start from scratch */
353 0 : if (tdb_unlock_record(tdb, tdb->travlocks.off) != 0) {
354 : tdb_trace_1rec_retrec(tdb, "tdb_nextkey",
355 0 : oldkey, tdb_null);
356 0 : SAFE_FREE(k);
357 0 : return tdb_null;
358 : }
359 0 : if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0) {
360 0 : SAFE_FREE(k);
361 0 : return tdb_null;
362 : }
363 0 : tdb->travlocks.off = 0;
364 : }
365 :
366 198 : SAFE_FREE(k);
367 : }
368 :
369 198 : if (!tdb->travlocks.off) {
370 : /* No previous element: do normal find, and lock record */
371 0 : tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec);
372 0 : if (!tdb->travlocks.off) {
373 0 : tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, tdb_null);
374 0 : return tdb_null;
375 : }
376 0 : tdb->travlocks.list = BUCKET(rec.full_hash);
377 0 : if (tdb_lock_record(tdb, tdb->travlocks.off) != 0) {
378 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
379 0 : return tdb_null;
380 : }
381 : }
382 198 : oldlist = tdb->travlocks.list;
383 :
384 : /* Grab next record: locks chain and returned record,
385 : unlocks old record */
386 198 : off = tdb_next_lock(tdb, &tdb->travlocks, &rec);
387 198 : if (off != TDB_NEXT_LOCK_ERR && off != 0) {
388 172 : key.dsize = rec.key_len;
389 178 : key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
390 6 : key.dsize);
391 : /* Unlock the chain of this new record */
392 172 : if (tdb_unlock(tdb, tdb->travlocks.list, tdb->travlocks.lock_rw) != 0)
393 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
394 : }
395 : /* Unlock the chain of old record */
396 198 : if (tdb_unlock(tdb, oldlist, tdb->travlocks.lock_rw) != 0)
397 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
398 184 : tdb_trace_1rec_retrec(tdb, "tdb_nextkey", oldkey, key);
399 198 : return key;
400 : }
401 :
402 228671 : _PUBLIC_ int tdb_traverse_chain(struct tdb_context *tdb,
403 : unsigned chain,
404 : tdb_traverse_func fn,
405 : void *private_data)
406 : {
407 19957 : tdb_off_t rec_ptr;
408 19957 : struct tdb_chainwalk_ctx chainwalk;
409 228671 : int count = 0;
410 19957 : int ret;
411 :
412 228671 : if (chain >= tdb->hash_size) {
413 0 : tdb->ecode = TDB_ERR_EINVAL;
414 0 : return -1;
415 : }
416 :
417 228671 : if (tdb->traverse_read != 0) {
418 0 : tdb->ecode = TDB_ERR_LOCK;
419 0 : return -1;
420 : }
421 :
422 228671 : ret = tdb_lock(tdb, chain, F_RDLCK);
423 228671 : if (ret == -1) {
424 0 : return -1;
425 : }
426 :
427 228671 : tdb->traverse_read += 1;
428 :
429 228671 : ret = tdb_ofs_read(tdb, TDB_HASH_TOP(chain), &rec_ptr);
430 228671 : if (ret == -1) {
431 0 : goto fail;
432 : }
433 :
434 228671 : tdb_chainwalk_init(&chainwalk, rec_ptr);
435 :
436 275145 : while (rec_ptr != 0) {
437 1706 : struct tdb_record rec;
438 1706 : bool ok;
439 :
440 46474 : ret = tdb_rec_read(tdb, rec_ptr, &rec);
441 46474 : if (ret == -1) {
442 0 : goto fail;
443 : }
444 :
445 46474 : if (!TDB_DEAD(&rec)) {
446 : /* no overflow checks, tdb_rec_read checked it */
447 46327 : tdb_off_t key_ofs = rec_ptr + sizeof(rec);
448 46327 : size_t full_len = rec.key_len + rec.data_len;
449 46327 : uint8_t *buf = NULL;
450 :
451 46327 : TDB_DATA key = { .dsize = rec.key_len };
452 46327 : TDB_DATA data = { .dsize = rec.data_len };
453 :
454 46327 : if ((tdb->transaction == NULL) &&
455 46327 : (tdb->map_ptr != NULL)) {
456 46327 : ret = tdb_oob(tdb, key_ofs, full_len, 0);
457 44621 : if (ret == -1) {
458 0 : goto fail;
459 : }
460 46327 : key.dptr = (uint8_t *)tdb->map_ptr + key_ofs;
461 : } else {
462 0 : buf = tdb_alloc_read(tdb, key_ofs, full_len);
463 0 : if (buf == NULL) {
464 0 : goto fail;
465 : }
466 0 : key.dptr = buf;
467 : }
468 46327 : data.dptr = key.dptr + key.dsize;
469 :
470 46327 : ret = fn(tdb, key, data, private_data);
471 46327 : free(buf);
472 :
473 46327 : count += 1;
474 :
475 46327 : if (ret != 0) {
476 0 : break;
477 : }
478 : }
479 :
480 46474 : rec_ptr = rec.next;
481 :
482 46474 : ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
483 46474 : if (!ok) {
484 0 : goto fail;
485 : }
486 : }
487 228671 : tdb->traverse_read -= 1;
488 228671 : tdb_unlock(tdb, chain, F_RDLCK);
489 228671 : return count;
490 :
491 0 : fail:
492 0 : tdb->traverse_read -= 1;
493 0 : tdb_unlock(tdb, chain, F_RDLCK);
494 0 : return -1;
495 : }
496 :
497 228671 : _PUBLIC_ int tdb_traverse_key_chain(struct tdb_context *tdb,
498 : TDB_DATA key,
499 : tdb_traverse_func fn,
500 : void *private_data)
501 : {
502 19957 : uint32_t hash, chain;
503 19957 : int ret;
504 :
505 228671 : hash = tdb->hash_fn(&key);
506 228671 : chain = BUCKET(hash);
507 228671 : ret = tdb_traverse_chain(tdb, chain, fn, private_data);
508 :
509 228671 : return ret;
510 : }
|