Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : trivial database library
5 :
6 : Copyright (C) Andrew Tridgell 1999-2005
7 : Copyright (C) Paul `Rusty' Russell 2000
8 : Copyright (C) Jeremy Allison 2000-2003
9 :
10 : ** NOTE! The following LGPL license applies to the tdb
11 : ** library. This does NOT imply that all of Samba is released
12 : ** under the LGPL
13 :
14 : This library is free software; you can redistribute it and/or
15 : modify it under the terms of the GNU Lesser General Public
16 : License as published by the Free Software Foundation; either
17 : version 3 of the License, or (at your option) any later version.
18 :
19 : This library is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 : Lesser General Public License for more details.
23 :
24 : You should have received a copy of the GNU Lesser General Public
25 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 : */
27 :
28 : #include "tdb_private.h"
29 :
30 : /* read a freelist record and check for simple errors */
31 491589683 : int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct tdb_record *rec)
32 : {
33 491589683 : if (tdb->methods->tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
34 0 : return -1;
35 :
36 491589683 : if (rec->magic == TDB_MAGIC) {
37 : /* this happens when a app is showdown while deleting a record - we should
38 : not completely fail when this happens */
39 0 : TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%u - fixing\n",
40 : rec->magic, off));
41 0 : rec->magic = TDB_FREE_MAGIC;
42 0 : if (tdb_rec_write(tdb, off, rec) == -1)
43 0 : return -1;
44 : }
45 :
46 491589683 : if (rec->magic != TDB_FREE_MAGIC) {
47 : /* Ensure ecode is set for log fn. */
48 0 : tdb->ecode = TDB_ERR_CORRUPT;
49 0 : TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%u\n",
50 : rec->magic, off));
51 0 : return -1;
52 : }
53 491589683 : if (tdb_oob(tdb, rec->next, sizeof(*rec), 0) != 0)
54 0 : return -1;
55 446659078 : return 0;
56 : }
57 :
58 : /* update a record tailer (must hold allocation lock) */
59 144436393 : static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
60 : const struct tdb_record *rec)
61 : {
62 7070980 : tdb_off_t totalsize;
63 :
64 : /* Offset of tailer from record header */
65 144436393 : totalsize = sizeof(*rec) + rec->rec_len;
66 144436393 : return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t),
67 : &totalsize);
68 : }
69 :
70 : /**
71 : * Read the record directly on the left.
72 : * Fail if there is no record on the left.
73 : */
74 426517540 : static int read_record_on_left(struct tdb_context *tdb, tdb_off_t rec_ptr,
75 : tdb_off_t *left_p,
76 : struct tdb_record *left_r)
77 : {
78 41796485 : tdb_off_t left_ptr;
79 41796485 : tdb_off_t left_size;
80 41796485 : struct tdb_record left_rec;
81 41796485 : int ret;
82 :
83 426517540 : left_ptr = rec_ptr - sizeof(tdb_off_t);
84 :
85 426517540 : if (left_ptr <= TDB_DATA_START(tdb->hash_size)) {
86 : /* no record on the left */
87 58888522 : return -1;
88 : }
89 :
90 : /* Read in tailer and jump back to header */
91 365431658 : ret = tdb_ofs_read(tdb, left_ptr, &left_size);
92 365431658 : if (ret == -1) {
93 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL,
94 : "tdb_free: left offset read failed at %u\n", left_ptr));
95 0 : return -1;
96 : }
97 :
98 : /* it could be uninitialised data */
99 365431658 : if (left_size == 0 || left_size == TDB_PAD_U32) {
100 2012636 : return -1;
101 : }
102 :
103 363277668 : if (left_size > rec_ptr) {
104 0 : return -1;
105 : }
106 :
107 363277668 : left_ptr = rec_ptr - left_size;
108 :
109 363277668 : if (left_ptr < TDB_DATA_START(tdb->hash_size)) {
110 0 : return -1;
111 : }
112 :
113 : /* Now read in the left record */
114 402735439 : ret = tdb->methods->tdb_read(tdb, left_ptr, &left_rec,
115 363277668 : sizeof(left_rec), DOCONV());
116 363277668 : if (ret == -1) {
117 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL,
118 : "tdb_free: left read failed at %u (%u)\n",
119 : left_ptr, left_size));
120 0 : return -1;
121 : }
122 :
123 363277668 : *left_p = left_ptr;
124 363277668 : *left_r = left_rec;
125 :
126 363277668 : return 0;
127 : }
128 :
129 : /**
130 : * Merge new freelist record with the direct left neighbour.
131 : * This assumes that left_rec represents the record
132 : * directly to the left of right_rec and that this is
133 : * a freelist record.
134 : */
135 1748881 : static int merge_with_left_record(struct tdb_context *tdb,
136 : tdb_off_t left_ptr,
137 : struct tdb_record *left_rec,
138 : struct tdb_record *right_rec)
139 : {
140 56892 : int ret;
141 :
142 1748881 : left_rec->rec_len += sizeof(*right_rec) + right_rec->rec_len;
143 :
144 1748881 : ret = tdb_rec_write(tdb, left_ptr, left_rec);
145 1748881 : if (ret == -1) {
146 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL,
147 : "merge_with_left_record: update_left failed at %u\n",
148 : left_ptr));
149 0 : return -1;
150 : }
151 :
152 1748881 : ret = update_tailer(tdb, left_ptr, left_rec);
153 1748881 : if (ret == -1) {
154 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL,
155 : "merge_with_left_record: update_tailer failed at %u\n",
156 : left_ptr));
157 0 : return -1;
158 : }
159 :
160 1691989 : return 0;
161 : }
162 :
163 : /**
164 : * Check whether the record left of a given freelist record is
165 : * also a freelist record, and if so, merge the two records.
166 : *
167 : * Return code:
168 : * -1 upon error
169 : * 0 if left was not a free record
170 : * 1 if left was free and successfully merged.
171 : *
172 : * The current record is handed in with pointer and fully read record.
173 : *
174 : * The left record pointer and struct can be retrieved as result
175 : * in lp and lr;
176 : */
177 426517540 : static int check_merge_with_left_record(struct tdb_context *tdb,
178 : tdb_off_t rec_ptr,
179 : struct tdb_record *rec,
180 : tdb_off_t *lp,
181 : struct tdb_record *lr)
182 : {
183 41796485 : tdb_off_t left_ptr;
184 41796485 : struct tdb_record left_rec;
185 41796485 : int ret;
186 :
187 426517540 : ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec);
188 426517540 : if (ret != 0) {
189 60901158 : return 0;
190 : }
191 :
192 363277668 : if (left_rec.magic != TDB_FREE_MAGIC) {
193 322127908 : return 0;
194 : }
195 :
196 : /* It's free - expand to include it. */
197 1748881 : ret = merge_with_left_record(tdb, left_ptr, &left_rec, rec);
198 1748881 : if (ret != 0) {
199 0 : return -1;
200 : }
201 :
202 1748881 : if (lp != NULL) {
203 454482 : *lp = left_ptr;
204 : }
205 :
206 1748881 : if (lr != NULL) {
207 454482 : *lr = left_rec;
208 : }
209 :
210 1691989 : return 1;
211 : }
212 :
213 : /**
214 : * Check whether the record left of a given freelist record is
215 : * also a freelist record, and if so, merge the two records.
216 : *
217 : * Return code:
218 : * -1 upon error
219 : * 0 if left was not a free record
220 : * 1 if left was free and successfully merged.
221 : *
222 : * In this variant, the input record is specified just as the pointer
223 : * and is read from the database if needed.
224 : *
225 : * next_ptr will contain the original record's next pointer after
226 : * successful merging (which will be lost after merging), so that
227 : * the caller can update the last pointer.
228 : */
229 0 : static int check_merge_ptr_with_left_record(struct tdb_context *tdb,
230 : tdb_off_t rec_ptr,
231 : tdb_off_t *next_ptr)
232 : {
233 0 : tdb_off_t left_ptr;
234 0 : struct tdb_record rec, left_rec;
235 0 : int ret;
236 :
237 0 : ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec);
238 0 : if (ret != 0) {
239 0 : return 0;
240 : }
241 :
242 0 : if (left_rec.magic != TDB_FREE_MAGIC) {
243 0 : return 0;
244 : }
245 :
246 : /* It's free - expand to include it. */
247 :
248 0 : ret = tdb->methods->tdb_read(tdb, rec_ptr, &rec,
249 0 : sizeof(rec), DOCONV());
250 0 : if (ret != 0) {
251 0 : return -1;
252 : }
253 :
254 0 : ret = merge_with_left_record(tdb, left_ptr, &left_rec, &rec);
255 0 : if (ret != 0) {
256 0 : return -1;
257 : }
258 :
259 0 : if (next_ptr != NULL) {
260 0 : *next_ptr = rec.next;
261 : }
262 :
263 0 : return 1;
264 : }
265 :
266 : /**
267 : * Add an element into the freelist.
268 : *
269 : * We merge the new record into the left record if it is also a
270 : * free record, but not with the right one. This makes the
271 : * operation O(1) instead of O(n): merging with the right record
272 : * requires a traverse of the freelist to find the previous
273 : * record in the free list.
274 : *
275 : * This prevents db traverses from being O(n^2) after a lot of deletes.
276 : */
277 5008924 : int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
278 : {
279 305462 : int ret;
280 :
281 : /* Allocation and tailer lock */
282 5008924 : if (tdb_lock(tdb, -1, F_WRLCK) != 0)
283 0 : return -1;
284 :
285 : /* set an initial tailer, so if we fail we don't leave a bogus record */
286 5008924 : if (update_tailer(tdb, offset, rec) != 0) {
287 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n"));
288 0 : goto fail;
289 : }
290 :
291 5008924 : ret = check_merge_with_left_record(tdb, offset, rec, NULL, NULL);
292 5008924 : if (ret == -1) {
293 0 : goto fail;
294 : }
295 5008924 : if (ret == 1) {
296 : /* merged */
297 1294399 : goto done;
298 : }
299 :
300 : /* Nothing to merge, prepend to free list */
301 :
302 3714525 : rec->magic = TDB_FREE_MAGIC;
303 :
304 7429050 : if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
305 7429050 : tdb_rec_write(tdb, offset, rec) == -1 ||
306 3714525 : tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
307 0 : TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%u\n", offset));
308 0 : goto fail;
309 : }
310 :
311 3714525 : done:
312 : /* And we're done. */
313 5008924 : tdb_unlock(tdb, -1, F_WRLCK);
314 5008924 : return 0;
315 :
316 0 : fail:
317 0 : tdb_unlock(tdb, -1, F_WRLCK);
318 0 : return -1;
319 : }
320 :
321 :
322 :
323 : /*
324 : the core of tdb_allocate - called when we have decided which
325 : free list entry to use
326 :
327 : Note that we try to allocate by grabbing data from the end of an existing record,
328 : not the beginning. This is so the left merge in a free is more likely to be
329 : able to free up the record without fragmentation
330 : */
331 69790363 : static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb,
332 : tdb_len_t length, tdb_off_t rec_ptr,
333 : struct tdb_record *rec, tdb_off_t last_ptr)
334 : {
335 : #define MIN_REC_SIZE (sizeof(struct tdb_record) + sizeof(tdb_off_t) + 8)
336 :
337 69790363 : if (rec->rec_len < length + MIN_REC_SIZE) {
338 : /* we have to grab the whole record */
339 :
340 : /* unlink it from the previous record */
341 951069 : if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
342 0 : return 0;
343 : }
344 :
345 : /* mark it not free */
346 951069 : rec->magic = TDB_MAGIC;
347 951069 : if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
348 0 : return 0;
349 : }
350 951069 : return rec_ptr;
351 : }
352 :
353 : /* we're going to just shorten the existing record */
354 68839294 : rec->rec_len -= (length + sizeof(*rec));
355 68839294 : if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
356 0 : return 0;
357 : }
358 68839294 : if (update_tailer(tdb, rec_ptr, rec) == -1) {
359 0 : return 0;
360 : }
361 :
362 : /* and setup the new record */
363 68839294 : rec_ptr += sizeof(*rec) + rec->rec_len;
364 :
365 68839294 : memset(rec, '\0', sizeof(*rec));
366 68839294 : rec->rec_len = length;
367 68839294 : rec->magic = TDB_MAGIC;
368 :
369 68839294 : if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
370 0 : return 0;
371 : }
372 :
373 68839294 : if (update_tailer(tdb, rec_ptr, rec) == -1) {
374 0 : return 0;
375 : }
376 :
377 65484981 : return rec_ptr;
378 : }
379 :
380 : /* allocate some space from the free list. The offset returned points
381 : to a unconnected tdb_record within the database with room for at
382 : least length bytes of total data
383 :
384 : 0 is returned if the space could not be allocated
385 : */
386 69790364 : static tdb_off_t tdb_allocate_from_freelist(
387 : struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec)
388 : {
389 3389462 : tdb_off_t rec_ptr, last_ptr, newrec_ptr;
390 3389462 : struct tdb_chainwalk_ctx chainwalk;
391 3389462 : bool modified;
392 3389462 : struct {
393 : tdb_off_t rec_ptr, last_ptr;
394 : tdb_len_t rec_len;
395 : } bestfit;
396 69790364 : float multiplier = 1.0;
397 3389462 : bool merge_created_candidate;
398 :
399 : /* over-allocate to reduce fragmentation */
400 69790364 : length *= 1.25;
401 :
402 : /* Extra bytes required for tailer */
403 69790364 : length += sizeof(tdb_off_t);
404 69790364 : length = TDB_ALIGN(length, TDB_ALIGNMENT);
405 :
406 60543309 : again:
407 71898215 : merge_created_candidate = false;
408 71898215 : last_ptr = FREELIST_TOP;
409 :
410 : /* read in the freelist top */
411 71898215 : if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
412 0 : return 0;
413 :
414 71898215 : modified = false;
415 71898215 : tdb_chainwalk_init(&chainwalk, rec_ptr);
416 :
417 71898215 : bestfit.rec_ptr = 0;
418 71898215 : bestfit.last_ptr = 0;
419 71898215 : bestfit.rec_len = 0;
420 :
421 : /*
422 : this is a best fit allocation strategy. Originally we used
423 : a first fit strategy, but it suffered from massive fragmentation
424 : issues when faced with a slowly increasing record size.
425 : */
426 491735107 : while (rec_ptr) {
427 41491023 : int ret;
428 41491023 : tdb_off_t left_ptr;
429 41491023 : struct tdb_record left_rec;
430 :
431 421508616 : if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) {
432 1 : return 0;
433 : }
434 :
435 421508616 : ret = check_merge_with_left_record(tdb, rec_ptr, rec,
436 : &left_ptr, &left_rec);
437 421508616 : if (ret == -1) {
438 0 : return 0;
439 : }
440 421508616 : if (ret == 1) {
441 : /* merged */
442 454482 : rec_ptr = rec->next;
443 454482 : ret = tdb_ofs_write(tdb, last_ptr, &rec->next);
444 454482 : if (ret == -1) {
445 0 : return 0;
446 : }
447 :
448 : /*
449 : * We have merged the current record into the left
450 : * neighbour. So our traverse of the freelist will
451 : * skip it and consider the next record in the chain.
452 : *
453 : * But the enlarged left neighbour may be a candidate.
454 : * If it is, we can not directly use it, though.
455 : * The only thing we can do and have to do here is to
456 : * update the current best fit size in the chain if the
457 : * current best fit is the left record. (By that we may
458 : * worsen the best fit we already had, bit this is not a
459 : * problem.)
460 : *
461 : * If the current best fit is not the left record,
462 : * all we can do is remember the fact that a merge
463 : * created a new candidate so that we can trigger
464 : * a second walk of the freelist if at the end of
465 : * the first walk we have not found any fit.
466 : * This way we can avoid expanding the database.
467 : */
468 :
469 454482 : if (bestfit.rec_ptr == left_ptr) {
470 17135 : bestfit.rec_len = left_rec.rec_len;
471 : }
472 :
473 454482 : if (left_rec.rec_len > length) {
474 408587 : merge_created_candidate = true;
475 : }
476 :
477 454482 : modified = true;
478 :
479 454482 : continue;
480 : }
481 :
482 421054134 : if (rec->rec_len >= length) {
483 73056275 : if (bestfit.rec_ptr == 0 ||
484 2876391 : rec->rec_len < bestfit.rec_len) {
485 70876133 : bestfit.rec_len = rec->rec_len;
486 70876133 : bestfit.rec_ptr = rec_ptr;
487 70876133 : bestfit.last_ptr = last_ptr;
488 : }
489 : }
490 :
491 : /* move to the next record */
492 421054134 : last_ptr = rec_ptr;
493 421054134 : rec_ptr = rec->next;
494 :
495 421054134 : if (!modified) {
496 34171628 : bool ok;
497 387477481 : ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
498 387477481 : if (!ok) {
499 1 : return 0;
500 : }
501 : }
502 :
503 : /* if we've found a record that is big enough, then
504 : stop searching if its also not too big. The
505 : definition of 'too big' changes as we scan
506 : through */
507 421054133 : if (bestfit.rec_len > 0 &&
508 154471178 : bestfit.rec_len < length * multiplier) {
509 1517773 : break;
510 : }
511 :
512 : /* this multiplier means we only extremely rarely
513 : search more than 50 or so records. At 50 records we
514 : accept records up to 11 times larger than what we
515 : want */
516 419382410 : multiplier *= 1.05;
517 : }
518 :
519 71898214 : if (bestfit.rec_ptr != 0) {
520 69790363 : if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
521 0 : return 0;
522 : }
523 :
524 69790363 : newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr,
525 : rec, bestfit.last_ptr);
526 69790363 : return newrec_ptr;
527 : }
528 :
529 2107851 : if (merge_created_candidate) {
530 461 : goto again;
531 : }
532 :
533 : /* we didn't find enough space. See if we can expand the
534 : database and if we can then try again */
535 2107390 : if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
536 2107390 : goto again;
537 :
538 0 : return 0;
539 : }
540 :
541 1106182 : static bool tdb_alloc_dead(
542 : struct tdb_context *tdb, int hash, tdb_len_t length,
543 : tdb_off_t *rec_ptr, struct tdb_record *rec)
544 : {
545 5332 : tdb_off_t last_ptr;
546 :
547 1106182 : *rec_ptr = tdb_find_dead(tdb, hash, rec, length, &last_ptr);
548 1106182 : if (*rec_ptr == 0) {
549 373400 : return false;
550 : }
551 : /*
552 : * Unlink the record from the hash chain, it's about to be moved into
553 : * another one.
554 : */
555 728484 : return (tdb_ofs_write(tdb, last_ptr, &rec->next) == 0);
556 : }
557 :
558 69790364 : static void tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
559 : {
560 69790364 : int max_dead_records = tdb->max_dead_records;
561 :
562 69790364 : tdb->max_dead_records = 0;
563 :
564 69790364 : tdb_trim_dead(tdb, hash);
565 :
566 69790364 : tdb->max_dead_records = max_dead_records;
567 66400902 : }
568 :
569 : /*
570 : * Chain "hash" is assumed to be locked
571 : */
572 :
573 70518848 : tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length,
574 : struct tdb_record *rec)
575 : {
576 3390496 : tdb_off_t ret;
577 3390496 : uint32_t i;
578 :
579 70518848 : if (tdb->max_dead_records == 0) {
580 : /*
581 : * No dead records to expect anywhere. Do the blocking
582 : * freelist lock without trying to steal from others
583 : */
584 69415391 : goto blocking_freelist_allocate;
585 : }
586 :
587 : /*
588 : * The following loop tries to get the freelist lock nonblocking. If
589 : * it gets the lock, allocate from there. If the freelist is busy,
590 : * instead of waiting we try to steal dead records from other hash
591 : * chains.
592 : *
593 : * Be aware that we do nonblocking locks on the other hash chains as
594 : * well and fail gracefully. This way we avoid deadlocks (we block two
595 : * hash chains, something which is pretty bad normally)
596 : */
597 :
598 1106182 : for (i=0; i<tdb->hash_size; i++) {
599 :
600 5332 : int list;
601 :
602 1106182 : list = BUCKET(hash+i);
603 :
604 1106182 : if (tdb_lock_nonblock(tdb, list, F_WRLCK) == 0) {
605 5332 : bool got_dead;
606 :
607 1106182 : got_dead = tdb_alloc_dead(tdb, list, length, &ret, rec);
608 1106182 : tdb_unlock(tdb, list, F_WRLCK);
609 :
610 1106182 : if (got_dead) {
611 728484 : return ret;
612 : }
613 : }
614 :
615 377698 : if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == 0) {
616 : /*
617 : * Under the freelist lock take the chance to give
618 : * back our dead records.
619 : */
620 374973 : tdb_purge_dead(tdb, hash);
621 :
622 374973 : ret = tdb_allocate_from_freelist(tdb, length, rec);
623 374973 : tdb_unlock(tdb, -1, F_WRLCK);
624 374973 : return ret;
625 : }
626 : }
627 :
628 0 : blocking_freelist_allocate:
629 :
630 69415391 : if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
631 0 : return 0;
632 : }
633 : /*
634 : * Dead records can happen even if max_dead_records==0, they
635 : * are older than the max_dead_records concept: They happen if
636 : * tdb_delete happens concurrently with a traverse.
637 : */
638 69415391 : tdb_purge_dead(tdb, hash);
639 69415391 : ret = tdb_allocate_from_freelist(tdb, length, rec);
640 69415391 : tdb_unlock(tdb, -1, F_WRLCK);
641 69415391 : return ret;
642 : }
643 :
644 : /**
645 : * Merge adjacent records in the freelist.
646 : */
647 4 : static int tdb_freelist_merge_adjacent(struct tdb_context *tdb,
648 : int *count_records, int *count_merged)
649 : {
650 2 : tdb_off_t cur, next;
651 4 : int count = 0;
652 4 : int merged = 0;
653 2 : int ret;
654 :
655 4 : ret = tdb_lock(tdb, -1, F_RDLCK);
656 4 : if (ret == -1) {
657 0 : return -1;
658 : }
659 :
660 2 : cur = FREELIST_TOP;
661 4 : while (tdb_ofs_read(tdb, cur, &next) == 0 && next != 0) {
662 0 : tdb_off_t next2;
663 :
664 0 : count++;
665 :
666 0 : ret = check_merge_ptr_with_left_record(tdb, next, &next2);
667 0 : if (ret == -1) {
668 0 : goto done;
669 : }
670 0 : if (ret == 1) {
671 : /*
672 : * merged:
673 : * now let cur->next point to next2 instead of next
674 : */
675 :
676 0 : ret = tdb_ofs_write(tdb, cur, &next2);
677 0 : if (ret != 0) {
678 0 : goto done;
679 : }
680 :
681 0 : next = next2;
682 0 : merged++;
683 : }
684 :
685 0 : cur = next;
686 : }
687 :
688 4 : if (count_records != NULL) {
689 4 : *count_records = count;
690 : }
691 :
692 4 : if (count_merged != NULL) {
693 0 : *count_merged = merged;
694 : }
695 :
696 2 : ret = 0;
697 :
698 4 : done:
699 4 : tdb_unlock(tdb, -1, F_RDLCK);
700 4 : return ret;
701 : }
702 :
703 : /**
704 : * return the size of the freelist - no merging done
705 : */
706 0 : static int tdb_freelist_size_no_merge(struct tdb_context *tdb)
707 : {
708 0 : tdb_off_t ptr;
709 0 : int count=0;
710 :
711 0 : if (tdb_lock(tdb, -1, F_RDLCK) == -1) {
712 0 : return -1;
713 : }
714 :
715 0 : ptr = FREELIST_TOP;
716 0 : while (tdb_ofs_read(tdb, ptr, &ptr) == 0 && ptr != 0) {
717 0 : count++;
718 : }
719 :
720 0 : tdb_unlock(tdb, -1, F_RDLCK);
721 0 : return count;
722 : }
723 :
724 : /**
725 : * return the size of the freelist - used to decide if we should repack
726 : *
727 : * As a side effect, adjacent records are merged unless the
728 : * database is read-only, in order to reduce the fragmentation
729 : * without repacking.
730 : */
731 4 : _PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb)
732 : {
733 :
734 4 : int count = 0;
735 :
736 4 : if (tdb->read_only) {
737 0 : count = tdb_freelist_size_no_merge(tdb);
738 : } else {
739 2 : int ret;
740 4 : ret = tdb_freelist_merge_adjacent(tdb, &count, NULL);
741 4 : if (ret != 0) {
742 0 : return -1;
743 : }
744 : }
745 :
746 4 : return count;
747 : }
|