Skip to content

Commit 8222c48

Browse files
author
Jun
committed
[TDB-20] With checkpoint disabled, closing FT file still rotates header.
This is an attempt based on @BohuTANG's patch percona#331 in particular, the ctest is all his credits. I included his commit history in the git log, 1) The issue is real. 2) @bohu's patch works for mediating the issue but somewhat introduces a ft leak that the ft with ref=0 may be left until next open/close or checkpoint. 3) If ft_close should be held off upon a backup, then it simply means a backup should hold a ref to the ft and responsible for closing ft up if it is the last one to hold it, like any other possible referencer. including the checkpoint, the txn and ft handlers. 4) mtr test contributed by @BohuTANG still passes
1 parent 126e294 commit 8222c48

10 files changed

+87
-36
lines changed

ft/cachetable/cachetable-internal.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ struct cachefile {
161161
void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function.
162162
void (*note_pin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory
163163
void (*note_unpin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory
164+
165+
void (*note_pin_by_backup)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory
166+
void (*note_unpin_by_backup)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory
164167
BACKGROUND_JOB_MANAGER bjm;
165168
};
166169

@@ -604,7 +607,7 @@ struct cachetable {
604607
KIBBUTZ client_kibbutz; // pool of worker threads and jobs to do asynchronously for the client.
605608
KIBBUTZ ct_kibbutz; // pool of worker threads and jobs to do asynchronously for the cachetable
606609
KIBBUTZ checkpointing_kibbutz; // small pool for checkpointing cloned pairs
607-
bool in_backup; // we are in back up or NOT, default is false
610+
//bool in_backup; // we are in back up or NOT, default is false
608611

609612
char *env_dir;
610613
};

ft/cachetable/cachetable.cc

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ int toku_cachetable_create_ex(CACHETABLE *ct_result, long size_limit,
269269
CACHETABLE XCALLOC(ct);
270270
ct->list.init();
271271
ct->cf_list.init();
272-
ct->in_backup = false;
272+
//ct->in_backup = false;
273273

274274
int num_processors = toku_os_get_number_active_processors();
275275
int checkpointing_nworkers = (num_processors/4) ? num_processors/4 : 1;
@@ -2784,24 +2784,40 @@ void toku_cachetable_end_checkpoint(CHECKPOINTER cp, TOKULOGGER UU(logger),
27842784
}
27852785

27862786
// in_backup begin
2787+
2788+
struct iterate_note_pin_backup {
2789+
static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) {
2790+
assert(cf->note_pin_by_backup);
2791+
cf->note_pin_by_backup(cf, cf->userdata);
2792+
return 0;
2793+
}
2794+
};
2795+
2796+
struct iterate_note_unpin_backup {
2797+
static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) {
2798+
assert(cf->note_unpin_by_backup);
2799+
cf->note_unpin_by_backup(cf, cf->userdata);
2800+
return 0;
2801+
}
2802+
};
27872803
void toku_cachetable_begin_backup(CACHETABLE ct)
27882804
{
27892805
ct->cf_list.read_lock();
2790-
ct->in_backup = true;
2806+
ct->cf_list.m_active_fileid.iterate<void *, iterate_note_pin_backup::fn>(nullptr);
27912807
ct->cf_list.read_unlock();
27922808
}
27932809

27942810
void toku_cachetable_end_backup(CACHETABLE ct)
27952811
{
2796-
ct->cf_list.read_lock();
2797-
ct->in_backup = false;
2798-
ct->cf_list.read_unlock();
2812+
ct->cf_list.m_active_fileid.iterate<void *, iterate_note_unpin_backup::fn>(nullptr);
27992813
}
28002814

2815+
#if 0
28012816
bool toku_cachefile_in_backup(CACHEFILE cf)
28022817
{
28032818
return cf->cachetable->in_backup;
28042819
}
2820+
#endif
28052821
// in_backup end
28062822

28072823
TOKULOGGER toku_cachefile_logger (CACHEFILE cf) {
@@ -2920,7 +2936,9 @@ toku_cachefile_set_userdata (CACHEFILE cf,
29202936
void (*begin_checkpoint_userdata)(LSN, void*),
29212937
void (*end_checkpoint_userdata)(CACHEFILE, int, void*),
29222938
void (*note_pin_by_checkpoint)(CACHEFILE, void*),
2923-
void (*note_unpin_by_checkpoint)(CACHEFILE, void*)) {
2939+
void (*note_unpin_by_checkpoint)(CACHEFILE, void*),
2940+
void (*note_pin_by_backup)(CACHEFILE, void*),
2941+
void (*note_unpin_by_backup)(CACHEFILE, void*)) {
29242942
cf->userdata = userdata;
29252943
cf->log_fassociate_during_checkpoint = log_fassociate_during_checkpoint;
29262944
cf->close_userdata = close_userdata;
@@ -2930,6 +2948,8 @@ toku_cachefile_set_userdata (CACHEFILE cf,
29302948
cf->end_checkpoint_userdata = end_checkpoint_userdata;
29312949
cf->note_pin_by_checkpoint = note_pin_by_checkpoint;
29322950
cf->note_unpin_by_checkpoint = note_unpin_by_checkpoint;
2951+
cf->note_pin_by_backup = note_pin_by_backup;
2952+
cf->note_unpin_by_backup = note_unpin_by_backup;
29332953
}
29342954

29352955
void *toku_cachefile_get_userdata(CACHEFILE cf) {

ft/cachetable/cachetable.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,9 @@ void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
289289
void (*begin_checkpoint_userdata)(LSN, void*),
290290
void (*end_checkpoint_userdata)(CACHEFILE, int, void*),
291291
void (*note_pin_by_checkpoint)(CACHEFILE, void*),
292-
void (*note_unpin_by_checkpoint)(CACHEFILE, void*));
292+
void (*note_unpin_by_checkpoint)(CACHEFILE, void*),
293+
void (*note_pin_by_backup)(CACHEFILE, void*),
294+
void (*note_unpin_by_backup)(CACHEFILE, void*));
293295
// Effect: Store some cachefile-specific user data. When the last reference to a cachefile is closed, we call close_userdata().
294296
// Before starting a checkpoint, we call checkpoint_prepare_userdata().
295297
// When the cachefile needs to be checkpointed, we call checkpoint_userdata().

ft/ft-internal.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,9 @@ struct ft {
197197
uint32_t num_txns;
198198
// A checkpoint is running. If true, then keep this header around for checkpoint, like a transaction
199199
bool pinned_by_checkpoint;
200-
200+
// Number of backups that are using this FT. If it is nonzero, keep this header around until backup
201+
// is completd.
202+
uint32_t num_backups;
201203
// is this ft a blackhole? if so, all messages are dropped.
202204
bool blackhole;
203205

ft/ft.cc

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -293,13 +293,7 @@ static void ft_note_pin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v)
293293
FT ft = (FT) header_v;
294294
toku_ft_grab_reflock(ft);
295295
assert(!ft->pinned_by_checkpoint);
296-
297-
// because we add in_backup for cachetable, with this case:
298-
// a cachefile in_backup is true and then do a checkpoint
299-
// this assertion fails since nobody references to the ft
300-
#if 0
301296
assert(toku_ft_needed_unlocked(ft));
302-
#endif
303297
ft->pinned_by_checkpoint = true;
304298
toku_ft_release_reflock(ft);
305299
}
@@ -319,6 +313,35 @@ static void ft_note_unpin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v
319313
toku_ft_remove_reference(ft, false, ZERO_LSN, unpin_by_checkpoint_callback, NULL);
320314
}
321315

316+
317+
// maps to cf->note_pin_by_backup
318+
//Must be protected by ydb lock.
319+
//Is only called by backup begin, which holds it
320+
static void ft_note_pin_by_backup (CACHEFILE UU(cachefile), void *header_v) {
321+
// Note: open_close lock is held by checkpoint begin
322+
FT ft = (FT) header_v;
323+
toku_ft_grab_reflock(ft);
324+
assert(toku_ft_needed_unlocked(ft));
325+
ft->num_backups ++;
326+
toku_ft_release_reflock(ft);
327+
}
328+
329+
// Requires: the reflock is held.
330+
static void unpin_by_backup_callback(FT ft, void *extra) {
331+
invariant(extra == NULL);
332+
invariant(ft->num_backups>0);
333+
ft->num_backups --;
334+
}
335+
336+
// maps to cf->note_unpin_by_backup
337+
//Must be protected by ydb lock.
338+
//Called by end_backup, which grabs ydb lock around note_unpin
339+
static void ft_note_unpin_by_backup (CACHEFILE UU(cachefile), void *header_v) {
340+
FT ft = (FT) header_v;
341+
toku_ft_remove_reference(ft, false, ZERO_LSN, unpin_by_backup_callback, NULL);
342+
}
343+
344+
322345
//
323346
// End of Functions that are callbacks to the cachefile
324347
/////////////////////////////////////////////////////////////////////////
@@ -364,7 +387,9 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) {
364387
ft_begin_checkpoint,
365388
ft_end_checkpoint,
366389
ft_note_pin_by_checkpoint,
367-
ft_note_unpin_by_checkpoint);
390+
ft_note_unpin_by_checkpoint,
391+
ft_note_pin_by_backup,
392+
ft_note_unpin_by_backup);
368393

369394
ft->blocktable.verify_no_free_blocknums();
370395
}
@@ -461,7 +486,9 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN
461486
ft_begin_checkpoint,
462487
ft_end_checkpoint,
463488
ft_note_pin_by_checkpoint,
464-
ft_note_unpin_by_checkpoint);
489+
ft_note_unpin_by_checkpoint,
490+
ft_note_pin_by_backup,
491+
ft_note_unpin_by_backup);
465492
*header = ft;
466493
return 0;
467494
}
@@ -481,7 +508,7 @@ static int
481508
ft_get_reference_count(FT ft) {
482509
uint32_t pinned_by_checkpoint = ft->pinned_by_checkpoint ? 1 : 0;
483510
int num_handles = toku_list_num_elements_est(&ft->live_ft_handles);
484-
return pinned_by_checkpoint + ft->num_txns + num_handles;
511+
return pinned_by_checkpoint + ft->num_txns + ft-> num_backups + num_handles;
485512
}
486513

487514
// a ft is needed in memory iff its reference count is non-zero
@@ -949,12 +976,8 @@ void toku_ft_remove_reference(
949976
assert(!needed);
950977
}
951978
if (!needed) {
952-
// close header if we are not in backup
953-
// if close header during backup, the FT files would be inconsistency
954-
bool in_backup = toku_cachefile_in_backup(ft->cf);
955-
if (!in_backup) {
956-
toku_ft_evict_from_memory(ft, oplsn_valid, oplsn);
957-
}
979+
// close header
980+
toku_ft_evict_from_memory(ft, oplsn_valid, oplsn);
958981
}
959982

960983
toku_ft_open_close_unlock();

ft/tests/cachetable-pin-checkpoint.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,9 @@ cachetable_test (void) {
376376
&test_begin_checkpoint,
377377
&dummy_end,
378378
&dummy_note_pin,
379-
&dummy_note_unpin
379+
&dummy_note_unpin,
380+
&dummy_note_pin,
381+
&dummy_note_unpin
380382
);
381383

382384
toku_pthread_t time_tid;

ft/tests/cachetable-put-checkpoint.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,9 @@ cachetable_test (void) {
508508
test_begin_checkpoint, // called in begin_checkpoint
509509
&dummy_end,
510510
&dummy_note_pin,
511-
&dummy_note_unpin
511+
&dummy_note_unpin,
512+
&dummy_note_pin,
513+
&dummy_note_unpin
512514
);
513515

514516
toku_pthread_t time_tid;

ft/tests/cachetable-simple-close.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ static void set_cf_userdata(CACHEFILE f1) {
6060
&dummy_begin,
6161
&dummy_end,
6262
&dummy_note_pin,
63-
&dummy_note_unpin
63+
&dummy_note_unpin,
64+
&dummy_note_pin,
65+
&dummy_note_unpin
6466
);
6567
}
6668

ft/tests/cachetable-test.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,7 @@ create_dummy_functions(CACHEFILE cf)
6868
&dummy_begin,
6969
&dummy_end,
7070
&dummy_note_pin,
71+
&dummy_note_unpin,
72+
&dummy_note_pin,
7173
&dummy_note_unpin);
7274
};

ft/tests/ft-in-backup-test.cc

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,6 @@ static void test_in_backup() {
5050
unlink(fname);
5151

5252
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
53-
CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct);
54-
5553
// TEST1 : for normal
5654
r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
5755
assert_zero(r);
@@ -76,7 +74,6 @@ static void test_in_backup() {
7674

7775
// TEST2: in fly without checkpoint test
7876
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
79-
cp = toku_cachetable_get_checkpointer(ct);
8077
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
8178
assert_zero(r);
8279

@@ -97,7 +94,6 @@ static void test_in_backup() {
9794
// here has a leak for this ft evicts from memroy, but that makes sense
9895
r = toku_close_ft_handle_nolsn(ft, 0);
9996
assert_zero(r);
100-
toku_cachetable_end_backup(ct);
10197
toku_cachetable_close(&ct);
10298

10399
// check the in fly key/value, it shouldn't exist
@@ -107,11 +103,11 @@ static void test_in_backup() {
107103
ft_lookup_and_fail_nodup(ft, (char*)"halou");
108104
r = toku_close_ft_handle_nolsn(ft, 0);
109105
assert_zero(r);
106+
toku_cachetable_end_backup(ct);
110107
toku_cachetable_close(&ct);
111108

112109
// TEST3: in fly with checkpoint test
113-
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
114-
cp = toku_cachetable_get_checkpointer(ct);
110+
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
115111
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
116112
assert_zero(r);
117113

@@ -132,11 +128,8 @@ static void test_in_backup() {
132128
r = toku_close_ft_handle_nolsn(ft, 0);
133129
assert_zero(r);
134130
toku_cachetable_end_backup(ct);
135-
r = toku_checkpoint(cp, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
136-
assert_zero(r);
137131
toku_cachetable_close(&ct);
138132

139-
// check after checkpoint
140133
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
141134
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
142135
assert_zero(r);

0 commit comments

Comments
 (0)