Details
-
Bug
-
Resolution: Fixed
-
Major
-
master
Description
During the kv-engine-jepsen-post-commit-259 run for patch 96ed3ebe9260eed0e71b0811807e9a2c0efa509d, multiple crash dumps were generated due to a GSL precondition failure in EPBucket::flushVBucket at ep_bucket.cc:537.
536
|
if (hcs) { |
537
|
Expects(hcs > vbstate.highCompletedSeqno);
|
538
|
vbstate.highCompletedSeqno = *hcs;
|
539
|
}
|
This failure and the subsequent crash occurred over ten times on each node for which we have a cbcollect, in every case due to the same precondition failure.
Full backtrace of the crash below, for dump file 09788a51-b53d-8ae5-1561e6b7-00d295f4.dmp:
(gdb) bt full
|
#0 0x00007f49f44d7428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
|
resultvar = 0
|
pd = <optimized out>
|
pid = 13534
|
selftid = 13670
|
#1 0x00007f49f44d902a in __GI_abort () at abort.c:89
|
save_stage = 2
|
act = {__sigaction_handler = {sa_handler = 0xfffdd6, sa_sigaction = 0xfffdd6}, sa_mask = {__val = {16777050, 0, 0, 0, 0, 139955611792835, 139955529410704, 139954696982000, 139955608339465, 139955611792704, 139955611792704, 10, 139954596356896, 139955529410704, 139955608340507, 139955611792704}}, sa_flags = -184237792,
|
sa_restorer = 0x809880 <stderr@@GLIBC_2.2.5>}
|
sigs = {__val = {32, 0 <repeats 15 times>}}
|
#2 0x00007f49f5041242 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.26
|
No symbol table info available.
|
#3 0x00000000005587d5 in backtrace_terminate_handler () at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/kv_engine/utilities/terminate_handler.cc:86
|
No locals.
|
#4 0x00007f49f504ce86 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.26
|
No symbol table info available.
|
#5 0x00007f49f504ced1 in std::terminate() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.26
|
No symbol table info available.
|
#6 0x00007f49f504d105 in __cxa_throw () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.26
|
No symbol table info available.
|
#7 0x00007f49efa4bee4 in gsl::fail_fast_assert (cond=<optimized out>, message=0x7f49efbe2eb0 "GSL: Precondition failure at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/kv_engine/engines/ep/src/ep_bucket.cc: 537") at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/third_party/gsl-lite/include/gsl/gsl-lite.h:473
|
No locals.
|
#8 0x00007f49efa5ef47 in EPBucket::flushVBucket (this=0x7f49e29bc000, vbid=...) at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/kv_engine/engines/ep/src/ep_bucket.cc:537
|
rlh = {lock_ = 0x7f49e2c614a0, token_ = {type_ = folly::SharedMutexToken::Type::INLINE_SHARED, slot_ = 0}}
|
options = VBStatePersist::VBSTATE_CACHE_UPDATE_ONLY
|
prev = <optimized out>
|
maxSeqno = 40
|
minSeqno = 40
|
mustCheckpointVBState = <optimized out>
|
collectionFlush = {needsPurge = false, mutated = std::unordered_set with 0 elements, manifest = @0x7f49f3aa8b60}
|
flush_end = <optimized out>
|
persistedVbState = <optimized out>
|
inMemoryVbState = {static CurrentVersion = 3, state = vbucket_state_replica, checkpointId = 5, maxDeletedSeqno = {counter = {_M_elems = "\000\000\000\000\000"}}, highSeqno = 40, purgeSeqno = 0, lastSnapStart = 35, lastSnapEnd = 39, maxCas = 1566214782942904320, hlcCasEpochSeqno = 1, mightContainXattrs = false,
|
failovers = <error: Cannot access memory at address 0x7f49f3b92000>, supportsNamespaces = true, replicationTopology = {m_type = nlohmann::detail::value_t::null, m_value = {object = 0x0, array = 0x0, string = 0x0, boolean = false, number_integer = 0, number_unsigned = 0, number_float = 0}}, version = 3, highCompletedSeqno = 0, highPreparedSeqno = 0,
|
onDiskPrepares = 0}
|
hcs = {<boost::optional_detail::tc_optional_base<unsigned long>> = {<boost::optional_detail::optional_tag> = {<No data fields>}, m_initialized = true, m_storage = 36}, <No data fields>}
|
vbstate = {static CurrentVersion = 3, state = vbucket_state_replica, checkpointId = 0, maxDeletedSeqno = {counter = {_M_elems = "\000\000\000\000\000"}}, highSeqno = 39, purgeSeqno = 0, lastSnapStart = 39, lastSnapEnd = 40, maxCas = 1566214781553344512, hlcCasEpochSeqno = 1, mightContainXattrs = false,
|
failovers = <error: Cannot access memory at address 0x7f49f3be97a0>, supportsNamespaces = true, replicationTopology = {m_type = nlohmann::detail::value_t::null, m_value = {object = 0x0, array = 0x0, string = 0x0, boolean = false, number_integer = 0, number_unsigned = 0, number_float = 0}}, version = 3, highCompletedSeqno = 36,
|
highPreparedSeqno = 38, onDiskPrepares = 20}
|
hps = <optimized out>
|
trans_time = <optimized out>
|
toFlush = {items = std::vector of length 2, capacity 2 = {<error reading variable>
|
items = std::vector of length 2, capacity 2 = {<error reading variable items (Cannot access memory at address 0x7f49f3b4e500)>
|
range = @0x7f49bdff8e28: {start = 39, end = 40}
|
rwUnderlying = 0x7f49e29e2000
|
shard = <optimized out>
|
items_flushed = <optimized out>
|
moreAvailable = false
|
flush_start = <optimized out>
|
vb = {vb = <error reading variable: Cannot access memory at address 0x7f49e2c51e28>, lock = {_M_device = 0x7f49e28c2948, _M_owns = true}}
|
#9 0x00007f49efaa9575 in Flusher::flushVB (this=0x7f49e2947cc0) at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/kv_engine/engines/ep/src/flusher.cc:303
|
vbid = {vbid = 21}
|
#10 0x00007f49efaa9975 in Flusher::step (this=0x7f49e2947cc0, task=0x7f49e29394f0) at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/kv_engine/engines/ep/src/flusher.cc:210
|
currentState = Flusher::State::Running
|
#11 0x00007f49efaa2c4c in ExecutorThread::run (this=0x7f49e2947400) at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/kv_engine/engines/ep/src/executorthread.cc:153
|
engine = <optimized out>
|
curTaskDescr = <error: Cannot access memory at address 0x7f49e2905e60>
|
woketime = <optimized out>
|
scheduleOverhead = <optimized out>
|
again = <optimized out>
|
runtime = <optimized out>
|
q = <optimized out>
|
tick = <optimized out>
|
#12 0x00007f49f68529c7 in CouchbaseThread::run (this=0x7f49e2806aa0) at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/platform/src/cb_pthreads.cc:58
|
No locals.
|
#13 platform_thread_wrap (arg=0x7f49e2806aa0) at /home/couchbase/jenkins/workspace/kv-engine-jepsen-post-commit/platform/src/cb_pthreads.cc:71
|
context = std::unique_ptr<CouchbaseThread> = {get() = 0x7f49e2806aa0}
|
#14 0x00007f49f48736ba in start_thread (arg=0x7f49bdffb700) at pthread_create.c:333
|
__res = <optimized out>
|
pd = 0x7f49bdffb700
|
now = <optimized out>
|
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139954696992512, 1977156117213450083, 0, 139955309365999, 139954696993216, 139955310720000, -1881590965332897949, -1881471046620180637}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
|
not_first_call = <optimized out>
|
pagesize_m1 = <optimized out>
|
sp = <optimized out>
|
freesize = <optimized out>
|
__PRETTY_FUNCTION__ = "start_thread"
|
#15 0x00007f49f45a941d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
|
No locals.
|
I've also attached the nodes' cbcollects with TCP packet dumps; these also contain all the crash dumps that occurred. In addition, I've attached a zip of the install directory that was built for the post-commit run, to help with crash dump analysis.
Attachments
Issue Links
- is cloned by
-
MB-35631 [Jepsen] Crash due to uncaught pre-condition failure in EPBucket::flushVBucket
- Closed