Details
-
Improvement
-
Resolution: Fixed
-
Major
-
4.5.0
Description
For background see: https://forums.couchbase.com/t/memcached-crash-every-10-minutes/12241/2
For some unknown reason (best guess: a corrupted couchstore file), CouchKVStore::initScanContext() failed to run couchstore_changes_count. The failure is handled by throwing a runtime_error; however, that exception isn't caught by the DCPBackfill task, and hence it terminates memcached.
The callers of this function already treat a NULL return value as an error (which they handle), so we can propagate the failure the same way: log the failure and simply return NULL instead of throwing.
Backtrace of failure:
Thread 1 (LWP 6408):
|
#0 0x00007f0d6cc46c37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
|
resultvar = 0
|
pid = 6372
|
selftid = 6408
|
#1 0x00007f0d6cc4a028 in __GI_abort () at abort.c:89
|
save_stage = 2
|
act = {__sigaction_handler = {sa_handler = 0x152, sa_sigaction = 0x152}, sa_mask = {__val = {4246544, 139690097486600, 139695678043431,
|
139693811302405, 0, 1, 139695635918128, 6860408, 139690097486600, 139686635513728, 139695678072021, 139695203780232, 139695636854573, 1,
|
139695388499024, 139690108686672}}, sa_flags = 10, sa_restorer = 0x7f0d52ff8700}
|
sigs = {__val = {32, 0 <repeats 15 times>}}
|
#2 0x00007f0d6d24b535 in __gnu_cxx::__verbose_terminate_handler() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.19
|
No symbol table info available.
|
#3 0x0000000000427fcf in backtrace_terminate_handler () at /home/couchbase/jenkins/workspace/watson-unix/memcached/utilities/terminate_handler.cc:63
|
buffer = " /opt/couchbase/bin/../lib/libplatform.so.0.1.0(print_backtrace_to_buffer+0x2f) [0x7f0d6e5d526f]\n /opt/couchbase/bin/memcached() [0x427f9c]\n /usr/lib/x86_64-linux-gnu/libstdc++.so.6() [0x7f0d"...
|
format_str = "*** Fatal error encountered during exception handling ***\nCall stack:\n%s"
|
#4 0x00007f0d6d2496d6 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.19
|
No symbol table info available.
|
#5 0x00007f0d6d249703 in std::terminate() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.19
|
No symbol table info available.
|
#6 0x00007f0d6d249922 in __cxa_throw () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.19
|
No symbol table info available.
|
#7 0x00007f0d6957174d in CouchKVStore::initScanContext (this=<optimized out>, cb=<error reading variable: Cannot access memory at address 0x7f0b544a2728>,
|
cl=<error reading variable: Cannot access memory at address 0x7f0b544a2768>, vbid=<optimized out>, startSeqno=1, options=ALL_ITEMS,
|
valOptions=VALUES_DECOMPRESSED) at /home/couchbase/jenkins/workspace/watson-unix/ep-engine/src/couch-kvstore/couch-kvstore.cc:1150
|
err = <error reading variable: Cannot access memory at address 0x7f0c22a3aaf0>
|
db = 0x7f0b89de8800
|
errorCode = <optimized out>
|
info = {filename = 0x7f0b540d43b0 <error: Cannot access memory at address 0x7f0b540d43b0>, last_sequence = 204612, doc_count = 22700,
|
deleted_count = 39, space_used = 15592853, file_size = 24137819, header_position = 24137728, purge_seq = 0}
|
count = 0
|
lh = <optimized out>
|
backfillId = <optimized out>
|
#8 0x00007f0d694bfa8c in DCPBackfill::create (this=this@entry=0x7f0c3c68eae0)
|
at /home/couchbase/jenkins/workspace/watson-unix/ep-engine/src/dcp/backfill.cc:186
|
valFilter = VALUES_DECOMPRESSED
|
lastPersistedSeqno = <optimized out>
|
kvstore = 0x7f0d5d330000
|
cb = <error reading variable: Cannot access memory at address 0x7f0b544a2728>
|
cl = <error reading variable: Cannot access memory at address 0x7f0b544a2768>
|
#9 0x00007f0d694bfe84 in DCPBackfill::run (this=this@entry=0x7f0c3c68eae0) at /home/couchbase/jenkins/workspace/watson-unix/ep-engine/src/dcp/backfill.cc:130
|
lh = {mutex = @0x7f0c3c68eb10, locked = true}
|
#10 0x00007f0d694baf01 in BackfillManager::backfill (this=0x7f0d6bfb7020)
|
at /home/couchbase/jenkins/workspace/watson-unix/ep-engine/src/dcp/backfill-manager.cc:247
|
lh = {mutex = @0x7f0d6bfb7028, locked = false}
|
backfill = 0x7f0c3c68eae0
|
status = <optimized out>
|
#11 0x00007f0d694bb1ed in BackfillManagerTask::run (this=0x7f0c16e441e0)
|
at /home/couchbase/jenkins/workspace/watson-unix/ep-engine/src/dcp/backfill-manager.cc:43
|
status = <optimized out>
|
#12 0x00007f0d6951f310 in ExecutorThread::run (this=0x7f0d58a0d7e0) at /home/couchbase/jenkins/workspace/watson-unix/ep-engine/src/executorthread.cc:115
|
startReltime = 750
|
again = <optimized out>
|
runtime = <optimized out>
|
q = 0x7f0d5e020fc0
|
tick = 46 '.'
|
#13 0x00007f0d6e5d284a in run (this=0x7f0d56816b20) at /home/couchbase/jenkins/workspace/watson-unix/platform/src/cb_pthreads.cc:54
|
No locals.
|
#14 platform_thread_wrap (arg=0x7f0d56816b20) at /home/couchbase/jenkins/workspace/watson-unix/platform/src/cb_pthreads.cc:66
|
context = std::unique_ptr<CouchbaseThread> containing 0x7f0d56816b20
|
#15 0x00007f0d6e3b7184 in start_thread (arg=0x7f0d52ff8700) at pthread_create.c:312
|
__res = <optimized out>
|
pd = 0x7f0d52ff8700
|
now = <optimized out>
|
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139695203780352, 6367368655378585353, 0, 0, 139695203781056, 139695203780352, -6464963510805981431,
|
-6465009106333329655}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
|
not_first_call = <optimized out>
|
pagesize_m1 = <optimized out>
|
sp = <optimized out>
|
freesize = <optimized out>
|
__PRETTY_FUNCTION__ = "start_thread"
|
#16 0x00007f0d6cd0a37d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
|