Details
-
Bug
-
Resolution: Unresolved
-
Critical
-
Elixir
-
7.5.0-3444
Description
Steps to reproduce:
- Create a 4-node cluster (replicas = 1).
- Load 5 million items (doc size = 1024).
- Start new doc ops (create:expiry).
- While the doc ops are running, repeatedly kill memcached (SIGKILL, with a sleep of 60 to 90 seconds between two kills).
- Observed: memcached crashed in cb::SemaphoreGuard<cb::Semaphore*>::reset (this=0x7f8214670820).
Core dump found on node 172.23.108.197
BackTrace:
(gdb) bt full
|
#0 0x0000000000beeb93 in cb::SemaphoreGuard<cb::Semaphore*>::reset (this=0x7f8214670820)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/semaphore_guard.cc:93
|
93 /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/semaphore_guard.cc: No such file or directory.
|
[Current thread is 1 (LWP 4577)]
|
(gdb) bt full
|
#0 0x0000000000beeb93 in cb::SemaphoreGuard<cb::Semaphore*>::reset (this=0x7f8214670820)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/semaphore_guard.cc:93
|
No locals.
|
#1 0x0000000000beec19 in cb::SemaphoreGuard<cb::Semaphore*>::~SemaphoreGuard (this=<optimized out>, __in_chrg=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/semaphore_guard.cc:78
|
No locals.
|
#2 0x000000000080c1b5 in ItemAccessVisitor::~ItemAccessVisitor (this=0x7f8214670700, __in_chrg=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/access_scanner.cc:70
|
No locals.
|
#3 ItemAccessVisitor::~ItemAccessVisitor (this=0x7f8214670700, __in_chrg=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/access_scanner.cc:71
|
No locals.
|
#4 0x0000000000775af5 in std::default_delete<InterruptableVBucketVisitor>::operator() (__ptr=<optimized out>, this=0x7f81e96095f0)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/bits/stl_deque.h:675
|
No locals.
|
#5 std::unique_ptr<InterruptableVBucketVisitor, std::default_delete<InterruptableVBucketVisitor> >::~unique_ptr (this=0x7f81e96095f0,
|
__in_chrg=<optimized out>) at /opt/gcc-10.2.0/include/c++/10.2.0/bits/unique_ptr.h:361
|
__ptr = <error reading variable>
|
#6 VBCBAdaptor::~VBCBAdaptor (this=0x7f81e9609590, __in_chrg=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/vb_adapters.h:35
|
No locals.
|
#7 0x0000000000aed3e3 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (this=0x7f81e9609580)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/ext/atomicity.h:70
|
No locals.
|
#8 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (this=0x7f81e9609580)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/bits/shared_ptr_base.h:151
|
No locals.
|
#9 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=<optimized out>, __in_chrg=<optimized out>)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/bits/shared_ptr_base.h:733
|
No locals.
|
#10 std::__shared_ptr<GlobalTask, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (this=<optimized out>, __in_chrg=<optimized out>)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/bits/shared_ptr_base.h:1183
|
No locals.
|
#11 std::__shared_ptr<GlobalTask, (__gnu_cxx::_Lock_policy)2>::reset (this=0x7f8215ff2160)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/bits/shared_ptr_base.h:1301
|
No locals.
|
#12 FollyExecutorPool::TaskProxy::resetTaskPtr(std::atomic<int>&, bool)::{lambda()#2}::operator()() (__closure=0x7f8215ff2160)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/executor/folly_executorpool.cc:215
|
guard = {previous = 0x0}
|
resetOnScheduler = true
|
--Type <RET> for more, q to quit, c to continue without paging--
|
pendingResets = <error reading variable>
|
ptrToReset = {<std::__shared_ptr<GlobalTask, (__gnu_cxx::_Lock_policy)2>> = {<std::__shared_ptr_access<GlobalTask, (__gnu_cxx::_Lock_policy)2, false, false>> = {<No data fields>}, _M_ptr = 0x0, _M_refcount = {_M_pi = 0x0}}, <No data fields>}
|
resetOnScheduler = <optimized out>
|
pendingResets = <optimized out>
|
ptrToReset = <optimized out>
|
_logger_ = <optimized out>
|
guard = <optimized out>
|
#13 FollyExecutorPool::TaskProxy::resetTaskPtr (resetOnScheduler=<optimized out>, pendingResets=..., this=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/executor/folly_executorpool.cc:221
|
resetLambda = {
|
__ptrToReset = {<std::__shared_ptr<GlobalTask, (__gnu_cxx::_Lock_policy)2>> = {<std::__shared_ptr_access<GlobalTask, (__gnu_cxx::_Lock_policy)2, false, false>> = {<No data fields>}, _M_ptr = 0x0, _M_refcount = {_M_pi = 0x0}}, <No data fields>}, __pendingResets = @0x7f81e8572b90,
|
__resetOnScheduler = true}
|
resetLambda = <optimized out>
|
_logger_ = <optimized out>
|
#14 FollyExecutorPool::State::cancelTask (this=<optimized out>, taskId=<optimized out>, force=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/executor/folly_executorpool.cc:585
|
owner = <error reading variable>
|
__for_range = <optimized out>
|
__for_begin = <optimized out>
|
__for_end = <optimized out>
|
tasks = <error reading variable>
|
it = <optimized out>
|
#15 0x0000000000c7475b in folly::detail::function::FunctionTraits<void ()>::operator()() (this=0x7f8215ff2230)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:124
|
fn = @0x7f8215ff2230: {<folly::detail::function::FunctionTraits<void()>> = {<No data fields>}, data_ = {big = 0x7f7f2b8dec10, tiny = {
|
__data = "\020\354\215+\177\177\000\000\000\000\000\000\000\000\000\000p\"\377\025\202\177\000\000\000\352\350\247\202\177\000\000\000\036\377\247\202\177\000\000x\"\377\025\202\177\000", __align = {<No data fields>}}},
|
call_ = 0xaeddf0 <folly::detail::function::FunctionTraits<void ()>::callSmall<FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::{lambda()#2}::operator()() const::{lambda()#2}>(folly::detail::function::Data&)>
|
|
, exec_ = 0x5c4290
|
<folly::detail::function::DispatchSmallTrivial::exec_<16ul>(folly::detail::function::Op, folly::detail::function::Data*, folly::detail::function::Data*)>}
|
#16 folly::EventBase::FuncRunner::operator()(folly::Function<void ()>) (func=..., this=<optimized out>)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:124
|
No locals.
|
#17 folly::detail::invokeConsumerWithTask<folly::Function<void ()>, folly::EventBase::FuncRunner&, void, void, void>(folly::EventBase::FuncRunner&, folly::Function<void ()>&&, std::shared_ptr<folly::RequestContext>&&) (consumer=..., rctx=..., task=...)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/AtomicNotificationQueue-inl.h:282
|
--Type <RET> for more, q to quit, c to continue without paging--
|
rcsg = {
|
prev_ = {<std::__shared_ptr<folly::RequestContext, (__gnu_cxx::_Lock_policy)2>> = {<std::__shared_ptr_access<folly::RequestContext, (__gnu_cxx::_Lock_policy)2, false, false>> = {<No data fields>}, _M_ptr = 0x0, _M_refcount = {_M_pi = 0x0}}, <No data fields>}}
|
rcsg = <optimized out>
|
#18 folly::AtomicNotificationQueue<folly::Function<void ()> >::drive<folly::EventBase::FuncRunner&>(folly::EventBase::FuncRunner&) (
|
this=0x7f82a7ff1f00, consumer=...)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/AtomicNotificationQueue-inl.h:340
|
curNode = <error reading variable>
|
consumeTaskStatus = <optimized out>
|
numConsumed = 0
|
nextQueue = {head_ = 0x0, size_ = 0}
|
wasAnyProcessed = <optimized out>
|
#19 0x0000000000c7bf1d in folly::EventBaseAtomicNotificationQueue<folly::Function<void ()>, folly::EventBase::FuncRunner>::drive<folly::EventBase::FuncRunner&>(folly::EventBase::FuncRunner&) (consumer=..., this=0x7f82a7ff1e00)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBaseAtomicNotificationQueue-inl.h:266
|
wasEmpty = <optimized out>
|
wasEmpty = <optimized out>
|
#20 folly::EventBaseAtomicNotificationQueue<folly::Function<void ()>, folly::EventBase::FuncRunner>::execute() (this=0x7f82a7ff1e00)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBaseAtomicNotificationQueue-inl.h:270
|
No locals.
|
#21 0x0000000000c7bfad in non-virtual thunk to folly::EventBaseAtomicNotificationQueue<folly::Function<void ()>, folly::EventBase::FuncRunner>::handlerReady(unsigned short) ()
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBaseAtomicNotificationQueue-inl.h:260
|
No symbol table info available.
|
#22 0x00007f82aaa9bfaa in event_persist_closure (ev=<optimized out>, base=0x7f82a7e8ea00)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:1629
|
evcb_callback = 0xc80d40 <folly::EventHandler::libeventCallback(int, short, void*)>
|
evcb_fd = 58
|
evcb_res = 2
|
evcb_arg = 0x7f82a7ff1e28
|
evcb_callback = <optimized out>
|
evcb_fd = <optimized out>
|
evcb_res = <optimized out>
|
evcb_arg = <optimized out>
|
run_at = <optimized out>
|
relative_to = <optimized out>
|
--Type <RET> for more, q to quit, c to continue without paging--
|
delay = <optimized out>
|
now = <optimized out>
|
usec_mask = <optimized out>
|
#23 event_process_active_single_queue (base=base@entry=0x7f82a7e8ea00, max_to_process=max_to_process@entry=2147483647, endtime=endtime@entry=0x0,
|
activeq=<optimized out>)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:1688
|
ev = <optimized out>
|
evcb = <optimized out>
|
count = 1
|
#24 0x00007f82aaa9c54f in event_process_active (base=0x7f82a7e8ea00)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:1789
|
activeq = <optimized out>
|
i = 0
|
c = 0
|
tv = {tv_sec = 1665151, tv_usec = 969389}
|
maxcb = 2147483647
|
endtime = 0x0
|
limit_after_prio = 2147483647
|
activeq = <optimized out>
|
i = <optimized out>
|
c = <optimized out>
|
endtime = <optimized out>
|
tv = <optimized out>
|
maxcb = <optimized out>
|
limit_after_prio = <optimized out>
|
#25 event_base_loop (base=0x7f82a7e8ea00, flags=flags@entry=1)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:2012
|
n = <optimized out>
|
evsel = 0x7f82aacafc20 <epollops>
|
tv = {tv_sec = 0, tv_usec = 278000}
|
tv_p = <optimized out>
|
res = <optimized out>
|
done = 0
|
retval = 0
|
__func__ = "event_base_loop"
|
#26 0x0000000000c756f6 in (anonymous namespace)::EventBaseBackend::eb_event_base_loop (flags=1, this=<optimized out>)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:74
|
No locals.
|
#27 folly::EventBase::loopBody (this=0x7f829c071000, flags=<optimized out>, ignoreKeepAlive=<optimized out>)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:381
|
--Type <RET> for more, q to quit, c to continue without paging--
|
callbacks = {<boost::intrusive::list_impl<boost::intrusive::bhtraits<folly::EventBase::LoopCallback, boost::intrusive::list_node_traits<void*>, (boost::intrusive::link_mode_type)2, boost::intrusive::dft_tag, 1>, unsigned long, false, void>> = {static constant_time_size = false,
|
|
static stateful_value_traits = <optimized out>, static has_container_from_iterator = <optimized out>,
|
static safemode_or_autounlink = true,
|
data_ = {<boost::intrusive::bhtraits<folly::EventBase::LoopCallback, boost::intrusive::list_node_traits<void*>, (boost::intrusive::link_mode_type)2, boost::intrusive::dft_tag, 1>> = {<boost::intrusive::bhtraits_base<folly::EventBase::LoopCallback, boost::intrusive::list_node<void*>*, boost::intrusive::dft_tag, 1>> = {<No data fields>}, static link_mode = boost::intrusive::auto_unlink},
|
root_plus_size_ = {<boost::intrusive::detail::size_holder<false, unsigned long, void>> = {static constant_time_size = <optimized out>},
|
m_header = {<boost::intrusive::list_node<void*>> = {next_ = 0x7f8215ff2430,
|
prev_ = 0x7f8215ff2430}, <No data fields>}}}}, <No data fields>}
|
message = 0xd70b00 "Your code just tried to loop over an event base from inside another event base loop. Since libevent is not reentrant, this leads to undefined behavior in opt builds. Please fix immediately. For the co"...
|
SCOPE_EXIT_STATE8 = {<folly::detail::ScopeGuardImplBase> = {dismissed_ = false}, function_ = {__this = 0x7f829c071000}}
|
res = 0
|
ranLoopCallbacks = <optimized out>
|
blocking = <optimized out>
|
once = <optimized out>
|
prev = {__d = {__r = 1664975636741483}}
|
idleStart = {__d = {__r = 1665151973294012}}
|
busy = <optimized out>
|
idle = <optimized out>
|
prevLoopThread = {_M_thread = 0}
|
#28 0x0000000000c75bfe in folly::EventBase::loop (this=0x7f829c071000)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:305
|
guard = {list_ = {prev = 0x7f8215ff2530, curr = {forbid = true, allowTerminationOnBlocking = false, ex = 0x7f829c071010, tag = {
|
static npos = <optimized out>, b_ = 0x7f82a7f2f448 <error: Cannot access memory at address 0x7f82a7f2f448>,
|
e_ = 0x7f82a7f2f455 <error: Cannot access memory at address 0x7f82a7f2f455>}}}}
|
#29 0x0000000000c781a8 in folly::EventBase::loopForever (this=0x7f829c071000)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:524
|
SCOPE_EXIT_STATE9 = <optimized out>
|
SCOPE_EXIT_STATE10 = <optimized out>
|
ret = <optimized out>
|
#30 0x0000000000c34779 in folly::IOThreadPoolExecutor::threadRun (this=0x7f82a7f2f400, thread=...)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/executors/IOThreadPoolExecutor.cpp:250
|
guard = {list_ = {prev = 0x12ab9a0 <folly::emptyList>, curr = {forbid = true, allowTerminationOnBlocking = false, ex = 0x7f82a7f2f400, tag = {
|
static npos = <optimized out>, b_ = 0x7f82a7f2f448 <error: Cannot access memory at address 0x7f82a7f2f448>,
|
e_ = 0x7f82a7f2f455 <error: Cannot access memory at address 0x7f82a7f2f455>}}}}
|
ioThread = {<std::__shared_ptr<folly::IOThreadPoolExecutor::IOThread, (__gnu_cxx::_Lock_policy)2>> = {<std::__shared_ptr_access<folly::IOThreadPoolExecutor::IOThread, (__gnu_cxx::_Lock_policy)2, false, false>> = {<No data fields>}, _M_ptr = 0x7f82a7e8fa10, _M_refcount = {
|
--Type <RET> for more, q to quit, c to continue without paging--
|
_M_pi = 0x7f82a7e8fa00}}, <No data fields>}
|
tid = 4577
|
SCOPE_EXIT_STATE6 = {<folly::detail::ScopeGuardImplBase> = {dismissed_ = false}, function_ = {__this = 0x7f82a7f2f400,
|
double type_info node = <synthetic pointer><error reading variable>}}
|
idler = {_M_t = {<std::__uniq_ptr_impl<folly::MemoryIdlerTimeout, std::default_delete<folly::MemoryIdlerTimeout> >> = {
|
_M_t = {<std::_Tuple_impl<0, folly::MemoryIdlerTimeout*, std::default_delete<folly::MemoryIdlerTimeout> >> = {<std::_Tuple_impl<1, std::default_delete<folly::MemoryIdlerTimeout> >> = {<std::_Head_base<1, std::default_delete<folly::MemoryIdlerTimeout>, true>> = {<std::default_delete<folly::MemoryIdlerTimeout>> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::_Head_base<0, folly::MemoryIdlerTimeout*, false>> = {
|
_M_head_impl = 0x7f82a7e887c0}, <No data fields>}, <No data fields>}}, <No data fields>}}
|
guard = <optimized out>
|
#31 0x0000000000c3ffaa in std::__invoke_impl<void, void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&> (__f=<optimized out>, __t=<optimized out>, __f=<optimized out>,
|
__t=<optimized out>) at /opt/gcc-10.2.0/include/c++/10.2.0/ext/atomicity.h:100
|
No locals.
|
#32 std::__invoke<void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&> (__fn=<optimized out>) at /opt/gcc-10.2.0/include/c++/10.2.0/bits/invoke.h:95
|
No locals.
|
#33 std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>::__call<void, , 0ul, 1ul>(std::tuple<>&&, std::_Index_tuple<0ul, 1ul>) (__args=..., this=<optimized out>)
|
at /opt/gcc-10.2.0/include/c++/10.2.0/functional:416
|
No locals.
|
#34 std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>::operator()<, void>() (this=<optimized out>) at /opt/gcc-10.2.0/include/c++/10.2.0/functional:499
|
No locals.
|
#35 folly::detail::function::FunctionTraits<void ()>::callSmall<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)> >(folly::detail::function::Data&) (p=...)
|
at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/Function.h:363
|
fn = <optimized out>
|
#36 0x00007f82a9c1fd40 in std::execute_native_thread_routine (__p=0x7f82a8051ec0)
|
at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/src/c++11/thread.cc:80
|
__t = <optimized out>
|
#37 0x00007f82aa423fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
|
ret = <optimized out>
|
pd = <optimized out>
|
now = <optimized out>
|
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140196691556096, -9059238594222426742, 140734146855518, 140734146855519, 140196691556096,
|
18244768, 9026116907888866698, 9025859046867924362}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0,
|
cleanup = 0x0, canceltype = 0}}}
|
not_first_call = <optimized out>
|
#38 0x00007f82a96edeff in __init_misc (argc=<optimized out>, argv=0x7f8215ffb700, envp=0x1) at init-misc.c:33
|
--Type <RET> for more, q to quit, c to continue without paging--
|
p = <optimized out>
|
#39 0x0000000000000000 in ?? ()
|
No symbol table info available.
|
QE-TEST:
guides/gradlew --refresh-dependencies testrunner -P jython=/opt/jython/bin/jython -P 'args=-i /tmp/testexec.75349.ini bucket_storage=magma,rerun=false,GROUP=P1;graceful_replica,randomize_value=true,doc_size=256,bucket_eviction_policy=fullEviction,nodes_init=4,enable_dp=false,collect_pcaps=True,get-cbcollect-info=True,autoCompactionDefined=true,client_version=3.4.0,upgrade_version=7.5.0-3444 -t storage.magma.magma_crash_recovery.MagmaCrashTests.test_crash_during_ops,num_items=5000000,doc_size=1024,sdk_timeout=60,graceful=True,doc_ops=create:expiry,replicas=1,GROUP=P1;graceful_replica,multiplier=20'
|