Details
-
Bug
-
Resolution: Fixed
-
Major
-
4.5.1
-
None
-
Untriaged
-
Unknown
Description
Critical as this could lead to a total deadlock. (may affect 4.1.2)
Seen whilst testing sherlock->watson merge, analysed the code and it seems this is a latent issue and hard to re-produce.
The fix maybe just to remove the locking from ~CheckpointManager, if we need to lock in the destructor to protect consistency we have other issues!
The issue is that when the executor thread does a reset on the current task, the VBCBAdapator is the last one holding the ref-counted vbucket, so destruction occurs and ~vbucket calls the destructor of the checkpoint manager, which is the reverse locks ordering of a previous code path.
TSAN output, I've annotated it so the M123 etc... is more descriptive.
WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock) (pid=170834)
|
Cycle in lock order graph: M_checkpoint (0x7d640002e9a8) => M_exepool (0x7d4c00008288) => M_taskqueue (0x7d4400008e80) => M_exethread (0x7d380000df60) => M_checkpoint
|
|
Mutex M_exepool acquired here while holding mutex M_checkpoint in thread T35:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x0000000f47b0)
|
#2 ExecutorPool::wake(unsigned long) ep-engine/src/executorpool.cc:355 (ep.so+0x0000000f48f1)
|
#3 Flusher::wake() ep-engine/src/flusher.cc:155 (ep.so+0x000000101ee6)
|
#4 NotifyFlusherCB::callback(unsigned short&) ep-engine/src/flusher.h:88 (ep.so+0x00000010d194)
|
#5 Checkpoint::queueDirty(SingleThreadedRCPtr<Item> const&, CheckpointManager*) ep-engine/src/checkpoint.h:675 (ep.so+0x0000000271b0)
|
#6 CheckpointManager::closeOpenCheckpoint_UNLOCKED() ep-engine/src/checkpoint.cc:454 (ep.so+0x000000028dcb)
|
#7 CheckpointManager::addNewCheckpoint_UNLOCKED(unsigned long, unsigned long, unsigned long) ep-engine/src/checkpoint.cc:371 (ep.so+0x00000002881f)
|
#8 CheckpointManager::checkOpenCheckpoint_UNLOCKED(bool, bool) ep-engine/src/checkpoint.cc:361 (ep.so+0x00000002bd71)
|
#9 CheckpointVisitor::visitBucket(RCPtr<VBucket>&) ep-engine/src/checkpoint_remover.cc:43 (ep.so+0x00000003c3bd)
|
#10 VBCBAdaptor::run() ep-engine/src/ep.cc:3924 (ep.so+0x0000000a6174)
|
#11 ExecutorThread::run() ep-engine/src/executorthread.cc:115 (ep.so+0x0000000fe1b6)
|
#12 launch_executor_thread(void*) ep-engine/src/executorthread.cc:33 (ep.so+0x0000000fdd15)
|
#13 platform_thread_wrap(void*) platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057fb)
|
|
Mutex M_checkpoint previously acquired by the same thread here:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x00000002b9dc)
|
#2 CheckpointVisitor::visitBucket(RCPtr<VBucket>&) ep-engine/src/checkpoint_remover.cc:43 (ep.so+0x00000003c3bd)
|
#3 VBCBAdaptor::run() ep-engine/src/ep.cc:3924 (ep.so+0x0000000a6174)
|
#4 ExecutorThread::run() ep-engine/src/executorthread.cc:115 (ep.so+0x0000000fe1b6)
|
#5 launch_executor_thread(void*) ep-engine/src/executorthread.cc:33 (ep.so+0x0000000fdd15)
|
#6 platform_thread_wrap(void*) platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057fb)
|
|
Mutex M_taskqueue acquired here while holding mutex M_exepool in main thread:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x000000143a8f)
|
#2 TaskQueue::wake(SingleThreadedRCPtr<GlobalTask>&) ep-engine/src/taskqueue.cc:300 (ep.so+0x0000001451be)
|
#3 ExecutorPool::_wake(unsigned long) ep-engine/src/executorpool.cc:347 (ep.so+0x0000000f4869)
|
#4 ExecutorPool::wake(unsigned long) ep-engine/src/executorpool.cc:355 (ep.so+0x0000000f48f1)
|
#5 EventuallyPersistentEngine::memoryCondition() ep-engine/src/ep.h:849 (ep.so+0x0000000e7710)
|
#6 EventuallyPersistentEngine::store(void const*, void*, unsigned long*, ENGINE_STORE_OPERATION, unsigned short) ep-engine/src/ep_engine.cc:2385 (ep.so+0x0000000be4cc)
|
#7 EvpStore(engine_interface*, void const*, void*, unsigned long*, ENGINE_STORE_OPERATION, unsigned short) ep-engine/src/ep_engine.cc:240 (ep.so+0x0000000bb2a7)
|
#8 mock_store(engine_interface*, void const*, void*, unsigned long*, ENGINE_STORE_OPERATION, unsigned short) memcached/programs/engine_testapp/engine_testapp.cc:275 (engine_testapp+0x0000004cdf42)
|
#9 storeCasVb11(engine_interface*, engine_interface_v1*, void const*, ENGINE_STORE_OPERATION, char const*, char const*, unsigned long, unsigned int, void**, unsigned long, unsigned short, unsigned int, unsigned char) ep-engine/tests/ep_test_apis.cc:978 (ep_testsuite.so+0x00000006cecf)
|
#10 store(engine_interface*, engine_interface_v1*, void const*, ENGINE_STORE_OPERATION, char const*, char const*, void**, unsigned long, unsigned short, unsigned int, unsigned char) ep-engine/tests/ep_test_apis.cc:929 (ep_testsuite.so+0x00000006a88b)
|
#11 test_access_scanner(engine_interface*, engine_interface_v1*) ep-engine/tests/ep_testsuite.cc:2812 (ep_testsuite.so+0x00000001f80a)
|
#12 execute_test(test, char const*, char const*) memcached/programs/engine_testapp/engine_testapp.cc:1145 (engine_testapp+0x0000004cc8f7)
|
#13 __libc_start_main /build/buildd/eglibc-2.15/csu/libc-start.c:226 (libc.so.6+0x00000002176c)
|
|
Mutex M_exepool previously acquired by the same thread here:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x0000000f47b0)
|
#2 ExecutorPool::wake(unsigned long) ep-engine/src/executorpool.cc:355 (ep.so+0x0000000f48f1)
|
#3 EventuallyPersistentEngine::memoryCondition() ep-engine/src/ep.h:849 (ep.so+0x0000000e7710)
|
#4 EventuallyPersistentEngine::store(void const*, void*, unsigned long*, ENGINE_STORE_OPERATION, unsigned short) ep-engine/src/ep_engine.cc:2385 (ep.so+0x0000000be4cc)
|
#5 EvpStore(engine_interface*, void const*, void*, unsigned long*, ENGINE_STORE_OPERATION, unsigned short) ep-engine/src/ep_engine.cc:240 (ep.so+0x0000000bb2a7)
|
#6 mock_store(engine_interface*, void const*, void*, unsigned long*, ENGINE_STORE_OPERATION, unsigned short) memcached/programs/engine_testapp/engine_testapp.cc:275 (engine_testapp+0x0000004cdf42)
|
#7 storeCasVb11(engine_interface*, engine_interface_v1*, void const*, ENGINE_STORE_OPERATION, char const*, char const*, unsigned long, unsigned int, void**, unsigned long, unsigned short, unsigned int, unsigned char) ep-engine/tests/ep_test_apis.cc:978 (ep_testsuite.so+0x00000006cecf)
|
#8 store(engine_interface*, engine_interface_v1*, void const*, ENGINE_STORE_OPERATION, char const*, char const*, void**, unsigned long, unsigned short, unsigned int, unsigned char) ep-engine/tests/ep_test_apis.cc:929 (ep_testsuite.so+0x00000006a88b)
|
#9 test_access_scanner(engine_interface*, engine_interface_v1*) ep-engine/tests/ep_testsuite.cc:2812 (ep_testsuite.so+0x00000001f80a)
|
#10 execute_test(test, char const*, char const*) memcached/programs/engine_testapp/engine_testapp.cc:1145 (engine_testapp+0x0000004cc8f7)
|
#11 __libc_start_main /build/buildd/eglibc-2.15/csu/libc-start.c:226 (libc.so.6+0x00000002176c)
|
|
Mutex M_exethread acquired here while holding mutex M_taskqueue in thread T36:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x0000000fed9e)
|
#2 TaskQueue::_fetchNextTask(ExecutorThread&, bool) ep-engine/src/taskqueue.cc:142 (ep.so+0x000000142d03)
|
#3 TaskQueue::fetchNextTask(ExecutorThread&, bool) ep-engine/src/taskqueue.cc:161 (ep.so+0x000000143577)
|
#4 ExecutorPool::_nextTask(ExecutorThread&, unsigned char) ep-engine/src/executorpool.cc:231 (ep.so+0x0000000f392b)
|
#5 ExecutorPool::nextTask(ExecutorThread&, unsigned char) ep-engine/src/executorpool.cc:251 (ep.so+0x0000000f39f6)
|
#6 ExecutorThread::run() ep-engine/src/executorthread.cc:81 (ep.so+0x0000000fdf29)
|
#7 launch_executor_thread(void*) ep-engine/src/executorthread.cc:33 (ep.so+0x0000000fdd15)
|
#8 platform_thread_wrap(void*) platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057fb)
|
|
Mutex M_taskqueue previously acquired by the same thread here:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x000000142934)
|
#2 TaskQueue::fetchNextTask(ExecutorThread&, bool) ep-engine/src/taskqueue.cc:161 (ep.so+0x000000143577)
|
#3 ExecutorPool::_nextTask(ExecutorThread&, unsigned char) ep-engine/src/executorpool.cc:231 (ep.so+0x0000000f392b)
|
#4 ExecutorPool::nextTask(ExecutorThread&, unsigned char) ep-engine/src/executorpool.cc:251 (ep.so+0x0000000f39f6)
|
#5 ExecutorThread::run() ep-engine/src/executorthread.cc:81 (ep.so+0x0000000fdf29)
|
#6 launch_executor_thread(void*) ep-engine/src/executorthread.cc:33 (ep.so+0x0000000fdd15)
|
#7 platform_thread_wrap(void*) platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057fb)
|
|
Mutex M_checkpoint acquired here while holding mutex M_exethread in thread T36:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 CheckpointManager::~CheckpointManager() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x000000027fdd)
|
#2 VBucket::~VBucket() ep-engine/src/vbucket.cc:152 (ep.so+0x00000014a018)
|
#3 PagingVisitor::~PagingVisitor() ep-engine/src/atomic.h:190 (ep.so+0x00000010a5e6)
|
#4 PagingVisitor::~PagingVisitor() ep-engine/src/item_pager.cc:43 (ep.so+0x00000010a645)
|
#5 std::_Sp_counted_ptr<PagingVisitor*, (__gnu_cxx::_Lock_policy)2>::_M_dispose() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/c++/4.9/bits/shared_ptr_base.h:373 (ep.so+0x00000010a2b0)
|
#6 VBCBAdaptor::~VBCBAdaptor() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/c++/4.9/bits/shared_ptr_base.h:149 (ep.so+0x0000000aea7e)
|
#7 ExecutorThread::run() ep-engine/src/atomic.h:325 (ep.so+0x0000000fdee4)
|
#8 launch_executor_thread(void*) ep-engine/src/executorthread.cc:33 (ep.so+0x0000000fdd15)
|
#9 platform_thread_wrap(void*) platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057fb)
|
|
Mutex M_exethread previously acquired by the same thread here:
|
#0 pthread_mutex_lock <null> (engine_testapp+0x000000486760)
|
#1 std::mutex::lock() /usr/bin/../lib/gcc/x86_64-linux-gnu/4.9/../../../../include/x86_64-linux-gnu/c++/4.9/bits/gthr-default.h:748 (ep.so+0x0000000fde94)
|
#2 launch_executor_thread(void*) ep-engine/src/executorthread.cc:33 (ep.so+0x0000000fdd15)
|
#3 platform_thread_wrap(void*) platform/src/cb_pthreads.cc:54 (libplatform.so.0.1.0+0x0000000057fb)
|
|
Thread T33 'mc:nonio_16' (tid=170911, running) created by main thread at:
|
#0 pthread_create <null> (engine_testapp+0x000000467911)
|
#1 cb_create_named_thread platform/src/cb_pthreads.cc:104 (libplatform.so.0.1.0+0x000000005655)
|
#2 ExecutorThread::start() ep-engine/src/executorthread.cc:46 (ep.so+0x0000000fd6b7)
|
#3 ExecutorPool::_startWorkers() ep-engine/src/executorpool.cc:538 (ep.so+0x0000000f764c)
|
#4 ExecutorPool::_registerTaskable(Taskable&) ep-engine/src/executorpool.cc:488 (ep.so+0x0000000f648f)
|
#5 ExecutorPool::registerTaskable(Taskable&) ep-engine/src/executorpool.cc:493 (ep.so+0x0000000f653e)
|
#6 EventuallyPersistentStore::EventuallyPersistentStore(EventuallyPersistentEngine&) ep-engine/src/ep.cc:301 (ep.so+0x00000008849a)
|
#7 EventuallyPersistentEngine::initialize(char const*) ep-engine/src/ep_engine.cc:2168 (ep.so+0x0000000bd55e)
|
#8 EvpInitialize(engine_interface*, char const*) ep-engine/src/ep_engine.cc:144 (ep.so+0x0000000bae98)
|
#9 init_engine_instance <null> (libmcd_util.so.1.0.0+0x0000000057ab)
|
#10 create_bucket(bool, char const*) memcached/programs/engine_testapp/engine_testapp.cc:988 (engine_testapp+0x0000004cd787)
|
#11 reload_engine(engine_interface**, engine_interface_v1**, char const*, char const*, bool, bool) memcached/programs/engine_testapp/engine_testapp.cc:1018 (engine_testapp+0x0000004cd10d)
|
#12 test_access_scanner(engine_interface*, engine_interface_v1*) ep-engine/tests/ep_testsuite.cc:2779 (ep_testsuite.so+0x00000001f49d)
|
#13 execute_test(test, char const*, char const*) memcached/programs/engine_testapp/engine_testapp.cc:1145 (engine_testapp+0x0000004cc8f7)
|
#14 __libc_start_main /build/buildd/eglibc-2.15/csu/libc-start.c:226 (libc.so.6+0x00000002176c)
|
|
Thread T34 'mc:nonio_17' (tid=170912, running) created by main thread at:
|
#0 pthread_create <null> (engine_testapp+0x000000467911)
|
#1 cb_create_named_thread platform/src/cb_pthreads.cc:104 (libplatform.so.0.1.0+0x000000005655)
|
#2 ExecutorThread::start() ep-engine/src/executorthread.cc:46 (ep.so+0x0000000fd6b7)
|
#3 ExecutorPool::_startWorkers() ep-engine/src/executorpool.cc:538 (ep.so+0x0000000f764c)
|
#4 ExecutorPool::_registerTaskable(Taskable&) ep-engine/src/executorpool.cc:488 (ep.so+0x0000000f648f)
|
#5 ExecutorPool::registerTaskable(Taskable&) ep-engine/src/executorpool.cc:493 (ep.so+0x0000000f653e)
|
#6 EventuallyPersistentStore::EventuallyPersistentStore(EventuallyPersistentEngine&) ep-engine/src/ep.cc:301 (ep.so+0x00000008849a)
|
#7 EventuallyPersistentEngine::initialize(char const*) ep-engine/src/ep_engine.cc:2168 (ep.so+0x0000000bd55e)
|
#8 EvpInitialize(engine_interface*, char const*) ep-engine/src/ep_engine.cc:144 (ep.so+0x0000000bae98)
|
#9 init_engine_instance <null> (libmcd_util.so.1.0.0+0x0000000057ab)
|
#10 create_bucket(bool, char const*) memcached/programs/engine_testapp/engine_testapp.cc:988 (engine_testapp+0x0000004cd787)
|
#11 reload_engine(engine_interface**, engine_interface_v1**, char const*, char const*, bool, bool) memcached/programs/engine_testapp/engine_testapp.cc:1018 (engine_testapp+0x0000004cd10d)
|
#12 test_access_scanner(engine_interface*, engine_interface_v1*) ep-engine/tests/ep_testsuite.cc:2779 (ep_testsuite.so+0x00000001f49d)
|
#13 execute_test(test, char const*, char const*) memcached/programs/engine_testapp/engine_testapp.cc:1145 (engine_testapp+0x0000004cc8f7)
|
#14 __libc_start_main /build/buildd/eglibc-2.15/csu/libc-start.c:226 (libc.so.6+0x00000002176c)
|
Attachments
Issue Links
- blocks
-
MB-19612 4.5.1 Minor Release
- Closed