Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-49832

Data race in magma::TaskQueue::Enqueue

    XMLWordPrintable

Details

    • Bug
    • Resolution: Not a Bug
    • Major
    • 7.1.0
    • 7.1.0
    • storage-engine
    • Untriaged
    • 1
    • Unknown

    Description

      Summary

      When running a TAF test under cluster_run, the following race is observed in Magma's task management:

       WARNING: ThreadSanitizer: data race (pid=27242)
         Write of size 8 at 0x000120e0bd88 by thread T23 (mutexes: write M36503, write M546342934386906912):
           #0 magma::TaskQueue::Enqueue(magma::Task const&, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000l> >) <null>:2 (memcached:x86_64+0x10069da72)
           #1 magma::ThreadPool::Schedule(magma::ThreadPool::WorkerType, magma::Task const&) <null>:2 (memcached:x86_64+0x1006a1f68)
           #2 magma::KVStoreConfig::ScheduleTask(std::__1::function<void ()>, magma::WaitGroup&) config.cc:205 (memcached:x86_64+0x100586343)
           #3 magma::KVStore::flushMemTables(magma::WAL*, magma::WALOffset, magma::FlushMode, magma::BlockingMode) <null>:2 (memcached:x86_64+0x1006658b2)
           #4 magma::KVStore::FlushMemTables(magma::WAL*, magma::FlushMode, magma::BlockingMode) <null>:2 (memcached:x86_64+0x10066613f)
           #5 magma::Magma::Impl::tryWriteCacheFlush(magma::BlockingMode, magma::FlushMode) db.cc:1129 (memcached:x86_64+0x1005a0391)
           #6 magma::Magma::Impl::WriteDocs(unsigned short, std::__1::vector<magma::Magma::WriteOperation, std::__1::allocator<magma::Magma::WriteOperation> > const&, unsigned int, std::__1::function<void (magma::Magma::WriteOperation const&, bool, magma::Slice)>, std::__1::function<std::__1::pair<magma::Status, std::__1::vector<magma::Magma::WriteOperation, std::__1::allocator<magma::Magma::WriteOperation> > const*> ()>) db_write.cc:50 (memcached:x86_64+0x1005c2335)
           #7 magma::Magma::WriteDocs(unsigned short, std::__1::vector<magma::Magma::WriteOperation, std::__1::allocator<magma::Magma::WriteOperation> > const&, unsigned int, std::__1::function<void (magma::Magma::WriteOperation const&, bool, magma::Slice)>, std::__1::function<std::__1::pair<magma::Status, std::__1::vector<magma::Magma::WriteOperation, std::__1::allocator<magma::Magma::WriteOperation> > const*> ()>) db_write.cc:59 (memcached:x86_64+0x1005c2635)
           #8 MagmaMemoryTrackingProxy::WriteDocs(unsigned short, std::__1::vector<magma::Magma::WriteOperation, std::__1::allocator<magma::Magma::WriteOperation> > const&, unsigned int, std::__1::function<void (magma::Magma::WriteOperation const&, bool, magma::Slice)>, std::__1::function<std::__1::pair<magma::Status, std::__1::vector<magma::Magma::WriteOperation, std::__1::allocator<magma::Magma::WriteOperation> > const*> ()>) magma-memory-tracking-proxy.cc:362 (memcached:x86_64+0x10025c010)
           #9 MagmaKVStore::saveDocs(MagmaKVStoreTransactionContext&, VB::Commit&, kvstats_ctx&) magma-kvstore.cc:1411 (memcached:x86_64+0x10025b1ea)
           #10 MagmaKVStore::commit(std::__1::unique_ptr<TransactionContext, std::__1::default_delete<TransactionContext> >, VB::Commit&) magma-kvstore.cc:707 (memcached:x86_64+0x100255b28)
           #11 EPBucket::commit(KVStoreIface&, std::__1::unique_ptr<TransactionContext, std::__1::default_delete<TransactionContext> >, VB::Commit&) ep_bucket.cc:927 (memcached:x86_64+0x10041ceae)
           #12 EPBucket::flushVBucket_UNLOCKED(LockedVBucketPtr) ep_bucket.cc:799 (memcached:x86_64+0x10041b78b)
           #13 EPBucket::flushVBucket(Vbid) ep_bucket.cc:376 (memcached:x86_64+0x10041a1fa)
           #14 Flusher::flushVB() flusher.cc:285 (memcached:x86_64+0x1003e31ef)
           #15 Flusher::step(GlobalTask*) flusher.cc:200 (memcached:x86_64+0x1003e2c8d)
           #16 FlusherTask::run() tasks.cc:28 (memcached:x86_64+0x100356896)
           #17 FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::'lambda0'()::operator()() const folly_executorpool.cc:189 (memcached:x86_64+0x10070651c)
           #18 void folly::detail::function::FunctionTraits<void ()>::callSmall<FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::'lambda0'()>(folly::detail::function::Data&) Function.h:387 (memcached:x86_64+0x100706169)
           #19 folly::ThreadPoolExecutor::runTask(std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&) ThreadPoolExecutor.cpp:97 (memcached:x86_64+0x1008b81f8)
           #20 folly::CPUThreadPoolExecutor::threadRun(std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread>) CPUThreadPoolExecutor.cpp:265 (memcached:x86_64+0x100893540)
           #21 void folly::detail::function::FunctionTraits<void ()>::callSmall<std::__1::__bind<void (folly::ThreadPoolExecutor::*)(std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*, std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread>&> >(folly::detail::function::Data&) Function.h:387 (memcached:x86_64+0x1008bdc18)
           #22 CBRegisteredThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()::operator()() folly_executorpool.cc:47 (memcached:x86_64+0x1007037c4)
           #23 void folly::detail::function::FunctionTraits<void ()>::callBig<CBRegisteredThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()>(folly::detail::function::Data&) Function.h:401 (memcached:x86_64+0x100703681)
           #24 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, folly::NamedThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()> >(void*) thread:291 (memcached:x86_64+0x10070423f)
       
         Previous write of size 8 at 0x000120e0bd88 by thread T46 (mutexes: write M5066554427358624):
           #0 magma::TimedTask::Complete(bool) <null>:2 (memcached:x86_64+0x10069f0ab)
           #1 std::__1::__shared_ptr_emplace<magma::TimedTask, std::__1::allocator<magma::TimedTask> >::__on_zero_shared() <null>:2 (memcached:x86_64+0x1006afd36)
           #2 magma::TaskWorker::loop(void*) <null>:2 (memcached:x86_64+0x10069efe1)
           #3 platform_thread_wrap(void*) cb_pthreads.cc:64 (memcached:x86_64+0x10085ff2c)
       
         Mutex M36503 (0x0001120eb000) created at:
           #0 pthread_mutex_trylock <null>:3 (libclang_rt.tsan_osx_dynamic.dylib:x86_64h+0x2dfbe)
           #1 std::__1::mutex::try_lock() <null>:2 (libc++.1.dylib:x86_64+0x393f6)
           #2 Flusher::flushVB() flusher.cc:285 (memcached:x86_64+0x1003e31ef)
           #3 Flusher::step(GlobalTask*) flusher.cc:200 (memcached:x86_64+0x1003e2c8d)
           #4 FlusherTask::run() tasks.cc:28 (memcached:x86_64+0x100356896)
           #5 FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::'lambda0'()::operator()() const folly_executorpool.cc:189 (memcached:x86_64+0x10070651c)
           #6 void folly::detail::function::FunctionTraits<void ()>::callSmall<FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::'lambda0'()>(folly::detail::function::Data&) Function.h:387 (memcached:x86_64+0x100706169)
           #7 folly::ThreadPoolExecutor::runTask(std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&) ThreadPoolExecutor.cpp:97 (memcached:x86_64+0x1008b81f8)
           #8 folly::CPUThreadPoolExecutor::threadRun(std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread>) CPUThreadPoolExecutor.cpp:265 (memcached:x86_64+0x100893540)
           #9 void folly::detail::function::FunctionTraits<void ()>::callSmall<std::__1::__bind<void (folly::ThreadPoolExecutor::*)(std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*, std::__1::shared_ptr<folly::ThreadPoolExecutor::Thread>&> >(folly::detail::function::Data&) Function.h:387 (memcached:x86_64+0x1008bdc18)
           #10 CBRegisteredThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()::operator()() folly_executorpool.cc:47 (memcached:x86_64+0x1007037c4)
           #11 void folly::detail::function::FunctionTraits<void ()>::callBig<CBRegisteredThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()>(folly::detail::function::Data&) Function.h:401 (memcached:x86_64+0x100703681)
           #12 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, folly::NamedThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()> >(void*) thread:291 (memcached:x86_64+0x10070423f)
       
         Mutex M546342934386906912 is already destroyed.
       
         Mutex M5066554427358624 is already destroyed.
       
         Thread T23 (tid=1479076, running) created by main thread at:
           #0 pthread_create <null>:3 (libclang_rt.tsan_osx_dynamic.dylib:x86_64h+0x2cd8d)
           #1 std::__1::thread::thread<folly::NamedThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'(), void>(folly::NamedThreadFactory::newThread(folly::Function<void ()>&&)::'lambda'()&&) thread:307 (memcached:x86_64+0x1007040d4)
           #2 folly::NamedThreadFactory::newThread(folly::Function<void ()>&&) NamedThreadFactory.h:37 (memcached:x86_64+0x100703c20)
           #3 CBRegisteredThreadFactory::newThread(folly::Function<void ()>&&) folly_executorpool.cc:42 (memcached:x86_64+0x100703559)
           #4 folly::ThreadPoolExecutor::addThreads(unsigned long) ThreadPoolExecutor.cpp:215 (memcached:x86_64+0x1008b8f12)
      

      Magma SHA: 832cef05c
      KV_engine SHA: eeb315140

      Steps to Reproduce

      1. Build CB Server with TSan enabled - e.g.

        mkdir build && cd build
        cmake -G Ninja -DCB_THREADSANITIZER=1 -DCMAKE_INSTALL_PREFIX=../install ..
        ninja install
        

      2. Start a local 3 node cluster:

        COUCHBASE_NUM_VBUCKETS=64 COUCHBASE_CPU_COUNT=4 ./cluster_run --nodes=3 --dont-rename
        

      3. Run the following TAF test:

        JAVA_HOME=/usr/local/opt/openjdk\@8 nice ./gradlew testrunner -P jython=$(which jython) -P 'args=-i b/resources/dev-3-nodes.ini rerun=False,disk_optimized_thread_settings=True,get-cbcollect-info=True,GROUP=rebalance_out -t bucket_collections.collections_rebalance.CollectionsRebalance.test_data_load_collections_with_rebalance_out,nodes_init=3,nodes_out=1,bucket_spec=magma_dgm.10_percent_dgm.5_node_1_replica_magma_256_single_bucket,doc_size=256,randomize_value=True,data_load_spec=volume_test_load_with_CRUD_on_collections,data_load_stage=during,quota_percent=80,skip_validations=False,GROUP=rebalance_out,skip_cleanup=True,skip_collections_cleanup=True,log_level=debug,infra_log_level=debug'
        

      Check the ns_server/logs directory for WARNING: ThreadSanitizer messages.

      Attachments

        Issue Links

          For Gerrit Dashboard: MB-49832
          # Subject Branch Project Status CR V

          Activity

            People

              scott.lashley Scott Lashley
              drigby Dave Rigby (Inactive)
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty