Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-58261

Empty checkpoint (empty by expel) can trigger a skip of backfill and a corrupt replica.

    XMLWordPrintable

Details

    Description

      Steps:

      1. Create a 4 node cluster
      2. Create 1 couchstore and 2 magma buckets (each with replicas=2)
      3. Start async data load
      4. While data loading is going on, simulate a split-brain scenario by introducing a network partition
      5. Hard Failover the node while data loading is going on.
      6. Rebalance out the node
      7. Observed that memcached crashed in Collections::VB::Manifest::throwException<std::invalid_argument>

      BackTrace:
      Below core dump is from node 172.23.108.182
      Backtrace of b7770909-a6d1-4045-42e447af-3315680a.dmp

      (gdb) bt full
      #0  0x00007f2f442527bb in raise () from /lib/x86_64-linux-gnu/libc.so.6
      No symbol table info available.
      #1  0x00007f2f4423d535 in abort () from /lib/x86_64-linux-gnu/libc.so.6
      No symbol table info available.
      #2  0x00007f2f4481163c in __gnu_cxx::__verbose_terminate_handler () at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/vterminate.cc:95
              terminating = false
              t = <optimized out>
      #3  0x0000000000b5399b in backtrace_terminate_handler ()
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/utilities/terminate_handler.cc:88
      No locals.
      #4  0x00007f2f4481c8f6 in __cxxabiv1::__terminate (handler=<optimized out>)
          at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/eh_terminate.cc:48
      No locals.
      #5  0x00007f2f4481c961 in std::terminate () at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/eh_terminate.cc:58
      No locals.
      #6  0x00007f2f4481cbf4 in __cxxabiv1::__cxa_throw (obj=obj@entry=0x7f2e5c001850, tinfo=0x109fda8 <typeinfo for std::invalid_argument>,
          dest=0x443af0 <std::invalid_argument::~invalid_argument()@plt>) at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/eh_throw.cc:95
              globals = <optimized out>
              header = 0x7f2e5c0017d0
      #7  0x00000000004ac37e in Collections::VB::Manifest::throwException<std::invalid_argument> (this=this@entry=0x7f2e3ca3cb60, thrower=..., error=...)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/collections/vbucket_manifest.h:1203
      No locals.
      #8  0x00000000004bdc7c in Collections::VB::Manifest::updateDataSize (this=0x7f2e3ca3cb60, sid=..., delta=<optimized out>)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/collections/vbucket_manifest.cc:1741
              itr = {<std::__detail::_Node_iterator_base<std::pair<ScopeID const, Collections::VB::ScopeEntry>, true>> = {_M_cur = 0x0}, <No data fields>}
              __FUNCTION__ = "updateDataSize"
      #9  0x00000000007b8f6b in Collections::VB::StatsReadHandle::updateScopeDataSize (delta=<optimized out>, this=0x7f2e63fec790)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/collections/vbucket_manifest_handles.h:589
      No locals.
      #10 Collections::VB::Manifest::applyFlusherStats (this=0x7f2e3ca3cb60, cid=..., flushStats=...)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/collections/vbucket_manifest.cc:307
              collection = {<Collections::VB::ReadHandle> = {readLock = {lock_ = 0x7f2e3ca3cbf8, token_ = {
                      type_ = folly::SharedMutexToken::Type::INLINE_SHARED, slot_ = 25598}}, manifest = 0x7f2e3ca3cb60},
                itr = {<folly::f14::detail::BaseIter<std::pair<CollectionID const, Collections::VB::ManifestEntry> const*, std::pair<CollectionID const, Collections::VB::ManifestEntry>*>> = {<No data fields>}, underlying_ = {itemPtr_ = 0x7f2e3a9adbc0, index_ = 6}}}
      #11 0x00000000008bb8eb in Collections::VB::Flush::postCommitMakeStatsVisible (this=0x7f2e63fecee0)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/collections/flush.cc:121
              __for_range = @0x7f2e63fecfc0: {
                _M_h = {<std::__detail::_Hashtable_base<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::__detail::_Select1st, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, st--Type <RET> for more, q to quit, c to continue without paging--
      d::__detail::_Hashtable_traits<true, false, true> >> = {<std::__detail::_Hash_code_base<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::__detail::_Select1st, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, true>> = {<std::__detail::_Hashtable_ebo_helper<0, std::__detail::_Select1st, true>> = {<std::__detail::_Select1st> = {<No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<1, std::hash<CollectionID>, true>> = {<std::hash<CollectionID>> = {<No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<2, std::__detail::_Mod_range_hashing, true>> = {<std::__detail::_Mod_range_hashing> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<0, std::equal_to<CollectionID>, true>> = {<std::equal_to<CollectionID>> = {<std::binary_function<CollectionID, CollectionID, bool>> = {<No data fields>}, <No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Map_base<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::allocator<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate> >, std::__detail::_Select1st, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true>, true>> = {<No data fields>}, <std::__detail::_Insert<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::allocator<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate> >, std::__detail::_Select1st, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, 
true>, false>> = {<std::__detail::_Insert_base<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::allocator<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate> >, std::__detail::_Select1st, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true> >> = {<No data fields>}, <No data fields>}, <std::__detail::_Rehash_base<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::allocator<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate> >, std::__detail::_Select1st, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true>, std::integral_constant<bool, true> >> = {<No data fields>}, <std::__detail::_Equality<CollectionID, std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, std::allocator<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate> >, std::__detail::_Select1st, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true>, true>> = {<No data fields>}, <std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, true> > >> = {<std::__detail::_Hashtable_ebo_helper<0, std::allocator<std::__detail::_Hash_node<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, true> >, true>> = {<std::allocator<std::__detail::_Hash_node<std::pair<CollectionID const, 
Collections::VB::FlushAccounting::StatisticsUpdate>, true> >> = {<__gnu_cxx::new_allocator<std::__detail::_Hash_node<std::pair<CollectionID const, Collections::VB::FlushAccounting::StatisticsUpdate>, true> >> = {<No data fields>}, <No data fields>}, <No data fields>}, <No data fields>}, _M_buckets = 0x7f2e45c4cc00, _M_bucket_count = 127, _M_before_begin = {_M_nxt = 0x7f2e46de7bc0},
                  _M_element_count = 85, _M_rehash_policy = {static _S_growth_factor = 2, _M_max_load_factor = 1, _M_next_resize = 127},
                  _M_single_bucket = 0x0}}
              __for_begin = <optimized out>
              __for_end = <optimized out>
              cid = <error reading variable>
              flushStats = <error reading variable>
      #12 0x00000000008ac2ee in MagmaKVStore::saveDocs (this=<optimized out>, txnCtx=..., commitData=..., kvctx=..., historyMode=<optimized out>)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/kvstore/magma-kvstore/magma-kvstore.cc:1599
              ninserts = 88
              ndeletes = 2
              vbid = {vbid = 722}
              writeDocsCB = <optimized out>
              localDbReqs = {<std::_Vector_base<MagmaKVStore::MagmaLocalReq, std::allocator<MagmaKVStore::MagmaLocalReq> >> = {
                  _M_impl = {<std::allocator<MagmaKVStore::MagmaLocalReq>> = {<__gnu_cxx::new_allocator<MagmaKVStore::MagmaLocalReq>> = {<No data fields>}, <No data fields>}, <std::_Vector_base<MagmaKVStore::MagmaLocalReq, std::allocator<MagmaKVStore::MagmaLocalReq> >::_Vector_impl_data> = {
                      _M_start = 0x7f2dfc156800, _M_finish = 0x7f2dfc158588, _M_end_of_storage = 0x7f2dfc158c00}, <No data fields>}}, <No data fields>}
       
              magmaDbStats = {<magma::UserStats> = {_vptr.UserStats = 0x108c6c0 <vtable for MagmaDbStats+16>}, docCount = 86,
                purgeSeqno = {<IgnorePolicy<unsigned long>> = {<No data fields>}, val = 0}, highSeqno = {<IgnorePolicy<long>> = {<No data fields>},
                  val = 3455}, droppedCollectionCounts = {
                  _M_h = {<std::__detail::_Hashtable_base<unsigned int, std::pair<unsigned int const, long>, std::__detail::_Select1st, std::equal_to<unsigned int>, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Hashtable_traits<false, false, true> >> = {<std::__detail::_Hash_code_base<unsigned int, std::pair<unsigned int const, long>, std::__detail::_Select1st, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, false>> = {<std::__detail::_Hashtable_ebo_helper<0, std::__detail::_Select1st, true>> = {<std::__detail::_Select1st> = {<No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<1, std::hash<unsigned int>, true>> = {<std::hash<unsigned int>> = {<std::__hash_base<unsigned long, unsigned int>> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<2, std::__detail::_Mod_range_hashing, true>> = {<std::__detail::_Mod_range_hashing> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<0, std::equal_to<unsigned int>, true>> = {<std::equal_to<unsigned int>> = {<std::binary_function<unsigned int, unsigned int, bool>> = {<No data fields>}, <No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Map_base<unsigned int, std::pair<unsigned int const, long>, std::allocator<std::pair<unsigned int const, long> >, std::__detail::_Select1st, std::equal_to<unsigned int>, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true>, true>> = {<No data fields>}, <std::__detail::_Insert<unsigned int, std::pair<unsigned int const, long>, std::allocator<std::pair<unsigned int const, long> >, std::__detail::_Select1st, std::equal_to<unsigned int>, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, 
std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true>, false>> = {<std::__detail::_Insert_base<unsigned int, std::pair<unsigned int const, long>, std::allocator<std::pair<unsigned int const, long> >, std::__detail::_Select1st, std::equal_to<unsigned int>, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >> = {<No data fields>}, <No data fields>}, <std::__detail::_Rehash_base<unsigned int, std::pair<unsigned int const, long>, std::allocator<std::pair<unsigned int const, long> >, std::__detail::_Select1st, std::equal_to<unsigned int>, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true>, std::integral_constant<bool, true> >> = {<No data fields>}, <std::__detail::_Equality<unsigned int, std::pair<unsigned int const, long>, std::allocator<std::pair<unsigned int const, long> >, std::__detail::_Select1st, std::equal_to<unsigned int>, std::hash<unsigned int>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true>, true>> = {<No data fields>}, <std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned int const, long>, false> > >> = {<std::__detail::_Hashtable_ebo_helper<0, std::allocator<std::__detail::_Hash_node<std::pair<unsigned int const, long>, false> >, true>> = {<std::allocator<std::__detail::_Hash_node<std::pair<unsigned int const, long>, false> >> = {<__gnu_cxx::new_allocator<std::__detail::_Hash_node<std::pair<unsigned int const, long>, false> >> = {<No data fields>}, <No data fields>}, <No data fields>}, <No data fields>},
                    _M_buckets = 0x7f2e36033800, _M_bucket_count = 29, _M_before_begin = {_M_nxt = 0x7f2e2e61aac0}, _M_element_count = 15,
                    _M_rehash_policy = {static _S_growth_factor = 2, _M_max_load_factor = 1, _M_next_resize = 29}, _M_single_bucket = 0x0}}}
              postWriteOps = {<std::_Vector_base<magma::Magma::WriteOperation, std::allocator<magma::Magma::WriteOperation> >> = {
                  _M_impl = {<std::allocator<magma::Magma::WriteOperation>> = {<__gnu_cxx::new_allocator<magma::Magma::WriteOperation>> = {<No data fields>}, <No data fields>}, <std::_Vector_base<magma::Magma::WriteOperation, std::allocator<magma::Magma::WriteOperation> >::_Vector_impl_data> = {
                      _M_start = 0x7f2dfaef8000, _M_finish = 0x7f2dfaefab10, _M_end_of_storage = 0x7f2dfaefb400}, <No data fields>}}, <No data fields>}
              lastSeqno = 3455
              beginTime = {__d = {__r = 3140273582594994}}
              saveDocsDuration = {__r = 9577}
              postWriteDocsCB = <optimized out>
              ctx = <optimized out>
              writeOps = {<std::_Vector_base<magma::Magma::WriteOperation, std::allocator<magma::Magma::WriteOperation> >> = {
                  _M_impl = {<std::allocator<magma::Magma::WriteOperation>> = {<__gnu_cxx::new_allocator<magma::Magma::WriteOperation>> = {<No data fields>}, <No data fields>}, <std::_Vector_base<magma::Magma::WriteOperation, std::allocator<magma::Magma::WriteOperation> >::_Vector_impl_data> = {
                      _M_start = 0x7f2dfc333000, _M_finish = 0x7f2dfc3359d8, _M_end_of_storage = 0x7f2dfc3359d8}, <No data fields>}}, <No data fields>}
              status = {s = {_M_t = {<std::__uniq_ptr_impl<magma::Status::state, std::default_delete<magma::Status::state> >> = {
                      _M_t = {<std::_Tuple_impl<0, magma::Status::state*, std::default_delete<magma::Status::state> >> = {<std::_Tuple_impl<1, std::default_delete<magma::Status::state> >> = {<std::_Head_base<1, std::default_delete<magma::Status::state>, true>> = {<std::default_delete<magma::Status::state>> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::_Head_base<0, magma::Status::state*, false>> = {
                            _M_head_impl = 0x0}, <No data fields>}, <No data fields>}}, <No data fields>}}}
       
      #13 0x0000000000894705 in MagmaKVStore::commit (this=0x7f2e4da97f00, txnCtx=..., commitData=...)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/kvstore/magma-kvstore/magma-kvstore.h:808
              ctx = <error reading variable>
              kvctx = {commitData = @0x7f2e63fecee0, onDiskPrepareDelta = 0, onDiskPrepareBytesDelta = 0}
              success = true
              errCode = <optimized out>
      #14 0x000000000083aaa5 in EPBucket::commit (this=0x7f2e4da9c000, kvstore=..., txnCtx=..., commitData=...)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/ep_bucket.cc:964
              timer = {dest = 0x7f2e4da007e0, start = {__d = {__r = 3140273573013469}}, name = 0xc9e70d "disk_commit", out = 0x0}
              commit_start = <optimized out>
              vbid = {vbid = 722}
              res = <optimized out>
              commit_time = <optimized out>
      

      QE-TEST:

      guides/gradlew --refresh-dependencies testrunner -P jython=/opt/jython/bin/jython -P 'args=-i /tmp/testexec.104023.ini rerun=False,disk_optimized_thread_settings=True,get-cbcollect-info=True,autoCompactionDefined=true,get-cbcollect-info=True,infra_log_level=info,log_level=info,upgrade_version=7.2.1-5912,sirius_url=http://172.23.120.103:4000 -t bucket_collections.collections_network_split.CollectionsNetworkSplit.test_collections_crud_with_network_split,nodes_init=4,bucket_spec=magma_dgm.30_percent_dgm.4_node_2_replica_magma_512,doc_size=512,randomize_value=True,subsequent_action=rebalance-out,allow_unsafe=True'
      

      Job Name: magma-nsserv_network_split_30DGM
      Job: http://qe-jenkins1.sc.couchbase.com/job/test_suite_executor-TAF/30723/consoleFull

      Attachments

        For Gerrit Dashboard: MB-58261
        # Subject Branch Project Status CR V

        Activity

          People

            ankush.sharma Ankush Sharma
            ankush.sharma Ankush Sharma
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty