Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-35631

[Jepsen] Crash due to uncaught pre-condition failure in EPBucket::flushVBucket

    XMLWordPrintable

Details

    • Untriaged
    • Unknown
    • KV-Engine MH 2nd Beta

    Description

      During the kv-engine-jepsen-nightly-226 run for EE build 6.5.0-4072, multiple crash dumps were generated due to a GSL precondition failure in EPBucket::flushVBucket at ep_bucket.cc:547.

      546
                      if (hcs) {
      547
                          Expects(hcs > vbstate.highCompletedSeqno);
      548
                          vbstate.highCompletedSeqno = *hcs;
      549
                      }
      

      This failure, followed by a crash, occurred over ten times on each node for which we have a cbcollect, in every case due to the same precondition failure.
      The full backtrace of the crash is shown below, for dump file 0645c7d4-d9f1-9382-5bb30fd8-422cdd7e.dmp:

      (gdb) bt full
      #0  0x00007fd033d7d428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
              resultvar = 0
              pd = <optimized out>
              pid = 3381
              selftid = 3402
      #1  0x00007fd033d7f02a in __GI_abort () at abort.c:89
              save_stage = 2
              act = {__sigaction_handler = {sa_handler = 0xfffdd6, sa_sigaction = 0xfffdd6}, sa_mask = {__val = {16777050, 0, 0, 0, 0, 140532203443651, 140531388629056, 140531778994176, 140532199990281, 140532203443520, 140532203443520, 10, 140530860165648, 140531388629056, 140532199991323, 140532203443520}}, sa_flags = 881704208, sa_restorer = 0x7fd01b36be38}
              sigs = {__val = {32, 0 <repeats 15 times>}}
      #2  0x00007fd0348ded25 in __gnu_cxx::__verbose_terminate_handler () at /tmp/deploy/objdir/../gcc-7.3.0/libstdc++-v3/libsupc++/vterminate.cc:95
              terminating = false
              t = <optimized out>
      #3  0x000000000053f03d in backtrace_terminate_handler () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/utilities/terminate_handler.cc:86
      No locals.
      #4  0x00007fd0348dcb16 in __cxxabiv1::__terminate (handler=<optimized out>) at /tmp/deploy/objdir/../gcc-7.3.0/libstdc++-v3/libsupc++/eh_terminate.cc:47
      No locals.
      #5  0x00007fd0348dcb61 in std::terminate () at /tmp/deploy/objdir/../gcc-7.3.0/libstdc++-v3/libsupc++/eh_terminate.cc:57
      No locals.
      #6  0x00007fd0348dcda3 in __cxxabiv1::__cxa_throw (obj=obj@entry=0x7fcfe40009f0, tinfo=tinfo@entry=0x7fd02fa9d8b0 <typeinfo for gsl::fail_fast>, dest=dest@entry=0x7fd02f5c8590 <gsl::fail_fast::~fail_fast()>) at /tmp/deploy/objdir/../gcc-7.3.0/libstdc++-v3/libsupc++/eh_throw.cc:93
              globals = <optimized out>
              header = 0x7fcfe4000970
      #7  0x00007fd02f643f08 in gsl::fail_fast_assert (cond=<optimized out>, message=0x7fd02f7f69e8 "GSL: Precondition failure at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/ep_bucket.cc: 547") at /home/couchbase/jenkins/workspace/couchbase-server-unix/third_party/gsl-lite/include/gsl/gsl-lite.h:473
      No locals.
      #8  0x00007fd02f658427 in EPBucket::flushVBucket (this=0x7fd01a97e000, vbid=...) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/ep_bucket.cc:547
              rlh = {lock_ = 0x7fd01b60e2a0, token_ = {type_ = folly::SharedMutexToken::Type::INLINE_SHARED, slot_ = 0}}
              options = VBStatePersist::VBSTATE_CACHE_UPDATE_ONLY
              prev = <optimized out>
              maxSeqno = 959
              minSeqno = 958
              mustCheckpointVBState = <optimized out>
              collectionFlush = {needsPurge = false, mutated = {
                  _M_h = {<std::__detail::_Hashtable_base<CollectionID, CollectionID, std::__detail::_Identity, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Hashtable_traits<true, true, true> >> = {<std::__detail::_Hash_code_base<CollectionID, CollectionID, std::__detail::_Identity, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, true>> = {<std::__detail::_Hashtable_ebo_helper<0, std::__detail::_Identity, true>> = {<std::__detail::_Identity> = {<No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<1, std::hash<CollectionID>, true>> = {<std::hash<CollectionID>> = {<No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<2, std::__detail::_Mod_range_hashing, true>> = {<std::__detail::_Mod_range_hashing> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Hashtable_ebo_helper<0, std::equal_to<CollectionID>, true>> = {<std::equal_to<CollectionID>> = {<std::binary_function<CollectionID, CollectionID, bool>> = {<No data fields>}, <No data fields>}, <No data fields>}, <No data fields>}, <std::__detail::_Map_base<CollectionID, CollectionID, std::allocator<CollectionID>, std::__detail::_Identity, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true>, true>> = {<No data fields>}, <std::__detail::_Insert<CollectionID, CollectionID, std::allocator<CollectionID>, std::__detail::_Identity, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true>, true>> = {<std::__detail::_Insert_base<CollectionID, CollectionID, std::allocator<CollectionID>, 
std::__detail::_Identity, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true> >> = {<No data fields>}, <No data fields>}, <std::__detail::_Rehash_base<CollectionID, CollectionID, std::allocator<CollectionID>, std::__detail::_Identity, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true>, std::integral_constant<bool, true> >> = {<No data fields>}, <std::__detail::_Equality<CollectionID, CollectionID, std::allocator<CollectionID>, std::__detail::_Identity, std::equal_to<CollectionID>, std::hash<CollectionID>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true>, true>> = {<No data fields>}, <std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<CollectionID, true> > >> = {<std::__detail::_Hashtable_ebo_helper<0, std::allocator<std::__detail::_Hash_node<CollectionID, true> >, true>> = {<std::allocator<std::__detail::_Hash_node<CollectionID, true> >> = {<__gnu_cxx::new_allocator<std::__detail::_Hash_node<CollectionID, true> >> = {<No data fields>}, <No data fields>}, <No data fields>}, <No data fields>}, _M_buckets = 0x7fd0037fbf78, _M_bucket_count = 1, _M_before_begin = {_M_nxt = 0x0}, _M_element_count = 0, _M_rehash_policy = {static _S_growth_factor = 2, _M_max_load_factor = 1, 
                      _M_next_resize = 0}, _M_single_bucket = 0x0}}, manifest = @0x7fd01b599300}
              flush_end = <optimized out>
              persistedVbState = <optimized out>
              inMemoryVbState = {static CurrentVersion = 3, state = vbucket_state_replica, checkpointId = 0, maxDeletedSeqno = {counter = {_M_elems = "\000\000\000\000\000"}}, highSeqno = 960, purgeSeqno = 0, lastSnapStart = 912, lastSnapEnd = 938, maxCas = 1566370827597971456, hlcCasEpochSeqno = 23, mightContainXattrs = false, failovers = {
                  static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7fd033679250 <error: Cannot access memory at address 0x7fd033679250>}, _M_string_length = 65, {_M_local_buf = "A\000\000\000\000\000\000\000\000\023?5?\177\000", 
                    _M_allocated_capacity = 65}}, supportsNamespaces = true, replicationTopology = {m_type = nlohmann::detail::value_t::null, m_value = {object = 0x0, array = 0x0, string = 0x0, boolean = false, number_integer = 0, number_unsigned = 0, number_float = 0}}, version = 3, highCompletedSeqno = 0, highPreparedSeqno = 0, onDiskPrepares = 0}
              hcs = {<boost::optional_detail::tc_optional_base<unsigned long>> = {<boost::optional_detail::optional_tag> = {<No data fields>}, m_initialized = true, m_storage = 693}, <No data fields>}
              vbstate = {static CurrentVersion = 3, state = vbucket_state_replica, checkpointId = 0, maxDeletedSeqno = {counter = {_M_elems = "\000\000\000\000\000"}}, highSeqno = 938, purgeSeqno = 0, lastSnapStart = 938, lastSnapEnd = 959, maxCas = 1566370818577006592, hlcCasEpochSeqno = 23, mightContainXattrs = false, failovers = {
                  static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7fd01b6cdb60 <error: Cannot access memory at address 0x7fd01b6cdb60>}, _M_string_length = 65, {_M_local_buf = "A\000\000\000\000\000\000\000\000ި\032?\177\000", 
                    _M_allocated_capacity = 65}}, supportsNamespaces = true, replicationTopology = {m_type = nlohmann::detail::value_t::null, m_value = {object = 0x0, array = 0x0, string = 0x0, boolean = false, number_integer = 0, number_unsigned = 0, number_float = 0}}, version = 3, highCompletedSeqno = 937, highPreparedSeqno = 937, onDiskPrepares = 1}
              hps = <optimized out>
              trans_time = <optimized out>
              toFlush = {items = {<std::_Vector_base<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> >, std::allocator<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> > > >> = {
                    _M_impl = {<std::allocator<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> > >> = {<__gnu_cxx::new_allocator<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> > >> = {<No data fields>}, <No data fields>}, _M_start = 0x7fd01ac714a0, _M_finish = 0x7fd01ac714c0, _M_end_of_storage = 0x7fd01ac714c0}}, <No data fields>}, range = {
                  start = 938, end = 959}, moreAvailable = true, highCompletedSeqno = {<boost::optional_detail::tc_optional_base<unsigned long>> = {<boost::optional_detail::optional_tag> = {<No data fields>}, m_initialized = true, m_storage = 693}, <No data fields>}}
              items = @0x7fd0037fbf00: {<std::_Vector_base<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> >, std::allocator<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> > > >> = {
                  _M_impl = {<std::allocator<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> > >> = {<__gnu_cxx::new_allocator<SingleThreadedRCPtr<Item, Item*, std::default_delete<Item> > >> = {<No data fields>}, <No data fields>}, _M_start = 0x7fd01ac714a0, _M_finish = 0x7fd01ac714c0, _M_end_of_storage = 0x7fd01ac714c0}}, <No data fields>}
              range = @0x7fd0037fbf18: {start = 938, end = 959}
              rwUnderlying = 0x7fd01ac44000
              shard = <optimized out>
              items_flushed = <optimized out>
              moreAvailable = true
              flush_start = <optimized out>
              vb = {vb = {<std::__shared_ptr<VBucket, (__gnu_cxx::_Lock_policy)2>> = {<std::__shared_ptr_access<VBucket, (__gnu_cxx::_Lock_policy)2, false, false>> = {<No data fields>}, _M_ptr = 0x7fd01b60e000, _M_refcount = {_M_pi = 0x7fd01b63a4a0}}, <No data fields>}, lock = {_M_device = 0x7fd01ad28c58, _M_owns = true}}
      #9  0x00007fd02f6a61f5 in Flusher::flushVB (this=0x7fd01adb7a00) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/flusher.cc:304
              vbid = {vbid = 591}
      #10 0x00007fd02f6a66bd in Flusher::step (this=0x7fd01adb7a00, task=0x7fd01a8a4bd0) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/flusher.cc:211
              currentState = <optimized out>
      #11 0x00007fd02f69fa9f in ExecutorThread::run (this=0x7fd01a8e6c40) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/executorthread.cc:153
              engine = <optimized out>
              curTaskDescr = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7fd01acec8c0 <error: Cannot access memory at address 0x7fd01acec8c0>}, _M_string_length = 31, {_M_local_buf = "\037\000\000\000\000\000\000\000??\177\003?\177\000", 
                  _M_allocated_capacity = 31}}
              woketime = <optimized out>
              scheduleOverhead = <optimized out>
              again = <optimized out>
              runtime = <optimized out>
              q = <optimized out>
              tick = <optimized out>
      #12 0x00007fd0366ebe77 in CouchbaseThread::run (this=0x7fd01ad16350) at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/cb_pthreads.cc:58
      No locals.
      #13 platform_thread_wrap (arg=0x7fd01ad16350) at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/cb_pthreads.cc:71
              context = {_M_t = {
                  _M_t = {<std::_Tuple_impl<0, CouchbaseThread*, std::default_delete<CouchbaseThread> >> = {<std::_Tuple_impl<1, std::default_delete<CouchbaseThread> >> = {<std::_Head_base<1, std::default_delete<CouchbaseThread>, true>> = {<std::default_delete<CouchbaseThread>> = {<No data fields>}, <No data fields>}, <No data fields>}, <std::_Head_base<0, CouchbaseThread*, false>> = {_M_head_impl = 0x7fd01ad16350}, <No data fields>}, <No data fields>}}}
      #14 0x00007fd0341196ba in start_thread (arg=0x7fd0037fe700) at pthread_create.c:333
              __res = <optimized out>
              pd = 0x7fd0037fe700
              now = <optimized out>
              unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140531388638976, -3344730857694655508, 0, 140532011706863, 140531388639680, 140531775466560, 3329611471395130348, 3329647912591854572}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
              not_first_call = <optimized out>
              pagesize_m1 = <optimized out>
              sp = <optimized out>
              freesize = <optimized out>
              __PRETTY_FUNCTION__ = "start_thread"
      #15 0x00007fd033e4f41d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
      No locals.
      

      I've also attached the nodes' cbcollects with TCP packet dumps; these also contain all the crash dumps that occurred.

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this sections header.

          Activity

            People

              richard.demellow Richard deMellow
              richard.demellow Richard deMellow
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                PagerDuty