Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-55709

[CDC] Memcached crashed in Cookie::initialize(cb::mcbp::Header const&, bool) () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/cookie.cc

    XMLWordPrintable

Details

    Description

      Steps To Recreate:

      1. Create a 3 node cluster
      2. Create a magma bucket with bucket_history_retention_seconds=86400 and bucket_history_retention_bytes=99636764160 (vbuckets = 16, replicas = 2)
      3. Create 9 collections (total collection count is 10, including the default collection)
      4. After creating the collections, update the collection history setting to true
      5. Create 500000 docs in each of the collection
      6. Perform continuous dedupe mutations (for 10000 docs, 100 iterations)
      7. Keep killing memcached (sleep between two memcached kills is 30 to 60 seconds)
      8. While data loading and SIGKILLs are ongoing, delete three collections and recreate them with the same names
      9. Set bucket_history_retention_bytes=10000000000 (total disk size at this moment is 40GB) and trigger compaction (SIGKILLs and data loading are still ongoing)
      10. Observed Memcached crashed in Cookie::initialize(cb::mcbp::Header const&, bool) () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/cookie.cc

      The core dump below is from node 172.23.107.76

      BackTrace:

      (gdb) bt full
      #0  0x0000000000609331 in _S_right (__x=0x7463656c6c6f6320) at /opt/gcc-10.2.0/include/c++/10.2.0/bits/stl_tree.h:1917
      No locals.
      #1  std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nlohmann::basic_json<std::map, std::vector, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, unsigned long, double, std::allocator, nlohmann::adl_serializer, std::vector<unsigned char, std::allocator<unsigned char> > > >, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nlohmann::basic_json<std::map, std::vector, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, unsigned long, double, std::allocator, nlohmann::adl_serializer, std::vector<unsigned char, std::allocator<unsigned char> > > > >, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nlohmann::basic_json<std::map, std::vector, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, unsigned long, double, std::allocator, nlohmann::adl_serializer, std::vector<unsigned char, std::allocator<unsigned char> > > > > >::_M_erase
          (__x=0x7463656c6c6f6320, this=0x7fbf5fd082c0) at /opt/gcc-10.2.0/include/c++/10.2.0/bits/stl_tree.h:1919
              __y = <optimized out>
      #2  0x0000000000609dd0 in clear (this=0x7fbf5fd082c0) at /opt/gcc-10.2.0/include/c++/10.2.0/ext/atomicity.h:84
      No locals.
      #3  clear (this=0x7fbf5fd082c0) at /opt/gcc-10.2.0/include/c++/10.2.0/bits/stl_map.h:1134
      No locals.
      #4  clear (this=<optimized out>, this=<optimized out>)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/server_build/tlm/deps/json.exploded/include/nlohmann/json.hpp:5196
      No locals.
      #5  clear (this=0x7fc02c37f140)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/server_build/tlm/deps/json.exploded/include/nlohmann/json.hpp:5148
      No locals.
      #6  Cookie::reset() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/cookie.cc:550
      No locals.
      #7  0x0000000000609ee5 in Cookie::initialize(cb::mcbp::Header const&, bool) ()
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/cookie.cc:439
      No locals.
      #8  0x00000000005ef425 in Connection::executeCommandPipeline() ()
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/connection.cc:1529
              drainSize = <optimized out>
              limit = <optimized out>
              status = <optimized out>
              input = 0x7fc06458ff40
              stop = false
              maxActiveCommands = 32
              active = false
      ---Type <return> to continue, or q <return> to quit---
              maxSendQueueSize = 41943040
      #9  0x00000000005ef972 in Connection::executeCommandsCallback() ()
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/connection.cc:801
              start = {__d = {__r = 39380422152823131}}
              ns = <optimized out>
      #10 0x00000000005efffb in Connection::rw_callback(bufferevent*, void*) ()
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/connection.cc:950
              instance = <optimized out>
              thread = <optimized out>
              phosphor_internal_category_enabled_945 = {_M_b = {_M_p = 0x0}, static is_always_lock_free = <optimized out>}
              phosphor_internal_category_enabled_temp_945 = <optimized out>
              phosphor_internal_tpi_wait_945 = {category = 0xcc0feb "mutex", name = 0xc5f5f0 "Connection::rw_callback::threadLock.wait",
                type = phosphor::Complete, argument_names = {_M_elems = {0xcbadf9 "this", 0xcc1a82 ""}}, argument_types = {_M_elems = {
                    phosphor::is_pointer, phosphor::is_none}}}
              phosphor_internal_tpi_held_945 = {category = 0xcc0feb "mutex", name = 0xc5f5c0 "Connection::rw_callback::threadLock.held",
                type = phosphor::Complete, argument_names = {_M_elems = {0xcc1a82 "", 0xcc1a82 ""}}, argument_types = {_M_elems = {
                    phosphor::is_pointer, phosphor::is_none}}}
              phosphor_internal_guard_945 = {tpiWait = 0xc5fe00 <Connection::rw_callback(bufferevent*, void*)::phosphor_internal_tpi_wait_945>,
                tpiHeld = 0xc5fdc0 <Connection::rw_callback(bufferevent*, void*)::phosphor_internal_tpi_held_945>, enabled = true,
                mutex = @0x7fc06452e6f0, threshold = {__r = 10000000}, start = {__d = {__r = 39380422152820879}}, lockedAt = {__d = {
                    __r = 39380422152821918}}, releasedAt = {__d = {__r = 0}}}
      #11 0x00007fc06748ad8e in bufferevent_run_deferred_callbacks_unlocked (cb=<optimized out>, arg=0x7fc02c37f300)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/bufferevent.c:208
              readcb = 0x5efed0 <Connection::rw_callback(bufferevent*, void*)>
              cbarg = 0x7fc02c37eb80
              bufev_private = 0x7fc02c37f300
              bufev = 0x7fc02c37f300
      #12 0x00007fc067493d01 in event_process_active_single_queue (base=base@entry=0x7fc06448c200,
          max_to_process=max_to_process@entry=2147483647, endtime=endtime@entry=0x0, activeq=<optimized out>)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:1726
              evcb_cbfinalize = <optimized out>
              ev = 0x0
              evcb = 0x7fc02c37f4a0
              count = 2
      #13 0x00007fc06749454f in event_process_active (base=0x7fc06448c200)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:1789
      ---Type <return> to continue, or q <return> to quit---
              activeq = <optimized out>
              i = 0
              c = 0
              tv = {tv_sec = 4476704, tv_usec = 1}
              maxcb = 2147483647
              endtime = 0x0
              limit_after_prio = 2147483647
      #14 event_base_loop (base=0x7fc06448c200, flags=1)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/libevent/libevent-prefix/src/libevent/event.c:2012
              evsel = 0x7fc0676a7c20 <epollops>
              tv = {tv_sec = 48, tv_usec = 140464293602464}
              tv_p = <optimized out>
              res = <optimized out>
              done = 0
              retval = 0
              __func__ = "event_base_loop"
      #15 0x0000000000c1e9c7 in folly::EventBase::loopBody (this=this@entry=0x7fc06452e4a0, flags=flags@entry=0,
          ignoreKeepAlive=ignoreKeepAlive@entry=false)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:397
              callbacks = {<boost::intrusive::list_impl<boost::intrusive::bhtraits<folly::EventBase::LoopCallback, boost::intrusive::list_node_traits<void*>, (boost::intrusive::link_mode_type)2, boost::intrusive::dft_tag, 1>, unsigned long, false, void>> = {
                  static constant_time_size = false, static stateful_value_traits = <optimized out>,
                  static has_container_from_iterator = <optimized out>, static safemode_or_autounlink = true,
                  data_ = {<boost::intrusive::bhtraits<folly::EventBase::LoopCallback, boost::intrusive::list_node_traits<void*>, (boost::intrusive::link_mode_type)2, boost::intrusive::dft_tag, 1>> = {<boost::intrusive::bhtraits_base<folly::EventBase::LoopCallback, boost::intrusive::list_node<void*>*, boost::intrusive::dft_tag, 1>> = {<No data fields>}, static link_mode = boost::intrusive::auto_unlink},
                    root_plus_size_ = {<boost::intrusive::detail::size_holder<false, unsigned long, void>> = {
                        static constant_time_size = <optimized out>}, m_header = {<boost::intrusive::list_node<void*>> = {
                          next_ = 0x7fc05a7ea920, prev_ = 0x7fc05a7ea920}, <No data fields>}}}}, <No data fields>}
              message = 0xcdbee8 "Your code just tried to loop over an event base from inside another event base loop. Since libevent is not reentrant, this leads to undefined behavior in opt builds. Please fix immediately. For the co"...
              SCOPE_EXIT_STATE13 = {<folly::detail::ScopeGuardImplBase> = {dismissed_ = false}, function_ = {__this = 0x7fc06452e4a0}}
              res = 0
              ranLoopCallbacks = <optimized out>
              blocking = true
              once = false
      ---Type <return> to continue, or q <return> to quit---
              prev = {__d = {__r = 39380370803616103}}
              idleStart = {__d = {__r = 39380422152810793}}
              busy = <optimized out>
              idle = <optimized out>
              prevLoopThread = {_M_thread = 0}
      #16 0x0000000000c1ee96 in folly::EventBase::loop (this=this@entry=0x7fc06452e4a0)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:315
              guard = {list_ = {forbid = true, prev = 0x0, curr = {name = {static npos = <optimized out>, b_ = 0xcdbb5b "EventBase",
                      e_ = 0xcdbb64 ""}}}}
      #17 0x0000000000c20856 in folly::EventBase::loopForever (this=this@entry=0x7fc06452e4a0)
          at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/io/async/EventBase.cpp:538
              ret = <optimized out>
      #18 0x00000000005ba3c9 in worker_libevent (arg=0x7fc06452e490)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/thread.cc:257
              me = @0x7fc06452e490: <error reading variable>
      #19 0x0000000000ba3599 in run (this=0x7fc0649b7520)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/cb_pthreads.cc:51
      No locals.
      #20 platform_thread_wrap(void*) () at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/cb_pthreads.cc:64
      No locals.
      #21 0x00007fc0681c8ea5 in start_thread () from /lib64/libpthread.so.0
      No symbol table info available.
      #22 0x00007fc065b09b0d in clone () from /lib64/libc.so.6
      No symbol table info available.
      

      QE-TEST:

      guides/gradlew --refresh-dependencies testrunner -P jython=/opt/jython/bin/jython -P 'args=-i /tmp/temp_vol.ini -p bucket_storage=magma,bucket_ram_quota=1024,init_loading=True,bucket_eviction_policy=fullEviction,rerun=False -t storage.magma.magma_crash_recovery.MagmaCrashTests.test_crash_during_dedupe,nodes_init=3,skip_cleanup=True,num_items=500000,doc_size=1024,batch_size=100,sdk_timeout=60,log_level=info,infra_log_level=info,key_size=12,num_collections=10,ops_rate=20000,key_type=RandomKey,vbuckets=16,replicas=2,test_itr=3,bucket_history_retention_seconds=86400,bucket_history_retention_bytes=99636764160,standard_buckets=1,magma_buckets=1,num_scopes=1,autoCompactionDefined=true,meta_purge_interval=120,randomize_value=True -m rest'
      

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this section's header.

          Activity

            People

              ankush.sharma Ankush Sharma
              ankush.sharma Ankush Sharma
              Votes:
              0 Vote for this issue
              Watchers:
              10 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty