Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-45645

Rebalancing a node into a cluster where the disk is full on all nodes leads to a magma crash.

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Critical
    • 7.0.0
    • Cheshire-Cat
    • storage-engine
    • 7.0.0-4907
    • Untriaged
    • 1
    • Unknown

    Description

      172.23.120.170: Stack Trace of first crash - 6432f949-b23e-4299-a1fe4086-60c0606d.dmp

      Core was generated by `/opt/couchbase/bin/memcached -C /opt/couchbase/var/lib/couchbase/config/memcach'.
       #0  0x00007f0e14209387 in raise () from /lib64/libc.so.6
       #0  0x00007f0e14209387 in raise () from /lib64/libc.so.6
       No symbol table info available.
       #1  0x00007f0e1420aa78 in abort () from /lib64/libc.so.6
       No symbol table info available.
       #2  0x00007f0e14d7063c in __gnu_cxx::__verbose_terminate_handler () at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/vterminate.cc:95
               terminating = false
               t = <optimized out>
       #3  0x00000000005c434f in backtrace_terminate_handler() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/utilities/terminate_handler.cc:93
       No locals.
       #4  0x00007f0e14d7b8f6 in __cxxabiv1::__terminate(void (*)()) () at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/eh_terminate.cc:48
       No locals.
       #5  0x00007f0e14d7b961 in std::terminate () at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/eh_terminate.cc:58
       No locals.
       #6  0x00007f0e14d7bbf4 in __cxxabiv1::__cxa_throw (obj=obj@entry=0x7f0d74000940, tinfo=0x892d60 <typeinfo for std::runtime_error>, dest=0x419470 <_ZNSt13runtime_errorD1Ev@plt>) at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/libsupc++/eh_throw.cc:95
               globals = <optimized out>
               header = 0x7f0d740008c0
       #7  0x00007f0e17caa204 in magma::BySeqIterator::setSeqno (this=0x7f0b5aa32a80, newSeqno=139696143217552) at /home/couchbase/jenkins/workspace/couchbase-server-unix/magma/magma/kvstore/iterator.cc:72
       No locals.
       #8  0x00007f0e17d70e80 in magma::BySeqIterator::doFetch (this=0x7f0b5aa32a80) at /home/couchbase/jenkins/workspace/couchbase-server-unix/magma/magma/kvstore/format.h:89
       No locals.
       #9  0x00007f0e188d4c41 in MagmaKVStore::scan(BySeqnoScanContext&) () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/magma-kvstore/magma-kvstore.cc:1532
               startSeqno = <optimized out>
               onlyKeys = <optimized out>
               mctx = @0x7f0d4bc29fe0: <error reading variable>
       #10 0x00007f0e187124b3 in DCPBackfillBySeqnoDisk::scan() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/dcp/backfill_by_seqno_disk.cc:186
               aliveVBStates = {bits = {<std::_Base_bitset<1>> = {_M_w = 0}, <No data fields>}}
               activeStreamLoggingPrefix = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x0}, _M_string_length = 0, {_M_local_buf = '\000' <repeats 15 times>, _M_allocated_capacity = 0}}
               NoopSyncWriteCompleteCb = {<std::_Maybe_unary_or_binary_function<void, void const*, cb::engine_errc>> = {<std::binary_function<void const*, cb::engine_errc, void>> = {<No data fields>}, <No data fields>}, <std::_Function_base> = {static _M_max_size = 16, static _M_max_align = 8, _M_functor = {_M_unused = {_M_object = 0x0, _M_const_object = 0x0, _M_function_pointer = 0x0, _M_member_pointer = NULL}, _M_pod_data = '\000' <repeats 15 times>}, _M_manager = 0x0}, _M_invoker = 0x0}
               std::__ioinit = {static _S_refcount = 0, static _S_synced_with_stdio = true}
               NoopSeqnoAckCb = {<std::_Maybe_unary_or_binary_function<void, Vbid, long>> = {<std::binary_function<Vbid, long, void>> = {<No data fields>}, <No data fields>}, <std::_Function_base> = {static _M_max_size = 16, static _M_max_align = 8, _M_functor = {_M_unused = {_M_object = 0x0, _M_const_object = 0x0, _M_function_pointer = 0x0, _M_member_pointer = NULL}, _M_pod_data = '\000' <repeats 15 times>}, _M_manager = 0x0}, _M_invoker = 0x0}
               globalBucketLoggerName = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x0}, _M_string_length = 0, {_M_local_buf = '\000' <repeats 15 times>, _M_allocated_capacity = 0}}
               fmt::v5::internal::basic_data<void>::POWERS_OF_10_32 = {0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}
               fmt::v5::internal::basic_data<void>::POWERS_OF_10_64 = {0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000}
               fmt::v5::internal::basic_data<void>::DIGITS = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899"
               flatbuffers::flatbuffer_version_string = 0x3dd7d5 <Address 0x3dd7d5 out of bounds>
               boost::exception_detail::exception_ptr_static_exception_object<boost::exception_detail::bad_exception_>::e = {ptr_ = {px = 0x0, pn = {pi_ = 0x0}}}
               boost::exception_detail::exception_ptr_static_exception_object<boost::exception_detail::bad_alloc_>::e = {ptr_ = {px = 0x0, pn = {pi_ = 0x0}}}
       #11 0x00007f0e1871661a in DCPBackfillDisk::run() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/dcp/backfill_disk.cc:140
               lh = {_M_device = @0x7f0dfc2a7908}
       #12 0x00007f0e18718ba1 in BackfillManager::backfill() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/dcp/backfill-manager.cc:338
               aliveVBStates = {bits = {<std::_Base_bitset<1>> = {_M_w = 0}, <No data fields>}}
               std::__ioinit = {static _S_refcount = 0, static _S_synced_with_stdio = true}
               NoopSeqnoAckCb = {<std::_Maybe_unary_or_binary_function<void, Vbid, long>> = {<std::binary_function<Vbid, long, void>> = {<No data fields>}, <No data fields>}, <std::_Function_base> = {static _M_max_size = 16, static _M_max_align = 8, _M_functor = {_M_unused = {_M_object = 0x0, _M_const_object = 0x0, _M_function_pointer = 0x0, _M_member_pointer = NULL}, _M_pod_data = '\000' <repeats 15 times>}, _M_manager = 0x0}, _M_invoker = 0x0}
               NoopSyncWriteCompleteCb = {<std::_Maybe_unary_or_binary_function<void, void const*, cb::engine_errc>> = {<std::binary_function<void const*, cb::engine_errc, void>> = {<No data fields>}, <No data fields>}, <std::_Function_base> = {static _M_max_size = 16, static _M_max_align = 8, _M_functor = {_M_unused = {_M_object = 0x0, _M_const_object = 0x0, _M_function_pointer = 0x0, _M_member_pointer = NULL}, _M_pod_data = '\000' <repeats 15 times>}, _M_manager = 0x0}, _M_invoker = 0x0}
               globalBucketLoggerName = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x0}, _M_string_length = 0, {_M_local_buf = '\000' <repeats 15 times>, _M_allocated_capacity = 0}}
               boost::exception_detail::exception_ptr_static_exception_object<boost::exception_detail::bad_alloc_>::e = {ptr_ = {px = 0x0, pn = {pi_ = 0x0}}}
               boost::exception_detail::exception_ptr_static_exception_object<boost::exception_detail::bad_exception_>::e = {ptr_ = {px = 0x0, pn = {pi_ = 0x0}}}
               fmt::v5::internal::basic_data<void>::POWERS_OF_10_32 = {0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}
               fmt::v5::internal::basic_data<void>::POWERS_OF_10_64 = {0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000}
               fmt::v5::internal::basic_data<void>::DIGITS = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899"
       #13 0x00007f0e18718e14 in BackfillManagerTask::run() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/dcp/backfill-manager.cc:76
               aliveVBStates = {bits = {<std::_Base_bitset<1>> = {_M_w = 0}, <No data fields>}}
               std::__ioinit = {static _S_refcount = 0, static _S_synced_with_stdio = true}
               NoopSeqnoAckCb = {<std::_Maybe_unary_or_binary_function<void, Vbid, long>> = {<std::binary_function<Vbid, long, void>> = {<No data fields>}, <No data fields>}, <std::_Function_base> = {static _M_max_size = 16, static _M_max_align = 8, _M_functor = {_M_unused = {_M_object = 0x0, _M_const_object = 0x0, _M_function_pointer = 0x0, _M_member_pointer = NULL}, _M_pod_data = '\000' <repeats 15 times>}, _M_manager = 0x0}, _M_invoker = 0x0}
               NoopSyncWriteCompleteCb = {<std::_Maybe_unary_or_binary_function<void, void const*, cb::engine_errc>> = {<std::binary_function<void const*, cb::engine_errc, void>> = {<No data fields>}, <No data fields>}, <std::_Function_base> = {static _M_max_size = 16, static _M_max_align = 8, _M_functor = {_M_unused = {_M_object = 0x0, _M_const_object = 0x0, _M_function_pointer = 0x0, _M_member_pointer = NULL}, _M_pod_data = '\000' <repeats 15 times>}, _M_manager = 0x0}, _M_invoker = 0x0}
               globalBucketLoggerName = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x0}, _M_string_length = 0, {_M_local_buf = '\000' <repeats 15 times>, _M_allocated_capacity = 0}}
               boost::exception_detail::exception_ptr_static_exception_object<boost::exception_detail::bad_alloc_>::e = {ptr_ = {px = 0x0, pn = {pi_ = 0x0}}}
               boost::exception_detail::exception_ptr_static_exception_object<boost::exception_detail::bad_exception_>::e = {ptr_ = {px = 0x0, pn = {pi_ = 0x0}}}
               fmt::v5::internal::basic_data<void>::POWERS_OF_10_32 = {0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}
               fmt::v5::internal::basic_data<void>::POWERS_OF_10_64 = {0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, 10000000000000000, 100000000000000000, 1000000000000000000, 10000000000000000000}
               fmt::v5::internal::basic_data<void>::DIGITS = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899"
       #14 0x00007f0e187e4347 in GlobalTask::execute() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/globaltask.cc:74
               guard = {previous = 0x0}
       #15 0x00007f0e187df210 in FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::{lambda()#1}::operator()() const (__closure=0x7f0d8afe3e30) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/folly_executorpool.cc:196
               scheduleOverhead = <optimized out>
               start = {__d = {__r = 6569097517053717}}
               runAgain = false
               proxy = @0x7f0e13a52260: <error reading variable>
       #16 0x00007f0e187df210 in FollyExecutorPool::TaskProxy::timeoutExpired (this=<optimized out>) from /opt/couchbase/bin/../lib/libep.so
       No locals.
       #17 0x00007f0e1897b120 in operator() (this=0x7f0d8afe3e30) at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/Function.h:416
               fn = @0x7f0d8afe3e30: {<folly::detail::function::FunctionTraits<void()>> = {<No data fields>}, data_ = {big = 0x7f0e13a52260, tiny = {__data = "`\"\245\023\016\177\000\000\000\000\000\000\000\000\000\000\260#\245\023\016\177\000\000\373S\000\000\000\000\000\000\270\000\000\000\000\000\000\000\000?\376\212\r\177\000", __align = {<No data fields>}}}, call_ = 0x7f0e187df9a0 <folly::detail::function::FunctionTraits<void ()>::callSmall<FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::{lambda()#1}>(folly::detail::function::Data&)>, exec_ = 0x7f0e187dcc10 <folly::detail::function::execSmall<FollyExecutorPool::TaskProxy::scheduleViaCPUPool()::{lambda()#1}>(folly::detail::function::Op, folly::detail::function::Data*, folly::detail::function::Data)>}
       #18 folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&) (this=0x7f0dfc875400, thread=..., task=<unknown type in /usr/lib/debug/opt/couchbase/lib/libep.so.debug, CU 0x3feb4d6, DIE 0x4031941>) at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/executors/ThreadPoolExecutor.cpp:97
               rctx = {prev_ = {<std::__shared_ptr<folly::RequestContext, (__gnu_cxx::_Lock_policy)2>> = {<std::__shared_ptr_access<folly::RequestContext, (__gnu_cxx::_Lock_policy)2, false, false>> = {<No data fields>}, _M_ptr = 0x0, _M_refcount = {_M_pi = 0x0}}, <No data fields>}}
               startTime = {__d = {__r = 6569097517050268}}
               stats = {expired = false, waitTime = {__r = 156728454}, runTime = {__r = 0}, enqueueTime = {__d = {__r = 6569097360321814}}, requestId = 0}
       #19 0x00007f0e18962dfa in folly::CPUThreadPoolExecutor::threadRun (this=0x7f0dfc875400, thread=...) at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/executors/CPUThreadPoolExecutor.cpp:265
               guard = {list_ = {forbid = true, prev = 0x0, curr = {name = {static npos = <optimized out>, b_ = 0x7f0e189f5033 "CPUThreadPoolExecutor", e_ = 0x7f0e189f5048 ""}}}}
       #20 0x00007f0e1897e0d9 in __invoke_impl<void, void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&> (__t=<optimized out>, __f=<optimized out>) at /usr/local/include/c++/7.3.0/bits/invoke.h:73
       No locals.
       #21 __invoke<void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&> (__fn=<optimized out>) at /usr/local/include/c++/7.3.0/bits/invoke.h:95
       No locals.
       #22 __call<void, 0, 1> (__args=<optimized out>, this=<optimized out>) at /usr/local/include/c++/7.3.0/functional:467
       No locals.
       #23 operator()<> (this=<optimized out>) at /usr/local/include/c++/7.3.0/functional:551
       No locals.
       #24 folly::detail::function::FunctionTraits<void ()>::callBig<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)> >(folly::detail::function::Data&) (p=...) at /home/couchbase/jenkins/workspace/cbdeps-platform-build-old/deps/packages/build/folly/folly-prefix/src/folly/folly/Function.h:401
               fn = <optimized out>
       #25 0x00007f0e187ddc4d in operator() (this=0x7f0dfc848280) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/folly_executorpool.cc:54
               fn = @0x7f0dfc848280: <error reading variable>
       #26 operator() (__closure=0x7f0dfc848280) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/engines/ep/src/folly_executorpool.cc:54
               threadNameOpt = {storage_ = {{emptyState = -96 '\240', value = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7f0d8afe3fa0 "AuxIoPool0"}, _M_string_length = 10, {_M_local_buf = "AuxIoPool0\000\000\000\000\000", _M_allocated_capacity = 8029725099529106753}}}, hasValue = true}}
               func = <error reading variable func (Cannot access memory at address 0x7f0dfc848280)>
       #27 folly::detail::function::FunctionTraits<void ()>::callBig<CBRegisteredThreadFactory::newThread(folly::Function<void ()>&&)::{lambda()#1}>(folly::detail::function::Data&) (p=...) at /home/couchbase/jenkins/workspace/couchbase-server-unix/server_build/tlm/deps/folly.exploded/include/folly/Function.h:401
               fn = @0x7f0dfc848280: <error reading variable>
       #28 0x00007f0e14da4d40 in execute_native_thread_routine () at /tmp/deploy/objdir/../gcc-10.2.0/libstdc++-v3/src/c++11/thread.cc:80
       No locals.
       #29 0x00007f0e145a8ea5 in start_thread () from /lib64/libpthread.so.0
       No symbol table info available.
       #30 0x00007f0e142d18dd in clone () from /lib64/libc.so.6
       No symbol table info available.
      

      QE Test

      guides/gradlew --refresh-dependencies testrunner -P jython=/opt/jython/bin/jython -P 'args=-i /tmp/magma_temp_job4.ini sdk_timeout=60,bucket_eviction_policy=fullEviction,randomize_value=True,doc_size=1024,bucket_storage=magma,enable_dp=true -t magma.magma_disk_full.MagmaDiskFull.test_disk_full_add_nodes,nodes_init=3,num_items=5000000,doc_size=4096,sdk_timeout=60,replicas=1,GROUP=P0'
       
      Test Input params:
      {'doc_size': '1024', 'conf_file': 'conf/magma/disk_full.conf', 'spec': 'disk_full', 'num_nodes': 4, 'rerun': False, 'GROUP': 'P0', 'enable_dp': 'true', 'sdk_timeout': '60', 'case_number': 8, 'cluster_name': 'magma_temp_job4', 'ini': '/tmp/magma_temp_job4.ini', 'replicas': '1', 'bucket_storage': 'magma', 'bucket_eviction_policy': 'fullEviction', 'logs_folder': '/data/workspace/magma_temp_job4/logs/testrunner-21-Apr-13_23-09-33/test_8', 'nodes_init': '3', 'num_items': '5000000', 'randomize_value': 'True'}
      

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this section's header.

          Activity

            People

              ritesh.agarwal Ritesh Agarwal
              ritesh.agarwal Ritesh Agarwal
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty