Details
-
Bug
-
Resolution: Fixed
-
Blocker
-
2.0-developer-preview-4
-
Security Level: Public
-
dp4 build 722
10 million docs
4 node cluster
Description
Rebalance fails and UI reports "status stopped by janitor" while adding a node back to the cluster it originally belonged to (see screenshot).
Looks like memcached crashed due to seg fault on the node that is being added. Entire core files are available in /tmp dir of host: 10.1.2.128 (login: root/couchbase). Excerpt below. Diags attached.
Core was generated by `/opt/couchbase/bin/memcached -X /opt/couchbase/lib/memcached/stdin_term_handler'.
Program terminated with signal 11, Segmentation fault.
#0 LoadStorageKVPairCallback::callback (this=0xb9dd8c0, val=...) at stored-value.hh:168
168 stored-value.hh: No such file or directory.
in stored-value.hh
(gdb) thread a a bt
....
....
#0 0x00002aaaaad17bb2 in Internal::map_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, index_entry>, Internal::extract1st<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, index_entry> >, true, std::tr1::hashtable<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, index_entry>, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, index_entry> >, Internal::extract1st<std::pair<std::basic_--Type <return> to continue, or q <return> to quit--
string<char, std::char_traits<char>, std::allocator<char> > const, index_entry> >, std::equal_to<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::tr1::hash<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, Internal::mod_range_hashing, Internal::default_ranged_hash, Internal::prime_rehash_policy, false, false, true> >::operator[] (this=0xbd855a0, k=
"7178d94773bab8a4") at /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/tr1/hashtable:682
#1 0x00002aaaaad13537 in Checkpoint::queueDirty (this=0xbd85540, qi=..., checkpointManager=<value optimized out>) at checkpoint.cc:129
#2 0x00002aaaaad13f36 in CheckpointManager::queueDirty (this=0xbdc0380, qi=..., vbucket=...) at checkpoint.cc:742
#3 0x00002aaaaad28416 in EventuallyPersistentStore::queueDirty (this=0xb97c480, key="7178d94773bab8a4", vbid=<value optimized out>,
op=queue_op_set, value=<value optimized out>, flags=0, exptime=0, cas=267433813518097, seqno=0, rowid=1, tapBackfill=false)
at ep.cc:2524
#4 0x00002aaaaad29adc in EventuallyPersistentStore::setWithMeta (this=0xb97c480, itm=..., cas=0, cookie=0xb875340, force=true,
allowExisting=true) at ep.cc:1461
#5 0x00002aaaaad46a36 in EventuallyPersistentEngine::tapNotify(const void *, void *, uint16_t, uint8_t, uint16_t, ._169, uint32_t, const void *, size_t, uint32_t, uint32_t, uint64_t, const void *, size_t, uint16_t) (this=0xb980000, cookie=0xb875340, engine_specific=0x37c1a020,
nengine=4, tap_flags=<value optimized out>, tap_event=TAP_MUTATION, tap_seqno=46907, key=0x37c1a02c, nkey=16, flags=0, exptime=0,
cas=267433813518097, data=0x37c1a03c, ndata=226, vbucket=180) at ep_engine.cc:1779
#6 0x00002aaaaaaaf7ff in bucket_tap_notify (handle=<value optimized out>, cookie=0xb875340, engine_specific=0x37c1a020, nengine=4,
ttl=254 '\376', tap_flags=0, tap_event=TAP_MUTATION, tap_seqno=46907, key=0x37c1a02c, nkey=16, flags=0, exptime=0, cas=267433813518097,
data=0x37c1a03c, ndata=226, vbucket=180) at bucket_engine.c:2003
#7 0x0000000000406d21 in process_bin_tap_packet (event=338681368, c=0xb875340) at daemon/memcached.c:2970
#8 0x0000000000410d73 in process_bin_packet (c=0xb875340) at daemon/memcached.c:3056
#9 complete_nread_binary (c=0xb875340) at daemon/memcached.c:3659
#10 0x0000000000411bcd in complete_nread (c=0xb875340) at daemon/memcached.c:3741
#11 conn_nread (c=0xb875340) at daemon/memcached.c:5583
#12 0x0000000000407234 in event_handler (fd=<value optimized out>, which=<value optimized out>, arg=0xb875340) at daemon/memcached.c:5884
#13 0x00002ba70f163df9 in event_process_active_single_queue (base=0xb8cea00, flags=0) at event.c:1308
#14 event_process_active (base=0xb8cea00, flags=0) at event.c:1375
#15 event_base_loop (base=0xb8cea00, flags=0) at event.c:1572
#16 0x0000000000413694 in worker_libevent (arg=0x7470800) at daemon/thread.c:304
#17 0x0000003797a0673d in start_thread () from /lib64/libpthread.so.0
#18 0x0000003796ed44bd in clone () from /lib64/libc.so.6
Thread 5 (Thread 13528):
#0 0x0000003796ed48a8 in epoll_wait () from /lib64/libc.so.6
#1 0x00002ba70f174c28 in epoll_dispatch (base=0xb8ce780, tv=<value optimized out>) at epoll.c:404
#2 0x00002ba70f163a4c in event_base_loop (base=0xb8ce780, flags=0) at event.c:1558
#3 0x0000000000413694 in worker_libevent (arg=0x7470900) at daemon/thread.c:304
#4 0x0000003797a0673d in start_thread () from /lib64/libpthread.so.0
#5 0x0000003796ed44bd in clone () from /lib64/libc.so.6
Thread 4 (Thread 13529):
#0 0x0000003796e7c863 in _wordcopy_fwd_dest_aligned () from /lib64/libc.so.6
#1 0x0000003796e7adcf in memmove () from /lib64/libc.so.6
#2 0x00002aaaaada6aca in MemcachedEngine::processInput (this=0xb99c800) at mc-kvstore/mc-engine.cc:970
#3 0x00002aaaaada6ba5 in MemcachedEngine::wait (this=0xb99c800) at mc-kvstore/mc-engine.cc:1051
#4 0x00002aaaaada8105 in MemcachedEngine::get (this=0xb99c800, key="1-edbe9f6288001e0b", vb=<value optimized out>, cb=...)
at mc-kvstore/mc-engine.cc:1129
#5 0x00002aaaaadaba54 in MCKVStore::get (this=0x746eb80, key="1-edbe9f6288001e0b", vb=193, cb=...) at mc-kvstore/mc-kvstore.cc:57
#6 0x00002aaaaad8cd78 in TapBGFetchCallback::callback (this=0x137071d0, d=<value optimized out>,
t=std::tr1::shared_ptr (count 0) 0x453edf50) at tapconnection.cc:957
#7 0x00002aaaaad1dabf in Task::run (this=<value optimized out>, d=..., t=<value optimized out>) at dispatcher.hh:139
#8 0x00002aaaaad1cc11 in Dispatcher::run (this=0xb9761c0) at dispatcher.cc:119
#9 0x00002aaaaad1d463 in launch_dispatcher_thread (arg=0x12ae03c0) at dispatcher.cc:28
#10 0x0000003797a0673d in start_thread () from /lib64/libpthread.so.0
--Type <return> to continue, or q <return> to quit--
#11 0x0000003796ed44bd in clone () from /lib64/libc.so.6
Thread 3 (Thread 13531):
#0 0x0000003797a0b150 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00002aaaaad1adc8 in wait (this=0xb992080, d=...) at syncobject.hh:42
#2 IdleTask::run (this=0xb992080, d=...) at dispatcher.cc:244
#3 0x00002aaaaad1cc11 in Dispatcher::run (this=0xb9768c0) at dispatcher.cc:119
#4 0x00002aaaaad1d463 in launch_dispatcher_thread (arg=0xb976914) at dispatcher.cc:28
#5 0x0000003797a0673d in start_thread () from /lib64/libpthread.so.0
#6 0x0000003796ed44bd in clone () from /lib64/libc.so.6
Thread 2 (Thread 13532):
#0 0x0000003797a0b150 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00002aaaaad434f0 in wait (this=0xb980000) at syncobject.hh:42
#2 wait (this=0xb980000) at syncobject.hh:58
#3 wait (this=0xb980000) at tapconnmap.hh:209
#4 EventuallyPersistentEngine::notifyPendingConnections (this=0xb980000) at ep_engine.cc:3197
#5 0x00002aaaaad435c1 in EvpNotifyPendingConns (arg=0xb980000) at ep_engine.cc:971
#6 0x0000003797a0673d in start_thread () from /lib64/libpthread.so.0
#7 0x0000003796ed44bd in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x45def940 (LWP 13530)):
#0 LoadStorageKVPairCallback::callback (this=0xb9dd8c0, val=...) at stored-value.hh:168
#1 0x00002aaaaada9e21 in TapResponseHandler::request(._109 *) (this=0xb9dd840, req=<value optimized out>) at mc-kvstore/mc-engine.cc:434
#2 0x00002aaaaada4a82 in MemcachedEngine::handleRequest(._109 *) (this=<value optimized out>, req=0xbf72000) at mc-kvstore/mc-engine.cc:631
#3 0x00002aaaaada6aa7 in MemcachedEngine::processInput (this=0xb99c800) at mc-kvstore/mc-engine.cc:960
#4 0x00002aaaaada6ba5 in MemcachedEngine::wait (this=0xb99c800) at mc-kvstore/mc-engine.cc:1051
#5 0x00002aaaaada72f6 in MemcachedEngine::tap (this=0xb99c800, cb=std::tr1::shared_ptr (count -1425288768) 0x45deebb0)
at mc-kvstore/mc-engine.cc:1284
#6 0x00002aaaaadacd45 in MCKVStore::dump (this=0x746eb80, cb=std::tr1::shared_ptr (count 0) 0x45deeca0) at mc-kvstore/mc-kvstore.cc:150
#7 0x00002aaaaad256d5 in EventuallyPersistentStore::warmup (this=0xb97c480, st=std::map with 172 elements =
, keysOnly=false)
at ep.cc:2681
#8 0x00002aaaaad61aa1 in Flusher::step (this=0xb97a5a0, d=<value optimized out>, tid=Traceback (most recent call last):
File "/usr/share/gdb/python/libstdcxx/v6/printers.py", line 32, in to_string
return '%s (count %d) %s' % (self.typename,
TypeError: int argument required
) at flusher.cc:211
#9 0x00002aaaaad6215e in FlusherStepper::callback (this=0x3cb8c6e0, d=..., t=<value optimized out>) at flusher.cc:23
#10 0x00002aaaaad1dabf in Task::run (this=<value optimized out>, d=..., t=<value optimized out>) at dispatcher.hh:139
#11 0x00002aaaaad1cc11 in Dispatcher::run (this=0xb976380) at dispatcher.cc:119
#12 0x00002aaaaad1d463 in launch_dispatcher_thread (arg=0x1) at dispatcher.cc:28
#13 0x0000003797a0673d in start_thread () from /lib64/libpthread.so.0
#14 0x0000003796ed44bd in clone () from /lib64/libc.so.6
(gdb)
(gdb)
(gdb) f
#0 LoadStorageKVPairCallback::callback (this=0xb9dd8c0, val=...) at stored-value.hh:168
168 in stored-value.hh