Details
-
Bug
-
Resolution: Fixed
-
Critical
-
5.5.0
-
None
-
Untriaged
-
Release Note
-
Yes
Description
./testrunner -i /tmp/testexec.29355.ini stop-on-failure=False,fail_on_errors=1,get-cbcollect-info=True -t xdcr.rebalanceXDCR.Rebalance.async_rebalance_out_master,items=100000,rdirection=unidirection,ctopology=chain,update=C1,delete=C1,rebalance=C1,GROUP=P1
The above test failed with a rebalance hang on CentOS, build 5.5.0-1632. The following crash report was found in diag.log of the ejected node 172.23.123.76 (.76):
2018-01-06T15:21:07.690-08:00, xdcr:0:info:message(ns_1@172.23.123.76) - Replication from bucket "default" to bucket "default" on cluster "remote_cluster_C1-C2" removed.
|
2018-01-06T15:21:07.737-08:00, xdcr:0:info:message(ns_1@172.23.123.76) - Remote cluster reference "remote_cluster_C1-C2" known via 172.23.97.219:8091 removed.
|
2018-01-06T15:21:08.356-08:00, ns_memcached:0:info:message(ns_1@172.23.123.76) - Control connection to memcached on 'ns_1@172.23.123.76' disconnected: {badmatch,
|
{error,
|
closed}}
|
2018-01-06T15:21:08.632-08:00, menelaus_web:11:info:message(ns_1@172.23.123.76) - Deleted bucket "default"
|
|
2018-01-06T15:21:41.781-08:00, ns_cluster:0:info:message(ns_1@172.23.123.76) - Change of address to "172.23.123.76" is requested.
|
2018-01-06T15:21:41.812-08:00, ns_cluster:0:info:message(ns_1@172.23.123.76) - Started node add transaction by adding node 'ns_1@172.23.97.218' to nodes_wanted (group: undefined)
|
2018-01-06T15:21:41.947-08:00, ns_node_disco:4:info:node up(ns_1@172.23.123.76) - Node 'ns_1@172.23.123.76' saw that node 'ns_1@172.23.97.218' came up. Tags: []
|
2018-01-06T15:21:43.749-08:00, menelaus_sup:1:info:web start ok(ns_1@172.23.97.218) - Couchbase Server has started on web port 8091 on node 'ns_1@172.23.97.218'. Version: "5.5.0-1632-enterprise".
|
2018-01-06T15:21:43.808-08:00, ns_cluster:3:info:message(ns_1@172.23.97.218) - Node ns_1@172.23.97.218 joined cluster
|
2018-01-06T15:21:43.879-08:00, ns_orchestrator:4:info:message(ns_1@172.23.123.76) - Starting rebalance, KeepNodes = ['ns_1@172.23.97.218','ns_1@172.23.123.76'], EjectNodes = [], Failed over and being ejected nodes = []; no delta recovery nodes
|
|
2018-01-06T15:21:43.975-08:00, ns_storage_conf:0:info:message(ns_1@172.23.123.76) - Deleting old data files of bucket "default"
|
2018-01-06T15:21:44.562-08:00, ns_orchestrator:0:info:message(ns_1@172.23.123.76) - Rebalance completed successfully.
|
2018-01-06T15:22:48.173-08:00, menelaus_web:12:info:message(ns_1@172.23.123.76) - Created bucket "default" of type: couchbase
|
[{num_replicas,1},
|
{replica_index,true},
|
{ram_quota,1645215744},
|
{flush_enabled,true},
|
{num_threads,3},
|
{eviction_policy,value_only},
|
{conflict_resolution_type,seqno},
|
{storage_mode,couchstore}]
|
2018-01-06T15:22:48.662-08:00, ns_memcached:0:info:message(ns_1@172.23.123.76) - Bucket "default" loaded on node 'ns_1@172.23.123.76' in 0 seconds.
|
2018-01-06T15:22:48.740-08:00, ns_memcached:0:info:message(ns_1@172.23.97.218) - Bucket "default" loaded on node 'ns_1@172.23.97.218' in 0 seconds.
|
2018-01-06T15:22:53.867-08:00, xdcr:0:info:message(ns_1@172.23.123.76) - Created remote cluster reference "remote_cluster_C1-C2" via 172.23.97.219:8091.
|
2018-01-06T15:22:53.970-08:00, xdcr:0:info:message(ns_1@172.23.123.76) - Replication from bucket "default" to bucket "default" on cluster "remote_cluster_C1-C2" created.
|
2018-01-06T15:25:09.822-08:00, ns_orchestrator:4:info:message(ns_1@172.23.123.76) - Starting rebalance, KeepNodes = ['ns_1@172.23.97.218'], EjectNodes = ['ns_1@172.23.123.76'], Failed over and being ejected nodes = []; no delta recovery nodes
|
|
2018-01-06T15:25:09.950-08:00, ns_rebalancer:0:info:message(ns_1@172.23.123.76) - Started rebalancing bucket default
|
2018-01-06T15:25:10.391-08:00, ns_vbucket_mover:0:info:message(ns_1@172.23.123.76) - Bucket "default" rebalance appears to be swap rebalance
|
-------------------------------
|
|
|
per_node_processes('ns_1@172.23.123.76') =
|
{<0.28099.21>,
|
[{registered_name,[]},
|
{status,waiting},
|
{initial_call,{proc_lib,init_p,5}},
|
{backtrace,[<<"Program counter: 0x00007fa67cb52590 (ns_pubsub:do_subscribe_link/4 + 392)">>,
|
<<"CP: 0x0000000000000000 (invalid)">>,<<"arity = 0">>,
|
<<>>,
|
<<"0x00007fa675d10da0 Return addr 0x00007fa67a383640 (proc_lib:init_p_do_apply/3 + 56)">>,
|
<<"y(0) []">>,
|
<<"y(1) {ns_pubsub,#Ref<0.0.14.42325>}">>,
|
<<"y(2) <0.28095.21>">>,<<"y(3) ns_stats_event">>,
|
<<>>,
|
<<"0x00007fa675d10dc8 Return addr 0x0000000000891848 (<terminate process normally>)">>,
|
<<"y(0) Catch 0x00007fa67a383660 (proc_lib:init_p_do_apply/3 + 88)">>,
|
<<>>]},
|
{error_handler,error_handler},
|
{garbage_collection,[{min_bin_vheap_size,46422},
|
{min_heap_size,233},
|
{fullsweep_after,512},
|
{minor_gcs,0}]},
|
{heap_size,233},
|
{total_heap_size,233},
|
{links,[<0.28095.21>,<0.305.0>]},
|
{monitors,[]},
|
{monitored_by,[]},
|
{memory,2744},
|
{messages,[]},
|
{message_queue_len,0},
|
{reductions,21},
|
{trap_exit,true},
|
{current_location,{ns_pubsub,do_subscribe_link,4,
|
[{file,"src/ns_pubsub.erl"},{line,125}]}},
|
{dictionary,[{'$ancestors',['stats_archiver-@xdcr-default',
|
'single_bucket_kv_sup-default',
|
ns_bucket_sup,ns_bucket_worker_sup,
|
ns_server_sup,ns_server_nodes_sup,
|
<0.171.0>,ns_server_cluster_sup,<0.88.0>]},
|
{'$initial_call',{ns_pubsub,do_subscribe_link,4}}]}]}
|
{<0.28098.21>,
|
[{registered_name,'stats_reader-@xdcr-default'},
|
{status,waiting},
|
{initial_call,{proc_lib,init_p,5}},
|
{backtrace,[<<"Program counter: 0x00007fa67a0f3448 (gen_server:loop/6 + 264)">>,
|
<<"CP: 0x0000000000000000 (invalid)">>,<<"arity = 0">>,
|
<<>>,
|
<<"0x00007fa62ed17798 Return addr 0x00007fa67a383640 (proc_lib:init_p_do_apply/3 + 56)">>,
|
<<"y(0) []">>,<<"y(1) infinity">>,
|
<<"y(2) stats_reader">>,
|
<<"y(3) {state,\"@xdcr-default\"}">>,
|
<<"y(4) 'stats_reader-@xdcr-default'">>,
|
<<"y(5) <0.28052.21>">>,<<>>,
|
<<"0x00007fa62ed177d0 Return addr 0x0000000000891848 (<terminate process normally>)">>,
|
<<"y(0) Catch 0x00007fa67a383660 (proc_lib:init_p_do_apply/3 + 88)">>,
|
<<>>]},
|
{error_handler,error_handler},
|
{garbage_collection,[{min_bin_vheap_size,46422},
|
{min_heap_size,233},
|
{fullsweep_after,512},
|
{minor_gcs,0}]},
|
{heap_size,233},
|
{total_heap_size,233},
|
{links,[<0.28052.21>]},
|
{monitors,[]},
|
{monitored_by,[]},
|
{memory,2704},
|
{messages,[]},
|
{message_queue_len,0},
|
{reductions,26},
|
{trap_exit,false},
|
{current_location,{gen_server,loop,6,
|
[{file,"gen_server.erl"},{line,358}]}},
|
{dictionary,[{'$ancestors',['single_bucket_kv_sup-default',
|
ns_bucket_sup,ns_bucket_worker_sup,
|
ns_server_sup,ns_server_nodes_sup,
|
<0.171.0>,ns_server_cluster_sup,<0.88.0>]},
|
{'$initial_call',{stats_reader,init,1}}]}]}
|
Attaching logs and console output
Attachments
Issue Links
- duplicates
-
MB-27739 [FTS-Windows] Indexing stuck at 998 of 711000 docs, only 555/1024 couch files present
- Closed
- is duplicated by
-
MB-26065 rebalance exited with reason janitor_agent_servant_died
- Closed
-
MB-27503 [XDCR] replication took more than 300s to catch up - changes_left does not come down to zero for default bucket in log
- Closed
-
MB-27588 [FTS] Indexing get stuck > 3mins (intermittent, mossStore, DCP backfill create ended prematurely)
- Closed