Details
-
Bug
-
Resolution: Cannot Reproduce
-
Major
-
4.0.0
-
Security Level: Public
-
4.0.0-3241
-
Untriaged
-
Centos 64-bit
-
Unknown
-
Mar 9 - Mar 27
Description
steps:
1. 3 nodes in cluster, 4 buckets. Run the data loader for more than a day.
2. setup replication from SRC to cluster DEST for all buckets.
3. Rebalance in at SRC cluster;
   rebalance in at DEST cluster.
4. Graceful Fail Over(rebalance) for node on SRC, add back(Delta Recovery) and rebalance - PASS
5. Hard Fail Over for node on SRC, add back(Full Recovery) and rebalance - FAIL (rebalance exited with reason bad_replicas; UI log below, newest entries first)
Event Module Code Server Node Time
Rebalance exited with reason bad_replicas
ns_orchestrator002 ns_1@172.23.105.163 15:33:42 - Fri Jun 26, 2015
Bad replicators after rebalance:
Missing = [
,
{'ns_1@172.23.105.163','ns_1@172.23.105.166',429},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',430},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',431},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',432},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',433},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',434},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',435},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',436},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',437},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',438},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',439},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',440},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',441},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',442},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',443},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',444},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',445},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',446},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',447},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',448},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',449},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',450},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',451},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',452},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',453},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',454},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',455},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',456},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',457},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',458},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',459},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',460},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',461},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',462},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',463},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',464},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',465},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',466},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',467},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',468},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',469},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',470},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',471},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',472},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',473},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',474},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',475},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',476},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',477},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',478},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',479},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',480},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',481},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',482},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',483},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',484},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',485},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',486},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',487},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',488},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',489},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',490},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',491},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',492},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',493},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',494},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',495},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',496},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',497},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',498},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',499},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',500},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',501},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',502},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',503},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',504},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',505},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',506},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',507},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',508},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',509},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',510},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',511},
{'ns_1@172.23.105.163','ns_1@172.23.105.166',512},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',256},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',257},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',258},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',259},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',260},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',261},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',262},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',263},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',264},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',265},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',266},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',267},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',268},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',269},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',270},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',271},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',272},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',273},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',274},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',275},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',276},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',277},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',278},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',279},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',280},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',281},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',282},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',283},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',284},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',285},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',286},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',287},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',288},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',289},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',290},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',291},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',292},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',293},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',294},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',295},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',296},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',297},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',298},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',299},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',300},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',301},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',302},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',303},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',304},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',305},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',306},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',307},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',308},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',309},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',310},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',311},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',312},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',313},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',314},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',315},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',316},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',317},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',318},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',319},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',320},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',321},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',322},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',323},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',324},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',325},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',326},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',327},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',328},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',329},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',330},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',331},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',332},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',333},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',334},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',335},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',336},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',337},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',338},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',339},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',340},
{'ns_1@172.23.105.165','ns_1@172.23.105.166',341}]
Extras = [] ns_rebalancer002 ns_1@172.23.105.163 15:33:42 - Fri Jun 26, 2015
Bucket "RevAB" rebalance does not seem to be swap rebalance ns_vbucket_mover000 ns_1@172.23.105.163 14:48:35 - Fri Jun 26, 2015
Started rebalancing bucket RevAB ns_rebalancer000 ns_1@172.23.105.163 14:48:33 - Fri Jun 26, 2015
Bucket "RevAB" loaded on node 'ns_1@172.23.105.166' in 0 seconds. ns_memcached000 ns_1@172.23.105.166 14:48:32 - Fri Jun 26, 2015
IP address seems to have changed. Unable to listen on 'ns_1@172.23.105.164'. menelaus_web_alerts_srv000 ns_1@172.23.105.164 14:38:57 - Fri Jun 26, 2015
Haven't heard from a higher priority node or a master, so I'm taking over. mb_master000 ns_1@172.23.105.164 14:38:40 - Fri Jun 26, 2015
Could not automatically fail over node ('ns_1@172.23.105.164'). Rebalance is running. auto_failover001 ns_1@172.23.105.163 14:37:58 - Fri Jun 26, 2015
Haven't heard from a higher priority node or a master, so I'm taking over. mb_master000 ns_1@172.23.105.164 14:37:31 - Fri Jun 26, 2015
Bucket "UserInfo" rebalance does not seem to be swap rebalance ns_vbucket_mover000 ns_1@172.23.105.163 14:31:43 - Fri Jun 26, 2015
Started rebalancing bucket UserInfo ns_rebalancer000 ns_1@172.23.105.163 14:31:42 - Fri Jun 26, 2015
Bucket "UserInfo" loaded on node 'ns_1@172.23.105.166' in 0 seconds. ns_memcached000 ns_1@172.23.105.166 14:31:41 - Fri Jun 26, 2015
Deleting old data files of bucket "AbRegNums" ns_storage_conf000 ns_1@172.23.105.166 14:31:34 - Fri Jun 26, 2015
Deleting old data files of bucket "MsgsCalls" ns_storage_conf000 ns_1@172.23.105.166 14:31:34 - Fri Jun 26, 2015
Deleting old data files of bucket "RevAB" ns_storage_conf000 ns_1@172.23.105.166 14:31:34 - Fri Jun 26, 2015
Deleting old data files of bucket "UserInfo" ns_storage_conf000 ns_1@172.23.105.166 14:31:34 - Fri Jun 26, 2015
Starting rebalance, KeepNodes = ['ns_1@172.23.105.163','ns_1@172.23.105.164',
'ns_1@172.23.105.165','ns_1@172.23.105.166'], EjectNodes = [], Failed over and being ejected nodes = []; no delta recovery nodes
ns_orchestrator004 ns_1@172.23.105.163 14:31:32 - Fri Jun 26, 2015
Shutting down bucket "MsgsCalls" on 'ns_1@172.23.105.166' for deletion ns_memcached000 ns_1@172.23.105.166 14:30:53 - Fri Jun 26, 2015
Shutting down bucket "AbRegNums" on 'ns_1@172.23.105.166' for deletion ns_memcached000 ns_1@172.23.105.166 14:30:52 - Fri Jun 26, 2015
Control connection to memcached on 'ns_1@172.23.105.166' disconnected:
ns_memcached000 ns_1@172.23.105.166 14:30:50 - Fri Jun 26, 2015
Control connection to memcached on 'ns_1@172.23.105.166' disconnected: {{badmatch,
{error,
closed}},
[{mc_client_binary,
cmd_vocal_recv,
5,
[
{mc_client_binary,
select_bucket,
2,
[{file, "src/mc_client_binary.erl"}
,
{line, 351}]},
{ns_memcached,
ensure_bucket,
2,
[
{ns_memcached,
handle_info,
2,
[{file, "src/ns_memcached.erl"}
,
{line, 745}]},
{gen_server,
handle_msg,
5,
[
{ns_memcached,
init,
1,
[{file, "src/ns_memcached.erl"}, {line, 174}]},
{gen_server,
init_it,
6,
[{file, "gen_server.erl"}
,
{line, 304}]},
{proc_lib,
init_p_do_apply,
3,
[
,
{line, 239}]}]} ns_memcached000 ns_1@172.23.105.166 14:30:50 - Fri Jun 26, 2015
Failed over 'ns_1@172.23.105.166': ok ns_rebalancer000 ns_1@172.23.105.163 14:30:49 - Fri Jun 26, 2015
Shutting down bucket "RevAB" on 'ns_1@172.23.105.166' for deletion ns_memcached000 ns_1@172.23.105.166 14:30:43 - Fri Jun 26, 2015
Shutting down bucket "UserInfo" on 'ns_1@172.23.105.166' for deletion ns_memcached000 ns_1@172.23.105.166 14:30:42 - Fri Jun 26, 2015
Starting failing over 'ns_1@172.23.105.166' ns_rebalancer000 ns_1@172.23.105.163 14:30:40 - Fri Jun 26, 2015
Reset auto-failover count auto_failover000 ns_1@172.23.105.163 14:26:44 - Fri Jun 26, 2015
Rebalance completed successfully.
ns_orchestrator001 ns_1@172.23.105.163 14:26:44 - Fri Jun 26, 2015