Details
-
Bug
-
Resolution: Fixed
-
Critical
-
7.6.0
-
7.6.0-1859
-
Untriaged
-
0
-
Unknown
Description
Rebalances that have failed (i.e. rebalance failures that are not test-induced):
Rebalance 1 fail
{"stageInfo":{"index":{"startTime":"2023-11-28T11:51:08.349-08:00","completedTime":false,"timeTaken":20047},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T11:51:02.103-08:00","completedTime":"2023-11-28T11:51:08.340-08:00","timeTaken":6237},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T11:51:08.340-08:00","completedTime":"2023-11-28T11:51:08.349-08:00","timeTaken":9}},"rebalanceId":"616d1837685aa055cda639b28a1b6355","nodesInfo":{"active_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67","ns_1@172.23.97.108"],"keep_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":["ns_1@172.23.97.108"],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T11:51:02.090-08:00","completedTime":"2023-11-28T11:51:28.395-08:00","timeTaken":26306,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n {{badmatch,\n {error,\n {bad_nodes,index,set_service_manager,\n [{'ns_1@172.23.97.108',\n {exit,\n {{linked_process_died,<34822.5516.85>,\n {'ns_1@172.23.97.108',\n {no_connection,\"index-service_api\"}}},\n {gen_server,call,\n [{'service_agent-index',\n 'ns_1@172.23.97.108'},\n {set_service_manager,<0.6351.147>},\n infinity]}}}}]}}},\n [{service_manager,set_service_manager,1,\n [{file,\"src/service_manager.erl\"},\n {line,188}]},\n {service_manager,run_op,1,\n [{file,\"src/service_manager.erl\"},\n {line,146}]},\n {proc_lib,init_p,3,\n 
[{file,\"proc_lib.erl\"},{line,225}]}]}}."} |
Rebalance 2 fail
{"stageInfo":{"index":{"startTime":"2023-11-28T12:38:38.122-08:00","completedTime":false,"timeTaken":60278},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T12:38:34.068-08:00","completedTime":"2023-11-28T12:38:38.108-08:00","timeTaken":4040},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T12:38:38.108-08:00","completedTime":"2023-11-28T12:38:38.122-08:00","timeTaken":14}},"rebalanceId":"fbbcce33eb989b59ab6065a048db46e2","nodesInfo":{"active_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"keep_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":[],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T12:38:34.064-08:00","completedTime":"2023-11-28T12:39:38.399-08:00","timeTaken":64335,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n {agent_died,<34822.29020.91>,\n {linked_process_died,<34822.11308.94>,\n {'ns_1@172.23.97.108',\n {timeout,\n {gen_server,call,\n [<34822.29412.91>,\n {call,\"ServiceAPI.PrepareTopologyChange\",\n #Fun<json_rpc_connection.0.36915653>,\n #{timeout => 60000}},\n 60000]}}}}}}."} |
Rebalance 3 fail
{"stageInfo":{"index":{"startTime":"2023-11-28T12:51:04.933-08:00","completedTime":false,"timeTaken":33520},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T12:51:00.625-08:00","completedTime":"2023-11-28T12:51:04.921-08:00","timeTaken":4296},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T12:51:04.921-08:00","completedTime":"2023-11-28T12:51:04.933-08:00","timeTaken":12}},"rebalanceId":"5a0f8d71d24f6bf3b3d62d47bcad22e8","nodesInfo":{"active_nodes":["ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67","ns_1@172.23.105.122","ns_1@172.23.97.108"],"keep_nodes":["ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":["ns_1@172.23.105.122","ns_1@172.23.97.108"],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T12:51:00.617-08:00","completedTime":"2023-11-28T12:51:38.453-08:00","timeTaken":37836,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n {{badmatch,\n {error,\n {bad_nodes,index,set_service_manager,\n [{'ns_1@172.23.97.108',\n {exit,\n {{linked_process_died,<34822.21321.96>,\n {'ns_1@172.23.97.108',\n {no_connection,\"index-service_api\"}}},\n {gen_server,call,\n [{'service_agent-index',\n 'ns_1@172.23.97.108'},\n {set_service_manager,<0.11522.164>},\n infinity]}}}}]}}},\n [{service_manager,set_service_manager,1,\n [{file,\"src/service_manager.erl\"},\n {line,188}]},\n {service_manager,run_op,1,\n [{file,\"src/service_manager.erl\"},\n {line,146}]},\n {proc_lib,init_p,3,\n 
[{file,\"proc_lib.erl\"},{line,225}]}]}}."} |
Rebalance 4 fail
{"stageInfo":{"index":{"startTime":"2023-11-28T13:15:48.403-08:00","completedTime":false,"timeTaken":50134},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T13:15:44.997-08:00","completedTime":"2023-11-28T13:15:48.393-08:00","timeTaken":3396},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T13:15:48.393-08:00","completedTime":"2023-11-28T13:15:48.403-08:00","timeTaken":10}},"rebalanceId":"b6ed96eb2a1218143e6c3104abcedf2f","nodesInfo":{"active_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"keep_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":[],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T13:15:44.993-08:00","completedTime":"2023-11-28T13:16:38.537-08:00","timeTaken":53543,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n {{badmatch,\n {error,\n {bad_nodes,index,set_service_manager,\n [{'ns_1@172.23.97.108',\n {exit,\n {{linked_process_died,<34822.16592.101>,\n {'ns_1@172.23.97.108',\n {no_connection,\"index-service_api\"}}},\n {gen_server,call,\n [{'service_agent-index',\n 'ns_1@172.23.97.108'},\n {set_service_manager,<0.2685.172>},\n infinity]}}}}]}}},\n [{service_manager,set_service_manager,1,\n [{file,\"src/service_manager.erl\"},\n {line,188}]},\n {service_manager,run_op,1,\n [{file,\"src/service_manager.erl\"},\n {line,146}]},\n {proc_lib,init_p,3,\n 
[{file,\"proc_lib.erl\"},{line,225}]}]}}."} |
|
Cbcollect logs:
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.105.122.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.106.171.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.106.176.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.106.30.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.96.198.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.96.230.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.96.245.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.100.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.109.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.66.zip
url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.67.zip
Attachments
Issue Links
- relates to
-
MB-60176 De-couple dropIndex and stream request lock
- Open