Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-59875

[System Test] Multiple rebalance failures seen

    XMLWordPrintable

Details

    • Untriaged
    • 0
    • Unknown

    Description

      Rebalances that have failed (i.e., rebalance failures that were not test-induced):

      Rebalance failure 1

      {"stageInfo":{"index":{"startTime":"2023-11-28T11:51:08.349-08:00","completedTime":false,"timeTaken":20047},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T11:51:02.103-08:00","completedTime":"2023-11-28T11:51:08.340-08:00","timeTaken":6237},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T11:51:08.340-08:00","completedTime":"2023-11-28T11:51:08.349-08:00","timeTaken":9}},"rebalanceId":"616d1837685aa055cda639b28a1b6355","nodesInfo":{"active_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67","ns_1@172.23.97.108"],"keep_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":["ns_1@172.23.97.108"],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T11:51:02.090-08:00","completedTime":"2023-11-28T11:51:28.395-08:00","timeTaken":26306,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n                              {{badmatch,\n                                {error,\n                                 {bad_nodes,index,set_service_manager,\n                                  [{'ns_1@172.23.97.108',\n                                    {exit,\n                                     {{linked_process_died,<34822.5516.85>,\n                                       {'ns_1@172.23.97.108',\n                                        {no_connection,\"index-service_api\"}}},\n                                      {gen_server,call,\n                  
                     [{'service_agent-index',\n                                         'ns_1@172.23.97.108'},\n                                        {set_service_manager,<0.6351.147>},\n                                        infinity]}}}}]}}},\n                               [{service_manager,set_service_manager,1,\n                                 [{file,\"src/service_manager.erl\"},\n                                  {line,188}]},\n                                {service_manager,run_op,1,\n                                 [{file,\"src/service_manager.erl\"},\n                                  {line,146}]},\n                                {proc_lib,init_p,3,\n                                 [{file,\"proc_lib.erl\"},{line,225}]}]}}."}
      

      Rebalance failure 2

      {"stageInfo":{"index":{"startTime":"2023-11-28T12:38:38.122-08:00","completedTime":false,"timeTaken":60278},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T12:38:34.068-08:00","completedTime":"2023-11-28T12:38:38.108-08:00","timeTaken":4040},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T12:38:38.108-08:00","completedTime":"2023-11-28T12:38:38.122-08:00","timeTaken":14}},"rebalanceId":"fbbcce33eb989b59ab6065a048db46e2","nodesInfo":{"active_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"keep_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":[],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T12:38:34.064-08:00","completedTime":"2023-11-28T12:39:38.399-08:00","timeTaken":64335,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n                              {agent_died,<34822.29020.91>,\n                               {linked_process_died,<34822.11308.94>,\n                                {'ns_1@172.23.97.108',\n                                 {timeout,\n                                  {gen_server,call,\n                                   [<34822.29412.91>,\n                                    {call,\"ServiceAPI.PrepareTopologyChange\",\n                                     #Fun<json_rpc_connection.0.36915653>,\n                                     #{timeout => 
60000}},\n                                    60000]}}}}}}."}
      

      Rebalance failure 3

      {"stageInfo":{"index":{"startTime":"2023-11-28T12:51:04.933-08:00","completedTime":false,"timeTaken":33520},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T12:51:00.625-08:00","completedTime":"2023-11-28T12:51:04.921-08:00","timeTaken":4296},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T12:51:04.921-08:00","completedTime":"2023-11-28T12:51:04.933-08:00","timeTaken":12}},"rebalanceId":"5a0f8d71d24f6bf3b3d62d47bcad22e8","nodesInfo":{"active_nodes":["ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67","ns_1@172.23.105.122","ns_1@172.23.97.108"],"keep_nodes":["ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":["ns_1@172.23.105.122","ns_1@172.23.97.108"],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T12:51:00.617-08:00","completedTime":"2023-11-28T12:51:38.453-08:00","timeTaken":37836,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n                              {{badmatch,\n                                {error,\n                                 {bad_nodes,index,set_service_manager,\n                                  [{'ns_1@172.23.97.108',\n                                    {exit,\n                                     {{linked_process_died,<34822.21321.96>,\n                                       {'ns_1@172.23.97.108',\n                                        {no_connection,\"index-service_api\"}}},\n                                      {gen_server,call,\n                
                       [{'service_agent-index',\n                                         'ns_1@172.23.97.108'},\n                                        {set_service_manager,<0.11522.164>},\n                                        infinity]}}}}]}}},\n                               [{service_manager,set_service_manager,1,\n                                 [{file,\"src/service_manager.erl\"},\n                                  {line,188}]},\n                                {service_manager,run_op,1,\n                                 [{file,\"src/service_manager.erl\"},\n                                  {line,146}]},\n                                {proc_lib,init_p,3,\n                                 [{file,\"proc_lib.erl\"},{line,225}]}]}}."}
      

      Rebalance failure 4

      {"stageInfo":{"index":{"startTime":"2023-11-28T13:15:48.403-08:00","completedTime":false,"timeTaken":50134},"data":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.96.230":1,"ns_1@172.23.96.245":1,"ns_1@172.23.97.66":1,"ns_1@172.23.96.198":1},"startTime":"2023-11-28T13:15:44.997-08:00","completedTime":"2023-11-28T13:15:48.393-08:00","timeTaken":3396},"query":{"totalProgress":100,"perNodeProgress":{"ns_1@172.23.97.100":1,"ns_1@172.23.97.67":1},"startTime":"2023-11-28T13:15:48.393-08:00","completedTime":"2023-11-28T13:15:48.403-08:00","timeTaken":10}},"rebalanceId":"b6ed96eb2a1218143e6c3104abcedf2f","nodesInfo":{"active_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"keep_nodes":["ns_1@172.23.105.122","ns_1@172.23.106.171","ns_1@172.23.106.176","ns_1@172.23.106.30","ns_1@172.23.96.198","ns_1@172.23.96.230","ns_1@172.23.96.245","ns_1@172.23.97.100","ns_1@172.23.97.108","ns_1@172.23.97.109","ns_1@172.23.97.66","ns_1@172.23.97.67"],"eject_nodes":[],"delta_nodes":[],"failed_nodes":[]},"masterNode":"ns_1@172.23.97.67","startTime":"2023-11-28T13:15:44.993-08:00","completedTime":"2023-11-28T13:16:38.537-08:00","timeTaken":53543,"completionMessage":"Rebalance exited with reason {service_rebalance_failed,index,\n                              {{badmatch,\n                                {error,\n                                 {bad_nodes,index,set_service_manager,\n                                  [{'ns_1@172.23.97.108',\n                                    {exit,\n                                     {{linked_process_died,<34822.16592.101>,\n                                       {'ns_1@172.23.97.108',\n                                        {no_connection,\"index-service_api\"}}},\n                                      {gen_server,call,\n              
                         [{'service_agent-index',\n                                         'ns_1@172.23.97.108'},\n                                        {set_service_manager,<0.2685.172>},\n                                        infinity]}}}}]}}},\n                               [{service_manager,set_service_manager,1,\n                                 [{file,\"src/service_manager.erl\"},\n                                  {line,188}]},\n                                {service_manager,run_op,1,\n                                 [{file,\"src/service_manager.erl\"},\n                                  {line,146}]},\n                                {proc_lib,init_p,3,\n                                 [{file,\"proc_lib.erl\"},{line,225}]}]}}."}
      
      

      Cbcollect logs:

      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.105.122.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.106.171.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.106.176.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.106.30.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.96.198.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.96.230.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.96.245.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.100.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.109.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.66.zip
      url : https://cb-jira.s3.us-east-2.amazonaws.com/logs/systestmon-1701208645/collectinfo-2023-11-28T220811-ns_1%40172.23.97.67.zip

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this section's header.

          Activity

            People

              pavan.pb Pavan PB
              pavan.pb Pavan PB
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty