Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-14388

[GoXDCR-System test] assertion failed [st <= static_cast<uint64_t>(lastBySeqno) && static_cast<uint64_t>(lastBySeqno) <= en] [was: Rebalance failed with {badmatch,{error,timeout}]

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Blocker
    • 4.0.0
    • 4.0.0
    • couchbase-bucket
    • Security Level: Public
    • None
    • centOS 6.x, 4 cores, 15Gb RAM - each node

    Description

      Build


      4.0.0-1767

      Clusters
      -----------
      C1 : http://172.23.105.44:8091/
      C2 : http://172.23.105.54:8091/
      The clusters are available for investigation.

      What we do in XDCR System test
      ------------------------------
      1. Load both clusters, C1 [8 nodes] and C2 [8 nodes], until vb_active_resident_items_ratio < 50 for standardbucket and < 70 for standardbucket1
      2. Create xdcr:
      C1.standardbucket <--> C2.standardbucket , no filter
      C1.standardbucket1 --> C1.standardbucket1 , no filter
      no replication on sasl bucket.
      3. Access phase with 98% gets, 2% sets runs for 3 hours
      4. Rebalance-out 1 node at cluster1 with workload [high dgm ~4%]
      5. Rebalance-in the same node with workload
      6. Fail over one node (172.23.105.47) with workload and rebalance. Rebalance failed here, although .47 is not a part of C1 anymore.

      goxdcr crashes have flooded the UI log, so the rebalance failure log is copied below from the system test worker:

      {u'node': u'ns_1@172.23.105.44', u'code': 2, u'text': u'Rebalance exited with reason {unexpected_exit,\n {\'EXIT\',<0.25682.23>,\n {wait_seqno_persisted_failed,\n "standardbucket1",20,24948,\n [{\'ns_1@172.23.105.50\',\n {\'EXIT\',\n {{badmatch,{error,timeout,\n [{mc_client_binary,cmd_vocal_recv,5,\n [

      {file,"src/mc_client_binary.erl"},\n {line,156}]},\n {mc_client_binary,select_bucket,2,\n [{file,"src/mc_client_binary.erl"}

      ,\n

      {line,351}]},\n {ns_memcached,ensure_bucket,2,\n [{file,"src/ns_memcached.erl"},\n {line,1291}]},\n {ns_memcached,handle_info,2,\n [{file,"src/ns_memcached.erl"},\n {line,745}]},\n {gen_server,handle_msg,5,\n [{file,"gen_server.erl"},{line,604}]},\n {ns_memcached,init,1,\n [{file,"src/ns_memcached.erl"},\n {line,174}]},\n {gen_server,init_it,6,\n [{file,"gen_server.erl"},{line,304}]},\n {proc_lib,init_p_do_apply,3,\n [{file,"proc_lib.erl"},{line,239}]}]},\n {gen_server,call,\n [\'ns_memcached-standardbucket1\',\n {set_vbucket,692,active},\n 180000]}},\n {gen_server,call,\n [{\'janitor_agent-standardbucket1\',\n \'ns_1@172.23.105.50\'},\n {if_rebalance,<0.25156.23>,\n {wait_seqno_persisted,20,24948}},\n infinity]}}}}]}}}\n', u'shortText': u'message', u'serverTime': u'2015-04-09T01:19:26.821Z', u'module': u'ns_orchestrator', u'tstamp': 1428567566821, u'type': u'info'}
      [2015-04-09 01:23:02,036: ERROR/Worker-7] {u'node': u'ns_1@172.23.105.44', u'code': 0, u'text': u'<0.24917.23> exited with {unexpected_exit,\n {\'EXIT\',<0.25682.23>,\n {wait_seqno_persisted_failed,"standardbucket1",20,\n 24948,\n [{\'ns_1@172.23.105.50\',\n {\'EXIT\',\n {{badmatch,{error,timeout,\n [{mc_client_binary,cmd_vocal_recv,5,\n [{file,"src/mc_client_binary.erl"},\n {line,156}]},\n {mc_client_binary,select_bucket,2,\n [{file,"src/mc_client_binary.erl"},\n {line,351}

      ]},\n {ns_memcached,ensure_bucket,2,\n [

      {file,"src/ns_memcached.erl"},\n {line,1291}]},\n {ns_memcached,handle_info,2,\n [{file,"src/ns_memcached.erl"}

      ,\n

      {line,745}

      ]},\n {gen_server,handle_msg,5,\n [

      {file,"gen_server.erl"},{line,604}]},\n {ns_memcached,init,1,\n [{file,"src/ns_memcached.erl"},\n {line,174}]},\n {gen_server,init_it,6,\n [{file,"gen_server.erl"}

      ,

      {line,304}

      ]},\n {proc_lib,init_p_do_apply,3,\n [

      {file,"proc_lib.erl"}

      ,

      {line,239}

      ]}]},\n {gen_server,call,\n [\'ns_memcached-standardbucket1\',\n

      {set_vbucket,692,active}

      ,\n 180000]}},\n {gen_server,call,\n [

      {\'janitor_agent-standardbucket1\',\n \'ns_1@172.23.105.50\'}

      ,\n {if_rebalance,<0.25156.23>,\n {wait_seqno_persisted,20,24948}},\n infinity]}}}}]}}}', u'shortText': u'message', u'serverTime': u'2015-04-09T01:19:26.794Z', u'module': u'ns_vbucket_mover', u'tstamp': 1428567566794, u'type': u'critical'}
      [2015-04-09 01:23:02,037: ERROR/Worker-7] {u'node': u'ns_1@172.23.105.50', u'code': 0, u'text': u'Control connection to memcached on \'ns_1@172.23.105.50\' disconnected: badmatch,\n {error,\n timeout,\n [{mc_client_binary,\n cmd_vocal_recv,\n 5,\n [

      {file,\n "src/mc_client_binary.erl"},\n {line,\n 156}]},\n {mc_client_binary,\n select_bucket,\n 2,\n [{file,n "src/mc_client_binary.erl"}

      ,\n

      {line,\n 351}

      ]},\n {ns_memcached,\n ensure_bucket,\n 2,\n [

      {file,\n "src/ns_memcached.erl"},\n {line,\n 1291}]},\n {ns_memcached,\n handle_info,\n 2,\n [{file,n "src/ns_memcached.erl"}

      ,\n

      {line,\n 745}

      ]},\n {gen_server,\n handle_msg,\n 5,\n [

      {file,\n "gen_server.erl"},\n {line,\n 604}]},\n {ns_memcached,\n init,\n 1,\n [{file,\n "src/ns_memcached.erl"},\n {line,\n 174}]},\n {gen_server,\n init_it,\n 6,\n [{file,n "gen_server.erl"}

      ,\n

      {line,\n 304}

      ]},\n {proc_lib,\n init_p_do_apply,\n 3,\n [

      {file,\n "proc_lib.erl"}

      ,\n

      {line,\n 239}

      ]}]}', u'shortText': u'message', u'serverTime': u'2015-04-09T01:19:17.022Z', u'module': u'ns_memcached', u'tstamp': 1428567557022, u'type': u'info'}
      [2015-04-09 01:23:02,037: ERROR/Worker-7] {u'node': u'ns_1@172.23.105.48', u'code': 5, u'text': u"Node 'ns_1@172.23.105.48' saw that node 'ns_1@172.23.105.47' went down. Details: [

      {nodedown_reason,\n connection_closed}]", u'shortText': u'node down', u'serverTime': u'2015-04-09T01:17:20.534Z', u'module': u'ns_node_disco', u'tstamp': 1428567440534, u'type': u'warning'} {u'node': u'ns_1@172.23.105.52', u'code': 5, u'text': u"Node 'ns_1@172.23.105.52' saw that node 'ns_1@172.23.105.47' went down. Details: [{nodedown_reason,n connection_closed}

      ]", u'shortText': u'node down', u'serverTime': u'2015-04-09T01:17:20.531Z', u'module': u'ns_node_disco', u'tstamp': 1428567440531, u'type': u'warning'}{u'node': u'ns_1@172.23.105.49', u'code': 5, u'text': u"Node 'ns_1@172.23.105.49' saw that node 'ns_1@172.23.105.47' went down. Details: [

      {nodedown_reason,\n connection_closed}]", u'shortText': u'node down', u'serverTime': u'2015-04-09T01:17:20.530Z', u'module': u'ns_node_disco', u'tstamp': 1428567440530, u'type': u'warning'}{u'node': u'ns_1@172.23.105.50', u'code': 5, u'text': u"Node 'ns_1@172.23.105.50' saw that node 'ns_1@172.23.105.47' went down. Details: [{nodedown_reason,n connection_closed}

      ]", u'shortText': u'node down', u'serverTime': u'2015-04-09T01:17:20.529Z', u'module': u'ns_node_disco', u'tstamp': 1428567440529, u'type': u'warning'}{u'node': u'ns_1@172.23.105.51', u'code': 5, u'text': u"Node 'ns_1@172.23.105.51' saw that node 'ns_1@172.23.105.47' went down. Details: [

      {nodedown_reason,\n connection_closed}

      ]", u'shortText': u'node down', u'serverTime': u'2015-04-09T01:17:20.527Z', u'module': u'ns_node_disco', u'tstamp': 1428567440527, u'type': u'warning'}
      Attaching logs from C1.

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this section's header.

          Activity

            People

              apiravi Aruna Piravi (Inactive)
              apiravi Aruna Piravi (Inactive)
              Votes:
              0 Vote for this issue
              Watchers:
              8 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty