Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-39772

Data mismatch with rebalance + timers + curl

    XMLWordPrintable

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Critical
    • Resolution: Fixed
    • Affects Version/s: 6.6.0
    • Fix Version/s: 6.6.0
    • Component/s: test-execution
    • Labels:
      None
    • Triage:
      Untriaged
    • Story Points:
      1
    • Is this a Regression?:
      Yes

      Description

      Build: 6.6.0-7737, passed on 6.6.0-7692

      We use the following handler for rebalancing in / out and swap

       function OnUpdate(doc, meta) {
          var expiry = new Date();
          expiry.setSeconds(expiry.getSeconds() + 30);
       
       
          var context = {docID : meta.id, random_text : "e6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6"};
          createTimer(timerCallback,  expiry, meta.id, context);
      }
       
       
       
       
       
       
      function OnDelete(meta) {
          var expiry = new Date();
          expiry.setSeconds(expiry.getSeconds() + 30);
       
       
          var context = {docID : meta.id};
          createTimer(NDtimerCallback,  expiry, meta.id, context);
      }
       
       
      function NDtimerCallback(context) {
          var request = {
      	path : 'post?param=text',
      	headers: {
          "cache-control": "no-cache",
          "Postman-Token": "a3e931fe-8fe2-413c-be82-546062d28377"
          },
          body: "This is expected to be sent back as part of response body."
          };
          try {
          	var response = curl("POST", server, request);
          	log('response body received from server:', response.body);
          	log('response headers received from server:', response.headers);
          	log('response status received from server:', response.status);
          	var res= new Uint8Array(response.body);
          	if(response.status == 200){
          	    delete dst_bucket[context.docID];
          	}
          	else{
          	    delete dst_bucket[context.docID];
          	}
          }
          catch (e) {
          	log('error:', e);
              }
      }
      function timerCallback(context) {
          var request = {
      	path : 'post?param=text',
      	headers: {
          "cache-control": "no-cache",
          "Postman-Token": "a3e931fe-8fe2-413c-be82-546062d28377"
          },
          body: "This is expected to be sent back as part of response body."
          };
          try {
          	var response = curl("POST", server, request);
          	log('response body received from server:', response.body);
          	log('response headers received from server:', response.headers);
          	log('response status received from server:', response.status);
          	var res= new Uint8Array(response.body);
          	if(response.status == 200){
          	    dst_bucket[context.docID]=response.body;
          	}
          	else{
          	    dst_bucket[context.docID]=response.status;
          	}
          }
          catch (e) {
          	log('error:', e);
              }
      }

      Failed test with mismatch

      /testrunner -i /tmp/testexec.11548.ini -p get-cbcollect-info=True,GROUP=timer_op_curl -t eventing.eventing_rebalance.EventingRebalance.test_eventing_rebalance_out_when_existing_eventing_node_is_processing_mutations,nodes_init=5,services_init=kv-eventing-eventing-index-n1ql,dataset=default,groups=simple,reset_services=True,doc-per-day=20,handler_code=timer_op_curl_post,curl=True,GROUP=timer_op_curl;timer_op_curl_windows
       
       Exception: duplicated data in destination bucket which is not expected. Current : 2 Expected : 0  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.105.122': {}}

      ./testrunner -i /tmp/testexec.11548.ini -p get-cbcollect-info=True,GROUP=timer_op_curl -t eventing.eventing_rebalance.EventingRebalance.test_eventing_swap_rebalance_when_existing_eventing_node_is_processing_mutations,nodes_init=5,services_init=kv-eventing-eventing-index-n1ql,dataset=default,groups=simple,reset_services=True,doc-per-day=20,handler_code=timer_op_curl_put,curl=True,GROUP=timer_op_curl;timer_op_curl_windows
       
       
       
       
      Exception: missing data in destination bucket. Current : 40318 Expected : 40320  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.105.185': {}} 

      ./testrunner -i /tmp/testexec.11548.ini -p get-cbcollect-info=True,GROUP=timer_op_curl -t eventing.eventing_rebalance.EventingRebalance.test_kv_rebalance_out_when_existing_eventing_node_is_processing_mutations,nodes_init=7,services_init=kv-kv-kv-eventing-eventing-eventing-index:n1ql,dataset=default,groups=simple,reset_services=True,doc-per-day=20,handler_code=timer_op_curl_post,curl=True,GROUP=timer_op_curl;timer_op_curl_windows
       
      Exception: duplicated data in destination bucket which is not expected. Current : 1 Expected : 0  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.105.163': {}, '172.23.105.137': {}, '172.23.105.185': {}} 

      ./testrunner -i /tmp/testexec.11548.ini -p get-cbcollect-info=True,GROUP=timer_op_curl -t eventing.eventing_rebalance.EventingRebalance.test_kv_swap_rebalance_when_existing_eventing_node_is_processing_mutations,nodes_init=6,services_init=kv-kv-eventing-eventing-eventing-index:n1ql,dataset=default,groups=simple,reset_services=True,doc-per-day=20,handler_code=timer_op_curl_put,curl=True,GROUP=timer_op_curl;timer_op_curl_windows
       
      Exception: duplicated data in destination bucket which is not expected. Current : 3 Expected : 0  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.105.163': {}, '172.23.105.130': {}, '172.23.105.137': {}} 

      ./testrunner -i /tmp/testexec.11548.ini -p get-cbcollect-info=True,GROUP=timer_op_curl -t eventing.eventing_rebalance.EventingRebalance.test_eventing_rebalance_with_multiple_eventing_nodes,doc-per-day=5,dataset=default,nodes_init=5,services_init=kv-eventing-eventing-eventing-index:n1ql,groups=simple,reset_services=True,handler_code=timer_op_curl_delete,curl=True,GROUP=timer_op_curl
       
       
       
       
      Exception: missing data in destination bucket. Current : 10055 Expected : 10080  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.105.188': {}, '172.23.105.130': {}, '172.23.105.137': {}, '172.23.105.122': {}, '172.23.105.185': {}}
      
      

      ./testrunner -i /tmp/testexec.11548.ini -p get-cbcollect-info=True,GROUP=timer_op_curl -t eventing.eventing_rebalance.EventingRebalance.test_eventing_rebalance_with_multiple_kv_nodes,doc-per-day=5,dataset=default,nodes_init=5,services_init=kv-kv-kv-eventing-index:n1ql,groups=simple,reset_services=True,handler_code=timer_op_curl_get,curl=True,GROUP=timer_op_curl
       
       
      Exception: missing data in destination bucket. Current : 10078 Expected : 10080  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.105.137': {}} 

      Logs attached for fail 1, fail 2 and fail 6 (direct node zips - it must have logs for all the tests)

        Attachments

        1. 172.23.105.10-20200604-0724-diag.zip
          33.33 MB
        2. 172.23.105.122-20200604-0726-diag.zip
          16.34 MB
        3. 172.23.105.130-20200604-0728-diag.zip
          15.52 MB
        4. 172.23.105.137-20200604-0730-diag.zip
          12.15 MB
        5. 172.23.105.163-20200604-0731-diag.zip
          9.97 MB
        6. 172.23.105.185-20200604-0733-diag.zip
          14.15 MB
        7. 172.23.105.188-20200604-0735-diag.zip
          17.96 MB
        8. 172.23.108.238-20200604-0738-diag.zip
          3.63 MB
        9. test_2.zip
          36.63 MB
        10. test_3.zip
          42.51 MB
        11. test.log
          360 kB
        No reviews matched the request. Check your Options in the drop-down menu of this sections header.

          Activity

          Hide
          jeelan.poola Jeelan Poola added a comment - - edited

          Vikas Chaudhary There are no timer changes between last pass & offending build. Its is likely that the tests are racy. Like NDtimerCallback() racing with timerCallback(). Could you please confirm if these are failing consistently with latest build?

          To dissect further, could you please re-run these with Build couchbase-server-6.6.0-7726 & Build couchbase-server-6.6.0-7727? Also, do you see any timer or context records left out in the metadata bucket at the end of the test? Thank you!

          http://172.23.123.43:8000/getchangelog?product=couchbase-server&fromb=6.6.0-7692&tob=6.6.0-7737

          CHANGELOG for eventing

          • Commit: 2758effd3c0ddf85cca8b2d4ab940d56ab90e907 in build: 6.6.0-7727
            MB-38321: cap queue sizes at 61MB to smoothen throttling as new

          handlers get deployed. For e.g, when eventing quota is 3GB and only
          one function is deployed queues size can bloat upto 500MB. Now when a
          second function is deployed, queue cap suddenly drops to 250MB. This
          will cause prolonged period of throttling until queue size comes down
          to 250MB or less. In case of slow functions this can take hours.
          And we will not be reading mutations from DCP conenctions in this
          case causing KV to back up. Which in turn causes any further new
          handlers that get deployed to not recieve mutations for hours.

          Change-Id: I4491fd652bd239da1b44599a52da99d34f1a2004
          Reviewed-on: http://review.couchbase.org/c/eventing/+/128603
          Tested-by: Jeelan Basha Poola <jeelan.poola@couchbase.com>
          Reviewed-by: CI Bot
          Reviewed-by: <ankit.prabhu@couchbase.com>
          Reviewed-on: http://review.couchbase.org/c/eventing/+/129480
          Well-Formed: Build Bot <build@couchbase.com>
          Reviewed-by: Jeelan Basha Poola <jeelan.poola@couchbase.com>

          • Commit: ac66bc25f39e30a0d10f096f4b0749305dd8119c in build: 6.6.0-7727
            MB-39642: Check for mandatory function OnUpdate or OnDelete

          Change-Id: I8fdeb6c5c0432e7c8eabdb46e5d045f6d38a0ab5
          Reviewed-on: http://review.couchbase.org/c/eventing/+/128948
          Reviewed-by: Jeelan Basha Poola <jeelan.poola@couchbase.com>
          Reviewed-by: CI Bot
          Tested-by: <ankit.prabhu@couchbase.com>
          Reviewed-on: http://review.couchbase.org/c/eventing/+/129477
          Well-Formed: Build Bot <build@couchbase.com>
          Tested-by: Jeelan Basha Poola <jeelan.poola@couchbase.com>

          Show
          jeelan.poola Jeelan Poola added a comment - - edited Vikas Chaudhary There are no timer changes between last pass & offending build. Its is likely that the tests are racy. Like NDtimerCallback() racing with timerCallback(). Could you please confirm if these are failing consistently with latest build? To dissect further, could you please re-run these with Build couchbase-server-6.6.0-7726 & Build couchbase-server-6.6.0-7727? Also, do you see any timer or context records left out in the metadata bucket at the end of the test? Thank you! http://172.23.123.43:8000/getchangelog?product=couchbase-server&fromb=6.6.0-7692&tob=6.6.0-7737 CHANGELOG for eventing Commit: 2758effd3c0ddf85cca8b2d4ab940d56ab90e907 in build: 6.6.0-7727 MB-38321 : cap queue sizes at 61MB to smoothen throttling as new handlers get deployed. For e.g, when eventing quota is 3GB and only one function is deployed queues size can bloat upto 500MB. Now when a second function is deployed, queue cap suddenly drops to 250MB. This will cause prolonged period of throttling until queue size comes down to 250MB or less. In case of slow functions this can take hours. And we will not be reading mutations from DCP conenctions in this case causing KV to back up. Which in turn causes any further new handlers that get deployed to not recieve mutations for hours. Change-Id: I4491fd652bd239da1b44599a52da99d34f1a2004 Reviewed-on: http://review.couchbase.org/c/eventing/+/128603 Tested-by: Jeelan Basha Poola <jeelan.poola@couchbase.com> Reviewed-by: CI Bot Reviewed-by: <ankit.prabhu@couchbase.com> Reviewed-on: http://review.couchbase.org/c/eventing/+/129480 Well-Formed: Build Bot <build@couchbase.com> Reviewed-by: Jeelan Basha Poola <jeelan.poola@couchbase.com> Commit: ac66bc25f39e30a0d10f096f4b0749305dd8119c in build: 6.6.0-7727 MB-39642 : Check for mandatory function OnUpdate or OnDelete Change-Id: I8fdeb6c5c0432e7c8eabdb46e5d045f6d38a0ab5 Reviewed-on: http://review.couchbase.org/c/eventing/+/128948 Reviewed-by: Jeelan Basha Poola <jeelan.poola@couchbase.com> Reviewed-by: CI Bot Tested-by: <ankit.prabhu@couchbase.com> Reviewed-on: http://review.couchbase.org/c/eventing/+/129477 Well-Formed: Build Bot <build@couchbase.com> Tested-by: Jeelan Basha Poola <jeelan.poola@couchbase.com>
          Hide
          jeelan.poola Jeelan Poola added a comment - - edited

          Vikas Chaudhary, very likely there is some exception being thrown causing the bucket op to dst_bucket to be skipped. As per the stats, all the timers fired and accounted.

                      "on_update_success": 10080, 
                      "timer_cancel_counter": 0, 
                      "timer_create_counter": 10080, 
                      "timer_create_failure": 0, 
                      "timer_msg_counter": 10080,
          

          Could you please add what we discussed (application log, select *, alarm&context docs in metadata bucket) to tests and share the details of a failed run. Thank you!

          Show
          jeelan.poola Jeelan Poola added a comment - - edited Vikas Chaudhary , very likely there is some exception being thrown causing the bucket op to dst_bucket to be skipped. As per the stats, all the timers fired and accounted. "on_update_success": 10080, "timer_cancel_counter": 0, "timer_create_counter": 10080, "timer_create_failure": 0, "timer_msg_counter": 10080, Could you please add what we discussed (application log, select *, alarm&context docs in metadata bucket) to tests and share the details of a failed run. Thank you!
          Hide
          vikas.chaudhary Vikas Chaudhary added a comment -

          Jeelan Poola please find the rerun here http://qa.sc.couchbase.com/job/dev_testbed_blr2/90/consoleText 

          extra debugging info

          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] RUN QUERY select RAW count(*) from dst_bucket
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [rest_client.query_tool] query params : statement=select+RAW+count%28%2A%29+from+dst_bucket
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] TOTAL ELAPSED TIME: 17.416531ms
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_document_count_via_index] ================== number of doc in dst_bucket via index ===========================
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_document_count_via_index] [40317]
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_document_count_via_index] ===============================================================================================
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] RUN QUERY select RAW count(0) from metadata where meta().id like 'eventing:%:al%'
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [rest_client.query_tool] query params : statement=select+RAW+count%280%29+from+metadata+where+meta%28%29.id+like+%27eventing%3A%25%3Aal%25%27
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] TOTAL ELAPSED TIME: 24.665351ms
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] ================== alarm documents in metadata ================================================
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] {u'status': u'success', u'metrics': {u'elapsedTime': u'24.665351ms', u'executionTime': u'24.59241ms', u'resultSize': 1, u'resultCount': 1}, u'results': [0], u'requestID': u'6405429c-fff7-460e-9f68-d76c4704e306', u'signature': u'number'}
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] ===============================================================================================
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] RUN QUERY select RAW count(0) from metadata where meta().id like 'eventing:%:cx%'
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [rest_client.query_tool] query params : statement=select+RAW+count%280%29+from+metadata+where+meta%28%29.id+like+%27eventing%3A%25%3Acx%25%27
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] TOTAL ELAPSED TIME: 25.456602ms
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] ================== context documents in metadata  =============================================
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] {u'status': u'success', u'metrics': {u'elapsedTime': u'25.456602ms', u'executionTime': u'25.38011ms', u'resultSize': 1, u'resultCount': 1}, u'results': [0], u'requestID': u'449d7e94-fc85-48ab-a119-ba77b08c3f69', u'signature': u'number'}
          2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] =============================================================================================== 

          Show
          vikas.chaudhary Vikas Chaudhary added a comment - Jeelan Poola  please find the rerun here  http://qa.sc.couchbase.com/job/dev_testbed_blr2/90/consoleText   extra debugging info 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] RUN QUERY select RAW count(*) from dst_bucket 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [rest_client.query_tool] query params : statement=select+RAW+count%28%2A%29+from+dst_bucket 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] TOTAL ELAPSED TIME: 17.416531ms 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_document_count_via_index] ================== number of doc in dst_bucket via index =========================== 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_document_count_via_index] [40317] 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_document_count_via_index] =============================================================================================== 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] RUN QUERY select RAW count(0) from metadata where meta().id like 'eventing:%:al%' 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [rest_client.query_tool] query params : statement=select+RAW+count%280%29+from+metadata+where+meta%28%29.id+like+%27eventing%3A%25%3Aal%25%27 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] TOTAL ELAPSED TIME: 24.665351ms 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] ================== alarm documents in metadata ================================================ 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] {u'status': u'success', u'metrics': {u'elapsedTime': u'24.665351ms', u'executionTime': u'24.59241ms', u'resultSize': 1, u'resultCount': 1}, u'results': [0], u'requestID': u'6405429c-fff7-460e-9f68-d76c4704e306', u'signature': u'number'} 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] =============================================================================================== 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] RUN QUERY select RAW count(0) from metadata where meta().id like 'eventing:%:cx%' 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [rest_client.query_tool] query params : statement=select+RAW+count%280%29+from+metadata+where+meta%28%29.id+like+%27eventing%3A%25%3Acx%25%27 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [tuq_helper.run_cbq_query] TOTAL ELAPSED TIME: 25.456602ms 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] ================== context documents in metadata ============================================= 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] {u'status': u'success', u'metrics': {u'elapsedTime': u'25.456602ms', u'executionTime': u'25.38011ms', u'resultSize': 1, u'resultCount': 1}, u'results': [0], u'requestID': u'449d7e94-fc85-48ab-a119-ba77b08c3f69', u'signature': u'number'} 2020-06-09 13:34:28 | INFO | MainProcess | test_thread | [eventing_base.print_timer_alarm_context] ===============================================================================================
          Hide
          jeelan.poola Jeelan Poola added a comment -

          Vikas Chaudhary As you can see from the extra debug info above, there are no alarm or context objects left in metadata bucket, which means all timers actually fired (eventing deletes them only after timer fires). And we can also see that dst_bucket had only [40317] docs. And there were no lcb_exceptions, which basically means, some exception was thrown causing bucket op to dst_bucket to be skipped.

          Are you logging application logs as well? I did not see that in test log. Please confirm where I can see it.

          Show
          jeelan.poola Jeelan Poola added a comment - Vikas Chaudhary As you can see from the extra debug info above, there are no alarm or context objects left in metadata bucket, which means all timers actually fired (eventing deletes them only after timer fires). And we can also see that dst_bucket had only [40317] docs. And there were no lcb_exceptions, which basically means, some exception was thrown causing bucket op to dst_bucket to be skipped. Are you logging application logs as well? I did not see that in test log. Please confirm where I can see it.
          Hide
          vikas.chaudhary Vikas Chaudhary added a comment -

          Jeelan Poola we have errors which are not captured in the stats

          2020-06-09T22:38:01.569-07:00 [INFO] "error:" {"message":"Unable to perform the request: Timeout was reached","stack":"Error\n    at Function.NDtimerCallback (Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m.js:30:21)"} 

          [
              {
                  "dcp_feed_boundary": "everything",
                  "event_processing_stats": {
                      "adhoc_timer_response_received": 15,
                      "agg_messages_sent_to_worker": 99710,
                      "agg_queue_memory": 0,
                      "agg_queue_memory_cap": 106954752,
                      "agg_queue_size": 0,
                      "agg_queue_size_cap": 300000,
                      "agg_timer_feedback_queue_cap": 1500,
                      "dcp_deletion": 40320,
                      "dcp_deletion_sent_to_worker": 40320,
                      "dcp_mutation": 20160,
                      "dcp_mutation_sent_to_worker": 20160,
                      "dcp_snapshot": 60476,
                      "dcp_stream_req_counter": 1536,
                      "dcp_streamend": 512,
                      "dcp_streamreq": 1536,
                      "execution_stats": 7431,
                      "failure_stats": 7431,
                      "latency_stats": 7431,
                      "lcb_exception_stats": 7431,
                      "log_level": 3,
                      "num_processed_events": 60480,
                      "processed_events_size": 25169088,
                      "thr_count": 3,
                      "thr_map": 3,
                      "timer_events": 80640,
                      "timer_responses_received": 60480,
                      "v8_init": 3,
                      "v8_load": 3
                  },
                  "events_remaining": {
                      "dcp_backlog": 0
                  },
                  "execution_stats": {
                      "agg_queue_memory": 0,
                      "agg_queue_size": 0,
                      "curl": {
                          "delete": 0,
                          "get": 0,
                          "head": 0,
                          "post": 80640,
                          "put": 0
                      },
                      "dcp_delete_msg_counter": 40320,
                      "dcp_delete_parse_failure": 0,
                      "dcp_mutation_msg_counter": 20160,
                      "dcp_mutation_parse_failure": 0,
                      "enqueued_dcp_delete_msg_counter": 40320,
                      "enqueued_dcp_mutation_msg_counter": 20160,
                      "enqueued_timer_msg_counter": 0,
                      "feedback_queue_size": 0,
                      "filtered_dcp_delete_counter": 0,
                      "filtered_dcp_mutation_counter": 0,
                      "lcb_retry_failure": 0,
                      "messages_parsed": 99698,
                      "num_processed_events": 60480,
                      "on_delete_failure": 0,
                      "on_delete_success": 40320,
                      "on_update_failure": 0,
                      "on_update_success": 20160,
                      "processed_events_size": 25169088,
                      "timer_cancel_counter": 0,
                      "timer_create_counter": 60480,
                      "timer_create_failure": 0,
                      "timer_msg_counter": 80640,
                      "timer_responses_sent": 0,
                      "timestamp": {
                          "970": "2020-06-10T05:40:22Z",
                          "971": "2020-06-10T05:40:22Z",
                          "972": "2020-06-10T05:40:22Z"
                      },
                      "uv_msg_parse_failure": 0,
                      "uv_try_write_failure_counter": 0
                  },
                  "failure_stats": {
                      "app_worker_setting_events_lost": 0,
                      "bucket_op_exception_count": 0,
                      "checkpoint_failure_count": 0,
                      "curl_non_200_response": 1,
                      "dcp_events_lost": 0,
                      "debugger_events_lost": 0,
                      "delete_events_lost": 0,
                      "mutation_events_lost": 0,
                      "n1ql_op_exception_count": 0,
                      "timeout_count": 0,
                      "timer_callback_missing_counter": 0,
                      "timer_context_size_exceeded_counter": 0,
                      "timer_events_lost": 0,
                      "timestamp": {
                          "970": "2020-06-10T05:40:22Z",
                          "971": "2020-06-10T05:40:22Z",
                          "972": "2020-06-10T05:40:22Z"
                      },
                      "v8worker_events_lost": 0
                  },
                  "function_name": "Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m",
                  "gocb_creds_request_counter": 8,
                  "function_id": 3504289858,
                  "internal_vb_distribution_stats": {
                      "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_0": "[0-341]",
                      "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_1": "[342-682]",
                      "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_2": "[683-1023]"
                  },
                  "latency_percentile_stats": {
                      "100": 18200,
                      "50": 900,
                      "80": 1100,
                      "90": 1300,
                      "95": 1500,
                      "99": 2300
                  },
                  "lcb_creds_request_counter": 12,
                  "lcb_exception_stats": {},
                  "planner_stats": [
                      {
                          "host_name": "172.23.105.186:8096",
                          "start_vb": 0,
                          "vb_count": 1024
                      }
                  ],
                  "metastore_stats": {
                      "metastore_delete_err": 0,
                      "metastore_deletes": 0,
                      "metastore_not_found": 0,
                      "metastore_scan": 0,
                      "metastore_scan_due": 0,
                      "metastore_scan_err": 0,
                      "metastore_set": 0,
                      "metastore_set_err": 0
                  },
                  "vb_distribution_stats_from_metadata": {
                      "172.23.105.186:8096": {
                          "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_0": "[0-341]",
                          "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_1": "[342-682]",
                          "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_2": "[683-1023]"
                      }
                  },
                  "worker_pids": {
                      "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_0": 970,
                      "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_1": 971,
                      "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_2": 972
                  }
              }
          ] 

          Show
          vikas.chaudhary Vikas Chaudhary added a comment - Jeelan Poola  we have errors which are not captured in the stats 2020-06-09T22:38:01.569-07:00 [INFO] "error:" {"message":"Unable to perform the request: Timeout was reached","stack":"Error\n at Function.NDtimerCallback (Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m.js:30:21)"} [     {         "dcp_feed_boundary": "everything",         "event_processing_stats": {             "adhoc_timer_response_received": 15,             "agg_messages_sent_to_worker": 99710,             "agg_queue_memory": 0,             "agg_queue_memory_cap": 106954752,             "agg_queue_size": 0,             "agg_queue_size_cap": 300000,             "agg_timer_feedback_queue_cap": 1500,             "dcp_deletion": 40320,             "dcp_deletion_sent_to_worker": 40320,             "dcp_mutation": 20160,             "dcp_mutation_sent_to_worker": 20160,             "dcp_snapshot": 60476,             "dcp_stream_req_counter": 1536,             "dcp_streamend": 512,             "dcp_streamreq": 1536,             "execution_stats": 7431,             "failure_stats": 7431,             "latency_stats": 7431,             "lcb_exception_stats": 7431,             "log_level": 3,             "num_processed_events": 60480,             "processed_events_size": 25169088,             "thr_count": 3,             "thr_map": 3,             "timer_events": 80640,             "timer_responses_received": 60480,             "v8_init": 3,             "v8_load": 3         },         "events_remaining": {             "dcp_backlog": 0         },         "execution_stats": {             "agg_queue_memory": 0,             "agg_queue_size": 0,             "curl": {                 "delete": 0,                 "get": 0,                 "head": 0,                 "post": 80640,                 "put": 0             },             "dcp_delete_msg_counter": 40320,             "dcp_delete_parse_failure": 0,             "dcp_mutation_msg_counter": 20160,             "dcp_mutation_parse_failure": 0,             "enqueued_dcp_delete_msg_counter": 40320,             "enqueued_dcp_mutation_msg_counter": 20160,             "enqueued_timer_msg_counter": 0,             "feedback_queue_size": 0,             "filtered_dcp_delete_counter": 0,             "filtered_dcp_mutation_counter": 0,             "lcb_retry_failure": 0,             "messages_parsed": 99698,             "num_processed_events": 60480,             "on_delete_failure": 0,             "on_delete_success": 40320,             "on_update_failure": 0,             "on_update_success": 20160,             "processed_events_size": 25169088,             "timer_cancel_counter": 0,             "timer_create_counter": 60480,             "timer_create_failure": 0,             "timer_msg_counter": 80640,             "timer_responses_sent": 0,             "timestamp": {                 "970": "2020-06-10T05:40:22Z",                 "971": "2020-06-10T05:40:22Z",                 "972": "2020-06-10T05:40:22Z"             },             "uv_msg_parse_failure": 0,             "uv_try_write_failure_counter": 0         },         "failure_stats": {             "app_worker_setting_events_lost": 0,             "bucket_op_exception_count": 0,             "checkpoint_failure_count": 0,             "curl_non_200_response": 1,             "dcp_events_lost": 0,             "debugger_events_lost": 0,             "delete_events_lost": 0,             "mutation_events_lost": 0,             "n1ql_op_exception_count": 0,             "timeout_count": 0,             "timer_callback_missing_counter": 0,             "timer_context_size_exceeded_counter": 0,             "timer_events_lost": 0,             "timestamp": {                 "970": "2020-06-10T05:40:22Z",                 "971": "2020-06-10T05:40:22Z",                 "972": "2020-06-10T05:40:22Z"             },             "v8worker_events_lost": 0         },         "function_name": "Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m",         "gocb_creds_request_counter": 8,         "function_id": 3504289858,         "internal_vb_distribution_stats": {             "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_0": "[0-341]",             "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_1": "[342-682]",             "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_2": "[683-1023]"         },         "latency_percentile_stats": {             "100": 18200,             "50": 900,             "80": 1100,             "90": 1300,             "95": 1500,             "99": 2300         },         "lcb_creds_request_counter": 12,         "lcb_exception_stats": {},         "planner_stats": [             {                 "host_name": "172.23.105.186:8096",                 "start_vb": 0,                 "vb_count": 1024             }         ],         "metastore_stats": {             "metastore_delete_err": 0,             "metastore_deletes": 0,             "metastore_not_found": 0,             "metastore_scan": 0,             "metastore_scan_due": 0,             "metastore_scan_err": 0,             "metastore_set": 0,             "metastore_set_err": 0         },         "vb_distribution_stats_from_metadata": {             "172.23.105.186:8096": {                 "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_0": "[0-341]",                 "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_1": "[342-682]",                 "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_2": "[683-1023]"             }         },         "worker_pids": {             "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_0": 970,             "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_1": 971,             "worker_Function_997275830_test_eventing_rebalance_out_when_existing_eventing_node_is_processing_m_2": 972         }     } ]
          Hide
          jeelan.poola Jeelan Poola added a comment -

          Vikas Chaudhary Yes, we do not have counters for curl exceptions. Could you please log a separate ticket for it?

          You may please look into why curl server was timing out in this test setup and close out this bug after resolving the issue.

          As discussed, another simple thing we can do is put dst_bucket[meta.id] = doc; in the catch block as well and see if test passes consistently. Assigning this back to you. Thank you for help!!

          Show
          jeelan.poola Jeelan Poola added a comment - Vikas Chaudhary Yes, we do not have counters for curl exceptions. Could you please log a separate ticket for it? You may please look into why curl server was timing out in this test setup and close out this bug after resolving the issue. As discussed, another simple thing we can do is put dst_bucket [meta.id] = doc; in the catch block as well and see if test passes consistently. Assigning this back to you. Thank you for help!!
          Hide
          vikas.chaudhary Vikas Chaudhary added a comment -

          filed MB-39862 

          Show
          vikas.chaudhary Vikas Chaudhary added a comment - filed  MB-39862  

            People

            Assignee:
            vikas.chaudhary Vikas Chaudhary
            Reporter:
            vikas.chaudhary Vikas Chaudhary
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Dates

              Created:
              Updated:
              Resolved:

                Gerrit Reviews

                There are no open Gerrit changes

                  PagerDuty