Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-33281

1 doc missing during source bucket mutation + rebalance

    XMLWordPrintable

Details

    • Untriaged
    • Unknown

    Description

      Build: 6.5.0-2547

      Seeing 1 doc missing with source bucket mutation with rebalance in kv,eventing (co-located)

      ./testrunner -i /tmp/testexec.14474.ini -p get-cbcollect-info=True,GROUP=source_bucket_mutation -t eventing.eventing_rebalance.EventingRebalance.test_rebalance_in_with_different_topologies,doc-per-day=20,dataset=default,nodes_init=4,services_init=kv-kv:eventing-kv:eventing-index:n1ql,groups=simple,services_in=kv,eventing,reset_services=True,handler_code=source_bucket_mutation,source_bucket_mutation=True,GROUP=source_bucket_mutation 
       
      Exception: Bucket operations from handler code took lot of time to complete or didn't go through. Current : 80639 Expected : 80640  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.97.226': {}, '172.23.97.224': {}, '172.23.97.222': {}}

      [2019-03-07 06:20:15,026] - [eventing_base:516] INFO - Stats for Node 172.23.97.226 is 
      [
          {
              "dcp_feed_boundary": "everything", 
              "event_processing_stats": {
                  "adhoc_timer_response_received": 3, 
                  "agg_messages_sent_to_worker": 9963, 
                  "agg_queue_memory": 0, 
                  "agg_queue_memory_cap": 132120576, 
                  "agg_queue_size": 0, 
                  "agg_queue_size_cap": 300000, 
                  "agg_timer_feedback_queue_cap": 1500, 
                  "agg_timer_feedback_queue_size": 0, 
                  "dcp_mutation": 936, 
                  "dcp_mutation_sent_to_worker": 232, 
                  "dcp_mutation_suppressed_counter": 704, 
                  "dcp_snapshot": 330, 
                  "dcp_stream_req_counter": 341, 
                  "dcp_streamreq": 341, 
                  "execution_stats": 1875, 
                  "failure_stats": 1875, 
                  "latency_stats": 1875, 
                  "lcb_exception_stats": 1875, 
                  "log_level": 3, 
                  "thr_count": 3, 
                  "thr_map": 3, 
                  "v8_init": 3, 
                  "v8_load": 3
              }, 
              "events_remaining": {
                  "dcp_backlog": 0
              }, 
              "execution_stats": {
                  "agg_queue_memory": 0, 
                  "agg_queue_size": 0, 
                  "dcp_delete_msg_counter": 0, 
                  "dcp_mutation_msg_counter": 232, 
                  "enqueued_dcp_delete_msg_counter": 0, 
                  "enqueued_dcp_mutation_msg_counter": 232, 
                  "enqueued_timer_msg_counter": 0, 
                  "feedback_queue_size": 0, 
                  "lcb_retry_failure": 0, 
                  "messages_parsed": 9951, 
                  "on_delete_failure": 0, 
                  "on_delete_success": 0, 
                  "on_update_failure": 0, 
                  "on_update_success": 232, 
                  "timer_create_failure": 0, 
                  "timer_msg_counter": 0, 
                  "timer_responses_sent": 0, 
                  "timestamp": {
                      "21711": "2019-03-07T14:20:14Z", 
                      "21713": "2019-03-07T14:20:14Z", 
                      "21714": "2019-03-07T14:20:14Z"
                  }, 
                  "uv_try_write_failure_counter": 0
              }, 
              "failure_stats": {
                  "app_worker_setting_events_lost": 0, 
                  "bucket_op_exception_count": 0, 
                  "checkpoint_failure_count": 0, 
                  "dcp_events_lost": 0, 
                  "debugger_events_lost": 0, 
                  "delete_events_lost": 0, 
                  "mutation_events_lost": 0, 
                  "n1ql_op_exception_count": 0, 
                  "timeout_count": 0, 
                  "timer_callback_missing_counter": 0, 
                  "timer_context_size_exceeded_counter": 0, 
                  "timer_events_lost": 0, 
                  "timestamp": {
                      "21711": "2019-03-07T14:20:14Z", 
                      "21713": "2019-03-07T14:20:14Z", 
                      "21714": "2019-03-07T14:20:14Z"
                  }, 
                  "v8worker_events_lost": 0
              }, 
              "function_id": 90482421, 
              "function_name": "Function_329142646_test_rebalance_in_with_different_topologies", 
              "gocb_creds_request_counter": 20, 
              "internal_vb_distribution_stats": {
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[683-796]", 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[797-910]", 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[911-1023]"
              }, 
              "latency_percentile_stats": {
                  "100": 19000, 
                  "50": 1000, 
                  "80": 1000, 
                  "90": 2000, 
                  "95": 4000, 
                  "99": 14000
              }, 
              "lcb_creds_request_counter": 24, 
              "lcb_exception_stats": {}, 
              "metastore_stats": {}, 
              "planner_stats": [
                  {
                      "host_name": "172.23.97.222:8096", 
                      "start_vb": 0, 
                      "vb_count": 342
                  }, 
                  {
                      "host_name": "172.23.97.224:8096", 
                      "start_vb": 342, 
                      "vb_count": 341
                  }, 
                  {
                      "host_name": "172.23.97.226:8096", 
                      "start_vb": 683, 
                      "vb_count": 341
                  }
              ], 
              "vb_distribution_stats_from_metadata": {
                  "172.23.97.222:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[0-113]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[114-227]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[228-341]"
                  }, 
                  "172.23.97.224:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[342-455]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[456-569]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[570-682]"
                  }, 
                  "172.23.97.226:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[683-796]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[797-910]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[911-1023]"
                  }
              }, 
              "worker_pids": {
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": 21714, 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": 21713, 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": 21711
              }
          }
      ] 
      [2019-03-07 06:20:15,043] - [eventing_base:516] INFO - Stats for Node 172.23.97.222 is 
      [
          {
              "dcp_feed_boundary": "everything", 
              "event_processing_stats": {
                  "adhoc_timer_response_received": 13, 
                  "agg_messages_sent_to_worker": 32112, 
                  "agg_queue_memory": 0, 
                  "agg_queue_memory_cap": 132120576, 
                  "agg_queue_size": 0, 
                  "agg_queue_size_cap": 300000, 
                  "agg_timer_feedback_queue_cap": 1500, 
                  "agg_timer_feedback_queue_size": 0, 
                  "dcp_mutation": 40592, 
                  "dcp_mutation_sent_to_worker": 20108, 
                  "dcp_mutation_suppressed_counter": 20484, 
                  "dcp_snapshot": 40009, 
                  "dcp_stream_req_counter": 938, 
                  "dcp_streamend": 596, 
                  "dcp_streamreq": 938, 
                  "execution_stats": 2091, 
                  "failure_stats": 2091, 
                  "latency_stats": 2091, 
                  "lcb_exception_stats": 2091, 
                  "log_level": 3, 
                  "thr_count": 3, 
                  "thr_map": 3, 
                  "v8_init": 3, 
                  "v8_load": 3
              }, 
              "events_remaining": {
                  "dcp_backlog": 0
              }, 
              "execution_stats": {
                  "agg_queue_memory": 0, 
                  "agg_queue_size": 0, 
                  "dcp_delete_msg_counter": 0, 
                  "dcp_mutation_msg_counter": 20108, 
                  "enqueued_dcp_delete_msg_counter": 0, 
                  "enqueued_dcp_mutation_msg_counter": 20108, 
                  "enqueued_timer_msg_counter": 0, 
                  "feedback_queue_size": 0, 
                  "lcb_retry_failure": 0, 
                  "messages_parsed": 32100, 
                  "on_delete_failure": 0, 
                  "on_delete_success": 0, 
                  "on_update_failure": 0, 
                  "on_update_success": 20107, 
                  "timer_create_failure": 0, 
                  "timer_msg_counter": 0, 
                  "timer_responses_sent": 0, 
                  "timestamp": {
                      "27537": "2019-03-07T14:20:14Z", 
                      "27538": "2019-03-07T14:20:14Z", 
                      "27548": "2019-03-07T14:20:14Z"
                  }, 
                  "uv_try_write_failure_counter": 0
              }, 
              "failure_stats": {
                  "app_worker_setting_events_lost": 0, 
                  "bucket_op_exception_count": 0, 
                  "checkpoint_failure_count": 0, 
                  "dcp_events_lost": 0, 
                  "debugger_events_lost": 0, 
                  "delete_events_lost": 0, 
                  "mutation_events_lost": 0, 
                  "n1ql_op_exception_count": 0, 
                  "timeout_count": 0, 
                  "timer_callback_missing_counter": 0, 
                  "timer_context_size_exceeded_counter": 0, 
                  "timer_events_lost": 0, 
                  "timestamp": {
                      "27537": "2019-03-07T14:20:14Z", 
                      "27538": "2019-03-07T14:20:14Z", 
                      "27548": "2019-03-07T14:20:14Z"
                  }, 
                  "v8worker_events_lost": 0
              }, 
              "function_id": 90482421, 
              "function_name": "Function_329142646_test_rebalance_in_with_different_topologies", 
              "gocb_creds_request_counter": 20, 
              "internal_vb_distribution_stats": {
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[0-113]", 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[114-227]", 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[228-341]"
              }, 
              "latency_percentile_stats": {
                  "100": 50000, 
                  "50": 1000, 
                  "80": 1000, 
                  "90": 2000, 
                  "95": 4000, 
                  "99": 9000
              }, 
              "lcb_creds_request_counter": 28, 
              "lcb_exception_stats": {}, 
              "metastore_stats": {}, 
              "planner_stats": [
                  {
                      "host_name": "172.23.97.222:8096", 
                      "start_vb": 0, 
                      "vb_count": 342
                  }, 
                  {
                      "host_name": "172.23.97.224:8096", 
                      "start_vb": 342, 
                      "vb_count": 341
                  }, 
                  {
                      "host_name": "172.23.97.226:8096", 
                      "start_vb": 683, 
                      "vb_count": 341
                  }
              ], 
              "vb_distribution_stats_from_metadata": {
                  "172.23.97.222:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[0-113]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[114-227]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[228-341]"
                  }, 
                  "172.23.97.224:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[342-455]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[456-569]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[570-682]"
                  }, 
                  "172.23.97.226:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[683-796]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[797-910]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[911-1023]"
                  }
              }, 
              "worker_pids": {
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": 27537, 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": 27538, 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": 27548
              }
          }
      ] 
      [2019-03-07 06:20:15,059] - [eventing_base:516] INFO - Stats for Node 172.23.97.224 is 
      [
          {
              "dcp_feed_boundary": "everything", 
              "event_processing_stats": {
                  "adhoc_timer_response_received": 12, 
                  "agg_messages_sent_to_worker": 32101, 
                  "agg_queue_memory": 0, 
                  "agg_queue_memory_cap": 132120576, 
                  "agg_queue_size": 0, 
                  "agg_queue_size_cap": 300000, 
                  "agg_timer_feedback_queue_cap": 1500, 
                  "agg_timer_feedback_queue_size": 0, 
                  "dcp_mutation": 40379, 
                  "dcp_mutation_sent_to_worker": 19996, 
                  "dcp_mutation_suppressed_counter": 20383, 
                  "dcp_snapshot": 39399, 
                  "dcp_stream_req_counter": 988, 
                  "dcp_streamend": 647, 
                  "dcp_streamreq": 988, 
                  "execution_stats": 2091, 
                  "failure_stats": 2091, 
                  "latency_stats": 2091, 
                  "lcb_exception_stats": 2091, 
                  "log_level": 3, 
                  "thr_count": 3, 
                  "thr_map": 3, 
                  "v8_init": 3, 
                  "v8_load": 3
              }, 
              "events_remaining": {
                  "dcp_backlog": 0
              }, 
              "execution_stats": {
                  "agg_queue_memory": 0, 
                  "agg_queue_size": 0, 
                  "dcp_delete_msg_counter": 0, 
                  "dcp_mutation_msg_counter": 19996, 
                  "enqueued_dcp_delete_msg_counter": 0, 
                  "enqueued_dcp_mutation_msg_counter": 19996, 
                  "enqueued_timer_msg_counter": 0, 
                  "feedback_queue_size": 0, 
                  "lcb_retry_failure": 0, 
                  "messages_parsed": 32089, 
                  "on_delete_failure": 0, 
                  "on_delete_success": 0, 
                  "on_update_failure": 0, 
                  "on_update_success": 19980, 
                  "timer_create_failure": 0, 
                  "timer_msg_counter": 0, 
                  "timer_responses_sent": 0, 
                  "timestamp": {
                      "21235": "2019-03-07T14:20:14Z", 
                      "21236": "2019-03-07T14:20:14Z", 
                      "21238": "2019-03-07T14:20:14Z"
                  }, 
                  "uv_try_write_failure_counter": 0
              }, 
              "failure_stats": {
                  "app_worker_setting_events_lost": 0, 
                  "bucket_op_exception_count": 0, 
                  "checkpoint_failure_count": 0, 
                  "dcp_events_lost": 0, 
                  "debugger_events_lost": 0, 
                  "delete_events_lost": 0, 
                  "mutation_events_lost": 0, 
                  "n1ql_op_exception_count": 0, 
                  "timeout_count": 0, 
                  "timer_callback_missing_counter": 0, 
                  "timer_context_size_exceeded_counter": 0, 
                  "timer_events_lost": 0, 
                  "timestamp": {
                      "21235": "2019-03-07T14:20:14Z", 
                      "21236": "2019-03-07T14:20:14Z", 
                      "21238": "2019-03-07T14:20:14Z"
                  }, 
                  "v8worker_events_lost": 0
              }, 
              "function_id": 90482421, 
              "function_name": "Function_329142646_test_rebalance_in_with_different_topologies", 
              "gocb_creds_request_counter": 20, 
              "internal_vb_distribution_stats": {
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[342-455]", 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[456-569]", 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[570-682]"
              }, 
              "latency_percentile_stats": {
                  "100": 28000, 
                  "50": 1000, 
                  "80": 1000, 
                  "90": 2000, 
                  "95": 4000, 
                  "99": 10000
              }, 
              "lcb_creds_request_counter": 28, 
              "lcb_exception_stats": {}, 
              "metastore_stats": {}, 
              "planner_stats": [
                  {
                      "host_name": "172.23.97.222:8096", 
                      "start_vb": 0, 
                      "vb_count": 342
                  }, 
                  {
                      "host_name": "172.23.97.224:8096", 
                      "start_vb": 342, 
                      "vb_count": 341
                  }, 
                  {
                      "host_name": "172.23.97.226:8096", 
                      "start_vb": 683, 
                      "vb_count": 341
                  }
              ], 
              "vb_distribution_stats_from_metadata": {
                  "172.23.97.222:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[0-113]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[114-227]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[228-341]"
                  }, 
                  "172.23.97.224:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[342-455]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[456-569]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[570-682]"
                  }, 
                  "172.23.97.226:8096": {
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": "[683-796]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": "[797-910]", 
                      "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": "[911-1023]"
                  }
              }, 
              "worker_pids": {
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_0": 21235, 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_1": 21236, 
                  "worker_Function_329142646_test_rebalance_in_with_different_topologies_2": 21238
              }
          }
      ]  

      logs: test_2 (2).zip

      Rebalance in kv,eventing,n1ql

      ./testrunner -i /tmp/testexec.14474.ini -p get-cbcollect-info=True,GROUP=source_bucket_mutation -t eventing.eventing_rebalance.EventingRebalance.test_rebalance_in_with_different_topologies,doc-per-day=20,dataset=default,nodes_init=3,services_init=kv:index:n1ql-kv:eventing:n1ql-kv:eventing:n1ql,groups=simple,services_in=kv,eventing,n1ql,reset_services=True,handler_code=source_bucket_mutation,source_bucket_mutation=True,GROUP=source_bucket_mutation
       
       
      Exception: Bucket operations from handler code took lot of time to complete or didn't go through. Current : 80639 Expected : 80640  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.97.224': {}, '172.23.97.225': {}, '172.23.97.222': {}}
      

      test_7 (1).zip

      Attachments

        1. test_10.zip
          42.44 MB
        2. test_10 (1).zip
          54.12 MB
        3. test_10 (1).zip
          51.79 MB
        4. test_15.zip
          67.13 MB
        5. test_2 (1).zip
          21.20 MB
        6. test_2 (2).zip
          18.54 MB
        7. test_7 (1).zip
          36.06 MB
        No reviews matched the request. Check your Options in the drop-down menu of this sections header.

        Activity

          People

            satya.nand Satya Nand (Inactive)
            vikas.chaudhary Vikas Chaudhary
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty