Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-46092

Offline Upgrade 6.6.1 -> 7.0.0: Timers not firing after upgrade

    XMLWordPrintable

Details

    Description

      Offline Upgrade from 6.6.1-9213 -> 7.0.0-5071

      1. Create a 5-node cluster (2 KV, 1 eventing, 1 N1QL, 1 index).
      2. Create 2 handlers: one using timers and one doing bucket ops.
      3. Load data.
      4. Deploy only the timer handler.
      5. Upgrade all the nodes with: export CB_MASTER_PASSWORD=password; rpm -U /tmp/couchbase-server-enterprise-7.0.0-5071-centos7.x86_64.rpm

       ./testrunner.py -i /tmp/win10-bucket-ops.ini -t eventing.eventing_upgrade.EventingUpgrade.test_offline_upgrade_with_eventing,nodes_init=5,dataset=default,groups=simple,upgrade_test=True,initial_version=6.6.1-9213,doc-per-day=10,upgrade_version=7.0.0-5071
       
      Current : 6732 Expected : 20160 ...

      Stats before upgrade

      2021-05-04 01:04:21,636 - root - INFO - Stats for Node 172.23.106.73 is 
      [
          {
              "dcp_feed_boundary": "everything",
              "event_processing_stats": {
                  "adhoc_timer_response_received": 3,
                  "agg_messages_sent_to_worker": 21625,
                  "agg_queue_memory": 0,
                  "agg_queue_memory_cap": 53477376,
                  "agg_queue_size": 0,
                  "agg_queue_size_cap": 300000,
                  "agg_timer_feedback_queue_cap": 1500,
                  "dcp_mutation": 20160,
                  "dcp_mutation_sent_to_worker": 20160,
                  "dcp_snapshot": 1024,
                  "dcp_stream_req_counter": 1024,
                  "dcp_streamreq": 1024,
                  "execution_stats": 84,
                  "failure_stats": 84,
                  "latency_stats": 84,
                  "lcb_exception_stats": 84,
                  "log_level": 3,
                  "num_processed_events": 20160,
                  "processed_events_size": 13934248,
                  "thr_count": 3,
                  "thr_map": 3,
                  "timer_responses_received": 20160,
                  "v8_init": 3,
                  "v8_load": 3
              },
              "events_remaining": {
                  "dcp_backlog": 0
              },
              "execution_stats": {
                  "agg_queue_memory": 0,
                  "agg_queue_size": 0,
                  "curl": {
                      "delete": 0,
                      "get": 0,
                      "head": 0,
                      "post": 0,
                      "put": 0
                  },
                  "dcp_delete_msg_counter": 0,
                  "dcp_delete_parse_failure": 0,
                  "dcp_mutation_msg_counter": 20160,
                  "dcp_mutation_parse_failure": 0,
                  "enqueued_dcp_delete_msg_counter": 0,
                  "enqueued_dcp_mutation_msg_counter": 20160,
                  "enqueued_timer_msg_counter": 0,
                  "feedback_queue_size": 0,
                  "filtered_dcp_delete_counter": 0,
                  "filtered_dcp_mutation_counter": 0,
                  "lcb_retry_failure": 0,
                  "messages_parsed": 21613,
                  "num_processed_events": 20160,
                  "on_delete_failure": 0,
                  "on_delete_success": 0,
                  "on_update_failure": 0,
                  "on_update_success": 20160,
                  "processed_events_size": 13934248,
                  "timer_cancel_counter": 0,
                  "timer_create_counter": 20160,
                  "timer_create_failure": 0,
                  "timer_msg_counter": 0,
                  "timer_responses_sent": 0,
                  "timestamp": {
                      "108035": "2021-05-04T08:04:20Z",
                      "108044": "2021-05-04T08:04:20Z",
                      "108052": "2021-05-04T08:04:20Z"
                  },
                  "uv_msg_parse_failure": 0,
                  "uv_try_write_failure_counter": 0
              },
              "failure_stats": {
                  "app_worker_setting_events_lost": 0,
                  "bkt_ops_cas_mismatch_count": 0,
                  "bucket_op_exception_count": 0,
                  "checkpoint_failure_count": 0,
                  "curl_non_200_response": 0,
                  "dcp_events_lost": 0,
                  "debugger_events_lost": 0,
                  "delete_events_lost": 0,
                  "mutation_events_lost": 0,
                  "n1ql_op_exception_count": 0,
                  "timeout_count": 0,
                  "timer_callback_missing_counter": 0,
                  "timer_context_size_exceeded_counter": 0,
                  "timer_events_lost": 0,
                  "timestamp": {
                      "108035": "2021-05-04T08:04:20Z",
                      "108044": "2021-05-04T08:04:20Z",
                      "108052": "2021-05-04T08:04:20Z"
                  },
                  "v8worker_events_lost": 0
              },
              "function_id": 4271354540,
              "function_name": "timers",
              "gocb_creds_request_counter": 12,
              "internal_vb_distribution_stats": {
                  "worker_timers_0": "[0-341]",
                  "worker_timers_1": "[342-682]",
                  "worker_timers_2": "[683-1023]"
              },
              "latency_percentile_stats": {
                  "100": 22000,
                  "50": 1000,
                  "80": 1400,
                  "90": 1700,
                  "95": 2200,
                  "99": 4300
              },
              "lcb_creds_request_counter": 36,
              "lcb_exception_stats": {},
              "metastore_stats": {
                  "metastore_delete_err": 0,
                  "metastore_deletes": 0,
                  "metastore_not_found": 0,
                  "metastore_scan": 0,
                  "metastore_scan_due": 0,
                  "metastore_scan_err": 0,
                  "metastore_set": 0,
                  "metastore_set_err": 0
              },
              "planner_stats": [
                  {
                      "host_name": "172.23.106.73:8096",
                      "start_vb": 0,
                      "vb_count": 1024
                  }
              ],
              "vb_distribution_stats_from_metadata": {},
              "worker_pids": {
                  "worker_timers_0": 108035,
                  "worker_timers_1": 108044,
                  "worker_timers_2": 108052
              }
          }
      ]  

      Stats after upgrade

      [
          {
              "dcp_feed_boundary": "everything",
              "event_processing_stats": {
                  "adhoc_timer_response_received": 4,
                  "agg_messages_sent_to_worker": 29936,
                  "agg_queue_memory": 0,
                  "agg_queue_memory_cap": 53477376,
                  "agg_queue_size": 0,
                  "agg_queue_size_cap": 100000,
                  "agg_timer_feedback_queue_cap": 500,
                  "dcp_mutation": 20160,
                  "dcp_mutation_sent_to_worker": 20160,
                  "dcp_snapshot": 1024,
                  "dcp_stream_req_counter": 1024,
                  "dcp_streamreq": 1024,
                  "execution_stats": 1749,
                  "failure_stats": 1749,
                  "latency_stats": 1749,
                  "lcb_exception_stats": 1749,
                  "log_level": 1,
                  "num_processed_events": 20160,
                  "processed_events_size": 14516812,
                  "thr_count": 1,
                  "thr_map": 1,
                  "v8_init": 1,
                  "v8_load": 1
              },
              "events_remaining": {
                  "dcp_backlog": 0
              },
              "execution_stats": {
                  "agg_queue_memory": 0,
                  "agg_queue_size": 0,
                  "curl": {
                      "delete": 0,
                      "get": 0,
                      "head": 0,
                      "post": 0,
                      "put": 0
                  },
                  "curl_success_count": 0,
                  "dcp_delete_msg_counter": 0,
                  "dcp_delete_parse_failure": 0,
                  "dcp_mutation_msg_counter": 20160,
                  "dcp_mutation_parse_failure": 0,
                  "enqueued_dcp_delete_msg_counter": 0,
                  "enqueued_dcp_mutation_msg_counter": 20160,
                  "enqueued_timer_msg_counter": 0,
                  "feedback_queue_size": 0,
                  "filtered_dcp_delete_counter": 0,
                  "filtered_dcp_mutation_counter": 0,
                  "lcb_retry_failure": 0,
                  "messages_parsed": 29932,
                  "no_op_counter": 0,
                  "num_processed_events": 20160,
                  "on_delete_failure": 0,
                  "on_delete_success": 0,
                  "on_update_failure": 0,
                  "on_update_success": 20160,
                  "processed_events_size": 14516812,
                  "timer_callback_failure": 0,
                  "timer_callback_success": 0,
                  "timer_cancel_counter": 0,
                  "timer_create_counter": 0,
                  "timer_create_failure": 0,
                  "timer_msg_counter": 0,
                  "timer_responses_sent": 0,
                  "timestamp": {
                      "109559": "2021-05-04T08:40:41Z"
                  },
                  "uv_msg_parse_failure": 0,
                  "uv_try_write_failure_counter": 0
              },
              "failure_stats": {
                  "app_worker_setting_events_lost": 0,
                  "bkt_ops_cas_mismatch_count": 0,
                  "bucket_op_exception_count": 0,
                  "checkpoint_failure_count": 0,
                  "curl_failure_count": 0,
                  "curl_max_resp_size_exceeded": 0,
                  "curl_non_200_response": 0,
                  "curl_timeout_count": 0,
                  "dcp_events_lost": 0,
                  "debugger_events_lost": 0,
                  "delete_events_lost": 0,
                  "mutation_events_lost": 0,
                  "n1ql_op_exception_count": 0,
                  "timeout_count": 0,
                  "timer_callback_missing_counter": 0,
                  "timer_context_size_exceeded_counter": 0,
                  "timer_events_lost": 0,
                  "timestamp": {
                      "109559": "2021-05-04T08:40:41Z"
                  },
                  "v8worker_events_lost": 0
              },
              "function_name": "bucket_op",
              "gocb_creds_request_counter": 53,
              "function_id": 628199521,
              "internal_vb_distribution_stats": {
                  "worker_bucket_op_0": "[0-1023]"
              },
              "latency_percentile_stats": {
                  "50": 300,
                  "80": 400,
                  "90": 500,
                  "95": 800,
                  "99": 1600,
                  "100": 12000
              },
              "lcb_creds_request_counter": 20,
              "lcb_exception_stats": {},
              "planner_stats": [
                  {
                      "host_name": "172.23.106.73:8096",
                      "start_vb": 0,
                      "vb_count": 1024
                  }
              ],
              "metastore_stats": {},
              "vb_distribution_stats_from_metadata": {
                  "172.23.106.73:8096": {
                      "worker_bucket_op_0": "[0-1023]"
                  }
              },
              "worker_pids": {
                  "worker_bucket_op_0": 109559
              }
          },
          {
              "dcp_feed_boundary": "everything",
              "event_processing_stats": {
                  "adhoc_timer_response_received": 2,
                  "agg_messages_sent_to_worker": 17251,
                  "agg_queue_memory": 0,
                  "agg_queue_memory_cap": 63963136,
                  "agg_queue_size": 0,
                  "agg_queue_size_cap": 100000,
                  "agg_timer_feedback_queue_cap": 500,
                  "dcp_mutation": 6761,
                  "dcp_mutation_sent_to_worker": 6761,
                  "dcp_snapshot": 342,
                  "dcp_stream_req_counter": 342,
                  "dcp_streamreq": 342,
                  "execution_stats": 2028,
                  "failure_stats": 2028,
                  "latency_stats": 2028,
                  "lcb_exception_stats": 2028,
                  "log_level": 1,
                  "num_processed_events": 6761,
                  "processed_events_size": 4867464,
                  "reb_vb_remaining_to_own": 682,
                  "reb_vb_remaining_to_stream_req": 682,
                  "thr_count": 1,
                  "thr_map": 1,
                  "timer_events": 6732,
                  "timer_responses_received": 6761,
                  "v8_init": 1,
                  "v8_load": 1
              },
              "events_remaining": {
                  "dcp_backlog": 0
              },
              "execution_stats": {
                  "agg_queue_memory": 0,
                  "agg_queue_size": 0,
                  "curl": {
                      "delete": 0,
                      "get": 0,
                      "head": 0,
                      "post": 0,
                      "put": 0
                  },
                  "curl_success_count": 0,
                  "dcp_delete_msg_counter": 0,
                  "dcp_delete_parse_failure": 0,
                  "dcp_mutation_msg_counter": 6761,
                  "dcp_mutation_parse_failure": 0,
                  "enqueued_dcp_delete_msg_counter": 0,
                  "enqueued_dcp_mutation_msg_counter": 6761,
                  "enqueued_timer_msg_counter": 0,
                  "feedback_queue_size": 0,
                  "filtered_dcp_delete_counter": 0,
                  "filtered_dcp_mutation_counter": 0,
                  "lcb_retry_failure": 0,
                  "messages_parsed": 17242,
                  "no_op_counter": 0,
                  "num_processed_events": 6761,
                  "on_delete_failure": 0,
                  "on_delete_success": 0,
                  "on_update_failure": 0,
                  "on_update_success": 6761,
                  "processed_events_size": 4867464,
                  "timer_callback_failure": 0,
                  "timer_callback_success": 6732,
                  "timer_cancel_counter": 0,
                  "timer_create_counter": 6761,
                  "timer_create_failure": 0,
                  "timer_msg_counter": 6732,
                  "timer_responses_sent": 0,
                  "timestamp": {
                      "109254": "2021-05-04T08:40:41Z"
                  },
                  "uv_msg_parse_failure": 0,
                  "uv_try_write_failure_counter": 0
              },
              "failure_stats": {
                  "app_worker_setting_events_lost": 0,
                  "bkt_ops_cas_mismatch_count": 0,
                  "bucket_op_exception_count": 0,
                  "checkpoint_failure_count": 0,
                  "curl_failure_count": 0,
                  "curl_max_resp_size_exceeded": 0,
                  "curl_non_200_response": 0,
                  "curl_timeout_count": 0,
                  "dcp_events_lost": 0,
                  "debugger_events_lost": 0,
                  "delete_events_lost": 0,
                  "mutation_events_lost": 0,
                  "n1ql_op_exception_count": 0,
                  "timeout_count": 0,
                  "timer_callback_missing_counter": 0,
                  "timer_context_size_exceeded_counter": 0,
                  "timer_events_lost": 0,
                  "timestamp": {
                      "109254": "2021-05-04T08:40:41Z"
                  },
                  "v8worker_events_lost": 0
              },
              "function_name": "timers",
              "gocb_creds_request_counter": 53,
              "function_id": 4271354540,
              "internal_vb_distribution_stats": {
                  "worker_timers_0": "[0-341]"
              },
              "latency_percentile_stats": {
                  "50": 700,
                  "80": 1000,
                  "90": 1200,
                  "95": 1800,
                  "99": 5000,
                  "100": 23700
              },
              "lcb_creds_request_counter": 20,
              "lcb_exception_stats": {},
              "planner_stats": [
                  {
                      "host_name": "172.23.106.73:8096",
                      "start_vb": 0,
                      "vb_count": 1024
                  }
              ],
              "metastore_stats": {
                  "metastore_delete_err": 0,
                  "metastore_deletes": 0,
                  "metastore_not_found": 0,
                  "metastore_scan": 0,
                  "metastore_scan_due": 0,
                  "metastore_scan_err": 0,
                  "metastore_set": 0,
                  "metastore_set_err": 0
              },
              "vb_distribution_stats_from_metadata": {
                  "172.23.106.73:8096": {
                      "worker_timers_0": "[0-341]",
                      "worker_timers_1": "[342-682]",
                      "worker_timers_2": "[683-1023]"
                  }
              },
              "worker_pids": {
                  "worker_timers_0": 109254
              }
          }
      ] 

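      Another problem: after the upgrade, the vb distribution from metadata (vb_distribution_stats_from_metadata) for the "timers" handler still lists three workers: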

      "172.23.106.73:8096":

      { "worker_timers_0": "[0-341]", "worker_timers_1": "[342-682]", "worker_timers_2": "[683-1023]" }

      but only 1 worker is spawned (worker_pids shows a single process):

      "worker_timers_0": 109254

      Attachments

        Issue Links

          For Gerrit Dashboard: MB-46092
          # Subject Branch Project Status CR V

          Activity

            People

              chanabasappa.ghali Chanabasappa Ghali
              vikas.chaudhary Vikas Chaudhary
              Votes:
              0 Vote for this issue
              Watchers:
              8 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                PagerDuty