Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-50924

[Upgrade] Timers are not firing after online upgrade to Neo

    XMLWordPrintable

Details

    • Bug
    • Resolution: Not a Bug
    • Critical
    • 7.1.0
    • 7.1.0
    • eventing

    Description

      QE TEST

      ./testrunner -i /tmp/win10-bucket-ops.ini -p  -t eventing.eventing_upgrade.EventingUpgrade.test_online_upgrade_with_failover_rebalance_with_eventing,nodes_init=5,dataset=default,groups=simple,skip_cleanup=True,upgrade_test=True,initial_version=7.0.2-6703,doc-per-day=10,upgrade_version=7.1.0-2179,get-cbcollect-info=True,enable_n2n_encryption_and_tls=True
      

      Same issue observed when cluster is upgraded from 6.6.x to Neo.

      Error

      ['Traceback (most recent call last):\n  File "pytests/eventing/eventing_upgrade.py", line 319, in test_online_upgrade_with_failover_rebalance_with_eventing\n    self.verify_doc_count_collections("dst_bucket._default._default", self.docs_per_day * 2016)\n  File "pytests/eventing/eventing_base.py", line 1055, in verify_doc_count_collections\n    actual_count, expected_count, total_dcp_backlog, timers_in_past, lcb))\nException: missing data in destination bucket. Current : 16712 Expected : 20160  dcp_backlog : 0  TIMERS_IN_PAST : 0 lcb_exceptions : {\'172.23.106.78\': {}}\n']
      

      Eventing Stats

          {
              "dcp_feed_boundary": "everything",
              "event_processing_stats": {
                  "adhoc_timer_response_received": 4,
                  "agg_messages_sent_to_worker": 20929,
                  "agg_queue_memory": 0,
                  "agg_queue_memory_cap": 53477376,
                  "agg_queue_size": 0,
                  "agg_queue_size_cap": 100000,
                  "agg_timer_feedback_queue_cap": 500,
                  "dcp_mutation": 16712,
                  "dcp_mutation_sent_to_worker": 16712,
                  "dcp_snapshot": 852,
                  "dcp_stream_req_counter": 1024,
                  "dcp_streamreq": 1024,
                  "execution_stats": 637,
                  "failure_stats": 637,
                  "latency_stats": 637,
                  "lcb_exception_stats": 637,
                  "log_level": 1,
                  "num_processed_events": 16712,
                  "processed_events_size": 12101680,
                  "thr_count": 1,
                  "thr_map": 1,
                  "v8_init": 1,
                  "v8_load": 1
              },
              "events_remaining": {
                  "dcp_backlog": 0
              },
              "execution_stats": {
                  "agg_queue_memory": 0,
                  "agg_queue_size": 0,
                  "curl": {
                      "delete": 0,
                      "get": 0,
                      "head": 0,
                      "post": 0,
                      "put": 0
                  },
                  "curl_success_count": 0,
                  "dcp_delete_msg_counter": 0,
                  "dcp_delete_parse_failure": 0,
                  "dcp_mutation_msg_counter": 16712,
                  "dcp_mutation_parse_failure": 0,
                  "enqueued_dcp_delete_msg_counter": 0,
                  "enqueued_dcp_mutation_msg_counter": 16712,
                  "enqueued_timer_msg_counter": 0,
                  "feedback_queue_size": 0,
                  "filtered_dcp_delete_counter": 0,
                  "filtered_dcp_mutation_counter": 0,
                  "lcb_retry_failure": 0,
                  "messages_parsed": 20925,
                  "no_op_counter": 0,
                  "num_processed_events": 16712,
                  "on_delete_failure": 0,
                  "on_delete_success": 0,
                  "on_update_failure": 0,
                  "on_update_success": 16712,
                  "processed_events_size": 12101680,
                  "timer_callback_failure": 0,
                  "timer_callback_success": 0,
                  "timer_cancel_counter": 0,
                  "timer_create_counter": 0,
                  "timer_create_failure": 0,
                  "timer_msg_counter": 0,
                  "timer_responses_sent": 0,
                  "timestamp": {
                      "114291": "2022-02-09T14:04:37Z"
                  },
                  "uv_msg_parse_failure": 0,
                  "uv_try_write_failure_counter": 0
              },
              "failure_stats": {
                  "app_worker_setting_events_lost": 0,
                  "bkt_ops_cas_mismatch_count": 0,
                  "bucket_cache_overflow_count": 0,
                  "bucket_op_cache_miss_count": 0,
                  "bucket_op_exception_count": 0,
                  "checkpoint_failure_count": 0,
                  "curl_failure_count": 0,
                  "curl_max_resp_size_exceeded": 0,
                  "curl_non_200_response": 0,
                  "curl_timeout_count": 0,
                  "dcp_events_lost": 0,
                  "debugger_events_lost": 0,
                  "delete_events_lost": 0,
                  "mutation_events_lost": 0,
                  "n1ql_op_exception_count": 0,
                  "timeout_count": 0,
                  "timer_callback_missing_counter": 0,
                  "timer_context_size_exceeded_counter": 0,
                  "timer_events_lost": 0,
                  "timestamp": {
                      "114291": "2022-02-09T14:04:37Z"
                  },
                  "v8worker_events_lost": 0
              },
              "function_id": 3520773259,
              "function_name": "bucket_op",
              "gocb_creds_request_counter": 8,
              "internal_vb_distribution_stats": {
                  "worker_bucket_op_0": "[0-1023]"
              },
              "latency_percentile_stats": {
                  "100": 11500,
                  "50": 300,
                  "80": 400,
                  "90": 600,
                  "95": 800,
                  "99": 1500
              },
              "lcb_creds_request_counter": 175,
              "lcb_exception_stats": {},
              "metastore_stats": {},
              "planner_stats": [
                  {
                      "host_name": "172.23.106.78:18096",
                      "start_vb": 0,
                      "vb_count": 1024
                  }
              ],
              "vb_distribution_stats_from_metadata": {
                  "172.23.106.78:18096": {
                      "worker_bucket_op_0": "[0-1023]"
                  }
              },
              "worker_pids": {
                  "worker_bucket_op_0": 114291
              }
          }
      

      On 172.23.106.73 (outgoing eventing node)
      ns_server.eventing.log

      2022-02-09T05:46:14.002-08:00 [Error] Consumer::dcpRequestStreamHandle [worker_timers_0:/tmp/127.0.0.1:8091_0_166492549.sock:18225] vb: 421 STREAMREQ call failed on dcpFeed: {eventing:lcQL0wAi-9:worker_timers_0_172.23.106.77:11210_172.23.106.73:8096}, err: dcp.invalidVbucket
      2022-02-09T05:46:14.002-08:00 [Error] DCP[{eventing:lcQL0wAi-9:worker_timers_0_172.23.106.77:11210_172.23.106.73:8096}] ##1a5 len(feed.nodeFeeds[master]) is 0. Master node for vb: 421 is 172.23.106.64:11210 nodeFeeds length 1
      2022-02-09T05:46:14.002-08:00 [Error] Consumer::dcpRequestStreamHandle [worker_timers_0:/tmp/127.0.0.1:8091_0_166492549.sock:18225] vb: 421 STREAMREQ call failed on dcpFeed: {eventing:lcQL0wAi-9:worker_timers_0_172.23.106.77:11210_172.23.106.73:8096}, err: dcp.invalidVbucket
      2022-02-09T05:46:14.003-08:00 [Error] DCP[{eventing:lcQL0wAi-9:worker_timers_0_172.23.106.77:11210_172.23.106.73:8096}] ##1a5 len(feed.nodeFeeds[master]) is 0. Master node for vb: 421 is 172.23.106.64:11210 nodeFeeds length 1
      

      On 172.23.106.78 (incoming eventing node)
      ns_server.eventing.log

      2022-02-09T05:47:34.760-08:00 [Error] Consumer::getKvVbMap [worker_timers_0:/tmp/127.0.0.1:8091_0_166492549.sock:113786] Failed to grab vbMap for bucket: src_bucket, err: vbmap: Invalid KV node no -1 for vb 0
      2022-02-09T05:47:34.810-08:00 [Error] GetVBmap() failed: vbmap: Invalid KV node no -1 for vb 0
      2022-02-09T05:47:34.879-08:00 [Error] GetVBmap() failed: vbmap: Invalid KV node no -1 for vb 0
      2022-02-09T05:47:35.157-08:00 [Error] GetVBmap() failed: vbmap: Invalid KV node no -1 for vb 0
      2022-02-09T05:47:35.219-08:00 [Error] GetVBmap() failed: vbmap: Invalid KV node no -1 for vb 0
      2022-02-09T05:47:35.302-08:00 [Error] GetVBmap() failed: vbmap: Invalid KV node no -1 for vb 0
      

      Attachments

        No reviews matched the request. Check your Options in the drop-down menu of this sections header.

        Activity

          People

            sujay.gad Sujay Gad
            sujay.gad Sujay Gad
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty