Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-28489

Multiple functions deployed listening to same src_bucket causes few mutations to go to dcp_backlog

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Critical
    • 5.5.0
    • 5.5.0
    • eventing

    Description

      Steps to Repro
      Run the following script on vagrant

      ./testrunner -i b/temp_all.ini  -t eventing.eventing_volume.EventingVolume.test_eventing_volume,nodes_init=6,services_init=kv-kv-kv-eventing-eventing-index:n1ql,dataset=default,groups=simple,reset_services=True,doc-per-day=1,skip_cleanup=True
      

      1) Created 2016 docs on src bucket 6 node cluster kv-kv-kv-eventing-eventing-index:n1ql
      2) Deployed 3 functions(See attached functions). All writing to different dst buckets
      3) Wait for eventing to process all mutations and write to different dst buckets.

      See attached screenshot of buckets at the end of the test.

      Stats from 2 eventing nodes

      Balakumarans-MacBook-Pro:testrunner balakumaran.g$ curl -u Administrator:password http://10.111.170.104:8096/api/v1/stats -s | jq .
      [
        {
          "event_processing_stats": {
            "AGG_MESSAGES_SENT_TO_WORKER": 7800,
            "DCP_MUTATION": 1917,
            "DCP_MUTATION_SENT_TO_WORKER": 1019,
            "DCP_SNAPSHOT": 1317,
            "DCP_STREAMREQ": 512,
            "EXECUTION_STATS": 1690,
            "FAILURE_STATS": 1690,
            "HANDLER_CODE": 3,
            "LATENCY_STATS": 1690,
            "LCB_EXCEPTION_STATS": 1690,
            "LOG_LEVEL": 3,
            "SOURCE_MAP": 3,
            "THR_COUNT": 3,
            "THR_MAP": 3,
            "V8_INIT": 3,
            "V8_LOAD": 3
          },
          "events_remaining": {
            "dcp_backlog": 898
          },
          "execution_stats": {
            "agg_queue_size": 0,
            "cron_timer_msg_counter": 0,
            "dcp_delete_msg_counter": 0,
            "dcp_mutation_msg_counter": 1019,
            "doc_timer_create_failure": 0,
            "doc_timer_msg_counter": 0,
            "doc_timer_responses_sent": 0,
            "enqueued_cron_timer_msg_counter": 0,
            "enqueued_dcp_delete_msg_counter": 0,
            "enqueued_dcp_mutation_msg_counter": 1019,
            "enqueued_doc_timer_msg_counter": 0,
            "feedback_queue_size": 0,
            "messages_parsed": 7791,
            "on_delete_failure": 0,
            "on_delete_success": 0,
            "on_update_failure": 0,
            "on_update_success": 1019
          },
          "failure_stats": {
            "bucket_op_exception_count": 0,
            "checkpoint_failure_count": 0,
            "n1ql_op_exception_count": 0,
            "timeout_count": 0
          },
          "function_name": "Function_504106722_test_eventing_volume_bucket_op",
          "lcb_exception_stats": {},
          "planner_stats": [
            {
              "host_name": "10.111.170.104:8096",
              "start_vb": 0,
              "vb_count": 512
            },
            {
              "host_name": "10.112.170.101:8096",
              "start_vb": 512,
              "vb_count": 512
            }
          ],
          "vb_distribution_stats": {
            "10.111.170.104:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_0": "[0-170]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_1": "[171-341]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_2": "[342-511]"
            },
            "10.112.170.101:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_0": "[512-682]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_1": "[683-853]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_2": "[854-1023]"
            }
          },
          "worker_pids": {
            "worker_Function_504106722_test_eventing_volume_bucket_op_0": 8885,
            "worker_Function_504106722_test_eventing_volume_bucket_op_1": 8893,
            "worker_Function_504106722_test_eventing_volume_bucket_op_2": 8886
          }
        },
        {
          "event_processing_stats": {
            "AGG_MESSAGES_SENT_TO_WORKER": 7845,
            "CRON_TIMER_EVENTS": 945,
            "DCP_MUTATION": 1917,
            "DCP_MUTATION_SENT_TO_WORKER": 1019,
            "DCP_SNAPSHOT": 1304,
            "DCP_STREAMREQ": 512,
            "EXECUTION_STATS": 1668,
            "FAILURE_STATS": 1668,
            "HANDLER_CODE": 3,
            "LATENCY_STATS": 1668,
            "LCB_EXCEPTION_STATS": 1668,
            "LOG_LEVEL": 6,
            "SOURCE_MAP": 3,
            "THR_COUNT": 3,
            "THR_MAP": 3,
            "V8_INIT": 3,
            "V8_LOAD": 3
          },
          "events_remaining": {
            "dcp_backlog": 898
          },
          "execution_stats": {
            "agg_queue_size": 0,
            "cron_timer_msg_counter": 945,
            "dcp_delete_msg_counter": 0,
            "dcp_mutation_msg_counter": 1019,
            "doc_timer_create_failure": 0,
            "doc_timer_msg_counter": 0,
            "doc_timer_responses_sent": 0,
            "enqueued_cron_timer_msg_counter": 130,
            "enqueued_dcp_delete_msg_counter": 0,
            "enqueued_dcp_mutation_msg_counter": 1019,
            "enqueued_doc_timer_msg_counter": 0,
            "feedback_queue_size": 0,
            "messages_parsed": 7836,
            "on_delete_failure": 0,
            "on_delete_success": 0,
            "on_update_failure": 0,
            "on_update_success": 1019
          },
          "failure_stats": {
            "bucket_op_exception_count": 0,
            "checkpoint_failure_count": 0,
            "n1ql_op_exception_count": 0,
            "timeout_count": 0
          },
          "function_name": "Function_504106722_test_eventing_volume_bucket_op_with_cron_timers",
          "lcb_exception_stats": {},
          "planner_stats": [
            {
              "host_name": "10.111.170.104:8096",
              "start_vb": 0,
              "vb_count": 512
            },
            {
              "host_name": "10.112.170.101:8096",
              "start_vb": 512,
              "vb_count": 512
            }
          ],
          "vb_distribution_stats": {
            "10.111.170.104:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_0": "[0-170]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_1": "[171-341]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_2": "[342-511]"
            },
            "10.112.170.101:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_0": "[512-682]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_1": "[683-853]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_2": "[854-1023]"
            }
          },
          "worker_pids": {
            "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_0": 8942,
            "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_1": 8939,
            "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_2": 8945
          }
        },
        {
          "event_processing_stats": {
            "AGG_MESSAGES_SENT_TO_WORKER": 4335,
            "DCP_MUTATION": 1917,
            "DCP_MUTATION_SENT_TO_WORKER": 1019,
            "DCP_SNAPSHOT": 1304,
            "DCP_STREAMREQ": 512,
            "EXECUTION_STATS": 823,
            "FAILURE_STATS": 823,
            "HANDLER_CODE": 3,
            "LATENCY_STATS": 823,
            "LCB_EXCEPTION_STATS": 823,
            "LOG_LEVEL": 6,
            "SOURCE_MAP": 3,
            "THR_COUNT": 3,
            "THR_MAP": 3,
            "V8_INIT": 3,
            "V8_LOAD": 3
          },
          "events_remaining": {
            "dcp_backlog": 898
          },
          "execution_stats": {
            "agg_queue_size": 0,
            "cron_timer_msg_counter": 0,
            "dcp_delete_msg_counter": 0,
            "dcp_mutation_msg_counter": 1019,
            "doc_timer_create_failure": 0,
            "doc_timer_msg_counter": 0,
            "doc_timer_responses_sent": 0,
            "enqueued_cron_timer_msg_counter": 0,
            "enqueued_dcp_delete_msg_counter": 0,
            "enqueued_dcp_mutation_msg_counter": 1019,
            "enqueued_doc_timer_msg_counter": 0,
            "feedback_queue_size": 1019,
            "messages_parsed": 4326,
            "on_delete_failure": 0,
            "on_delete_success": 0,
            "on_update_failure": 0,
            "on_update_success": 1019
          },
          "failure_stats": {
            "bucket_op_exception_count": 0,
            "checkpoint_failure_count": 0,
            "n1ql_op_exception_count": 0,
            "timeout_count": 0
          },
          "function_name": "Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers",
          "lcb_exception_stats": {},
          "planner_stats": [
            {
              "host_name": "10.111.170.104:8096",
              "start_vb": 0,
              "vb_count": 512
            },
            {
              "host_name": "10.112.170.101:8096",
              "start_vb": 512,
              "vb_count": 512
            }
          ],
          "vb_distribution_stats": {
            "10.111.170.104:8096": {
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_0": "[0-170]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_1": "[171-341]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_2": "[342-511]"
            },
            "10.112.170.101:8096": {
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_0": "[512-682]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_1": "[683-853]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_2": "[854-1023]"
            }
          },
          "worker_pids": {
            "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_0": 9019,
            "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_1": 9031,
            "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_2": 9025
          }
        }
      ]
      Balakumarans-MacBook-Pro:testrunner balakumaran.g$ 
      

      Balakumarans-MacBook-Pro:testrunner balakumaran.g$ curl -u Administrator:password http://10.112.170.101:8096/api/v1/stats -s | jq .
      [
        {
          "event_processing_stats": {
            "AGG_MESSAGES_SENT_TO_WORKER": 3098,
            "DCP_MUTATION": 1345,
            "DCP_MUTATION_SENT_TO_WORKER": 997,
            "DCP_SNAPSHOT": 770,
            "DCP_STREAMREQ": 512,
            "EXECUTION_STATS": 520,
            "FAILURE_STATS": 520,
            "HANDLER_CODE": 3,
            "LATENCY_STATS": 520,
            "LCB_EXCEPTION_STATS": 520,
            "LOG_LEVEL": 3,
            "SOURCE_MAP": 3,
            "THR_COUNT": 3,
            "THR_MAP": 3,
            "V8_INIT": 3,
            "V8_LOAD": 3
          },
          "events_remaining": {
            "dcp_backlog": 348
          },
          "execution_stats": {
            "agg_queue_size": 0,
            "cron_timer_msg_counter": 0,
            "dcp_delete_msg_counter": 0,
            "dcp_mutation_msg_counter": 997,
            "doc_timer_create_failure": 0,
            "doc_timer_msg_counter": 0,
            "doc_timer_responses_sent": 0,
            "enqueued_cron_timer_msg_counter": 0,
            "enqueued_dcp_delete_msg_counter": 0,
            "enqueued_dcp_mutation_msg_counter": 997,
            "enqueued_doc_timer_msg_counter": 0,
            "feedback_queue_size": 0,
            "messages_parsed": 3089,
            "on_delete_failure": 0,
            "on_delete_success": 0,
            "on_update_failure": 0,
            "on_update_success": 997
          },
          "failure_stats": {
            "bucket_op_exception_count": 0,
            "checkpoint_failure_count": 0,
            "n1ql_op_exception_count": 0,
            "timeout_count": 1
          },
          "function_name": "Function_504106722_test_eventing_volume_bucket_op",
          "lcb_exception_stats": {},
          "planner_stats": [
            {
              "host_name": "10.111.170.104:8096",
              "start_vb": 0,
              "vb_count": 512
            },
            {
              "host_name": "10.112.170.101:8096",
              "start_vb": 512,
              "vb_count": 512
            }
          ],
          "vb_distribution_stats": {
            "10.111.170.104:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_0": "[0-170]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_1": "[171-341]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_2": "[342-511]"
            },
            "10.112.170.101:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_0": "[512-682]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_1": "[683-853]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_2": "[854-1023]"
            }
          },
          "worker_pids": {
            "worker_Function_504106722_test_eventing_volume_bucket_op_0": 8587,
            "worker_Function_504106722_test_eventing_volume_bucket_op_1": 8586,
            "worker_Function_504106722_test_eventing_volume_bucket_op_2": 8597
          }
        },
        {
          "event_processing_stats": {
            "AGG_MESSAGES_SENT_TO_WORKER": 3256,
            "CRON_TIMER_EVENTS": 1070,
            "DCP_MUTATION": 1345,
            "DCP_MUTATION_SENT_TO_WORKER": 997,
            "DCP_SNAPSHOT": 770,
            "DCP_STREAMREQ": 512,
            "EXECUTION_STATS": 516,
            "FAILURE_STATS": 516,
            "HANDLER_CODE": 6,
            "LATENCY_STATS": 516,
            "LCB_EXCEPTION_STATS": 516,
            "LOG_LEVEL": 9,
            "SOURCE_MAP": 6,
            "THR_COUNT": 6,
            "THR_MAP": 6,
            "V8_INIT": 6,
            "V8_LOAD": 6
          },
          "events_remaining": {
            "dcp_backlog": 348
          },
          "execution_stats": {
            "agg_queue_size": 0,
            "cron_timer_msg_counter": 707,
            "dcp_delete_msg_counter": 0,
            "dcp_mutation_msg_counter": 676,
            "doc_timer_create_failure": 0,
            "doc_timer_msg_counter": 0,
            "doc_timer_responses_sent": 0,
            "enqueued_cron_timer_msg_counter": 105,
            "enqueued_dcp_delete_msg_counter": 0,
            "enqueued_dcp_mutation_msg_counter": 676,
            "enqueued_doc_timer_msg_counter": 0,
            "feedback_queue_size": 0,
            "messages_parsed": 2836,
            "on_delete_failure": 0,
            "on_delete_success": 0,
            "on_update_failure": 1,
            "on_update_success": 675
          },
          "failure_stats": {
            "bucket_op_exception_count": 0,
            "checkpoint_failure_count": 0,
            "n1ql_op_exception_count": 0,
            "timeout_count": 5
          },
          "function_name": "Function_504106722_test_eventing_volume_bucket_op_with_cron_timers",
          "lcb_exception_stats": {},
          "planner_stats": [
            {
              "host_name": "10.111.170.104:8096",
              "start_vb": 0,
              "vb_count": 512
            },
            {
              "host_name": "10.112.170.101:8096",
              "start_vb": 512,
              "vb_count": 512
            }
          ],
          "vb_distribution_stats": {
            "10.111.170.104:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_0": "[0-170]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_1": "[171-341]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_2": "[342-511]"
            },
            "10.112.170.101:8096": {
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_0": "[512-682]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_1": "[683-853]",
              "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_2": "[854-1023]"
            }
          },
          "worker_pids": {
            "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_0": 8637,
            "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_1": 8639,
            "worker_Function_504106722_test_eventing_volume_bucket_op_with_cron_timers_2": 8816
          }
        },
        {
          "event_processing_stats": {
            "AGG_MESSAGES_SENT_TO_WORKER": 2501,
            "DCP_MUTATION": 1345,
            "DCP_MUTATION_SENT_TO_WORKER": 997,
            "DCP_SNAPSHOT": 772,
            "DCP_STREAMREQ": 512,
            "EXECUTION_STATS": 370,
            "FAILURE_STATS": 370,
            "HANDLER_CODE": 3,
            "LATENCY_STATS": 370,
            "LCB_EXCEPTION_STATS": 370,
            "LOG_LEVEL": 6,
            "SOURCE_MAP": 3,
            "THR_COUNT": 3,
            "THR_MAP": 3,
            "V8_INIT": 3,
            "V8_LOAD": 3
          },
          "events_remaining": {
            "dcp_backlog": 1113
          },
          "execution_stats": {
            "agg_queue_size": 0,
            "cron_timer_msg_counter": 0,
            "dcp_delete_msg_counter": 0,
            "dcp_mutation_msg_counter": 997,
            "doc_timer_create_failure": 0,
            "doc_timer_msg_counter": 0,
            "doc_timer_responses_sent": 0,
            "enqueued_cron_timer_msg_counter": 0,
            "enqueued_dcp_delete_msg_counter": 0,
            "enqueued_dcp_mutation_msg_counter": 997,
            "enqueued_doc_timer_msg_counter": 0,
            "feedback_queue_size": 682,
            "messages_parsed": 2492,
            "on_delete_failure": 0,
            "on_delete_success": 0,
            "on_update_failure": 315,
            "on_update_success": 682
          },
          "failure_stats": {
            "bucket_op_exception_count": 0,
            "checkpoint_failure_count": 1104,
            "n1ql_op_exception_count": 0,
            "timeout_count": 6
          },
          "function_name": "Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers",
          "lcb_exception_stats": {},
          "planner_stats": [
            {
              "host_name": "10.111.170.104:8096",
              "start_vb": 0,
              "vb_count": 512
            },
            {
              "host_name": "10.112.170.101:8096",
              "start_vb": 512,
              "vb_count": 512
            }
          ],
          "vb_distribution_stats": {
            "10.111.170.104:8096": {
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_0": "[0-170]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_1": "[171-341]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_2": "[342-511]"
            },
            "10.112.170.101:8096": {
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_0": "[512-682]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_1": "[683-853]",
              "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_2": "[854-1023]"
            }
          },
          "worker_pids": {
            "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_0": 8700,
            "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_1": 8704,
            "worker_Function_504106722_test_eventing_volume_n1ql_op_with_doc_timers_2": 8733
          }
        }
      ]
      Balakumarans-MacBook-Pro:testrunner balakumaran.g$ 
      

      Also I noted certain anomalies in the stats when multiple functions listen to same src bucket.
      1) All the functions have same dcp_backlog even when some functions have processed all mutations.
      2) DCP_MUTATION and DCP_MUTATION_SENT_TO_WORKER don't match up when functions listen to same src bucket.

      Attachments

        No reviews matched the request. Check your Options in the drop-down menu of this sections header.

        Activity

          People

            asingh Abhishek Singh (Inactive)
            Balakumaran.Gopal Balakumaran Gopal
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty