Details
-
Bug
-
Resolution: Fixed
-
Critical
-
Cheshire-Cat
-
Untriaged
-
-
1
-
Unknown
Description
Offline Upgrade from 6.6.1-9213 -> 7.0.0-5071
- create a 5-node cluster (2 kv, 1 eventing, 1 n1ql, 1 index)
- create 2 handlers: timers and bucket_op
- load data
- deploy only the timers handler
- upgrade all the nodes using: export CB_MASTER_PASSWORD=password; rpm -U /tmp/couchbase-server-enterprise-7.0.0-5071-centos7.x86_64.rpm
./testrunner.py -i /tmp/win10-bucket-ops.ini -t eventing.eventing_upgrade.EventingUpgrade.test_offline_upgrade_with_eventing,nodes_init=5,dataset=default,groups=simple,upgrade_test=True,initial_version=6.6.1-9213,doc-per-day=10,upgrade_version=7.0.0-5071
|
|
Current : 6732 Expected : 20160 ...
|
Stats before upgrade
2021-05-04 01:04:21,636 - root - INFO - Stats for Node 172.23.106.73 is
|
[
|
{
|
"dcp_feed_boundary": "everything",
|
"event_processing_stats": {
|
"adhoc_timer_response_received": 3,
|
"agg_messages_sent_to_worker": 21625,
|
"agg_queue_memory": 0,
|
"agg_queue_memory_cap": 53477376,
|
"agg_queue_size": 0,
|
"agg_queue_size_cap": 300000,
|
"agg_timer_feedback_queue_cap": 1500,
|
"dcp_mutation": 20160,
|
"dcp_mutation_sent_to_worker": 20160,
|
"dcp_snapshot": 1024,
|
"dcp_stream_req_counter": 1024,
|
"dcp_streamreq": 1024,
|
"execution_stats": 84,
|
"failure_stats": 84,
|
"latency_stats": 84,
|
"lcb_exception_stats": 84,
|
"log_level": 3,
|
"num_processed_events": 20160,
|
"processed_events_size": 13934248,
|
"thr_count": 3,
|
"thr_map": 3,
|
"timer_responses_received": 20160,
|
"v8_init": 3,
|
"v8_load": 3
|
},
|
"events_remaining": {
|
"dcp_backlog": 0
|
},
|
"execution_stats": {
|
"agg_queue_memory": 0,
|
"agg_queue_size": 0,
|
"curl": {
|
"delete": 0,
|
"get": 0,
|
"head": 0,
|
"post": 0,
|
"put": 0
|
},
|
"dcp_delete_msg_counter": 0,
|
"dcp_delete_parse_failure": 0,
|
"dcp_mutation_msg_counter": 20160,
|
"dcp_mutation_parse_failure": 0,
|
"enqueued_dcp_delete_msg_counter": 0,
|
"enqueued_dcp_mutation_msg_counter": 20160,
|
"enqueued_timer_msg_counter": 0,
|
"feedback_queue_size": 0,
|
"filtered_dcp_delete_counter": 0,
|
"filtered_dcp_mutation_counter": 0,
|
"lcb_retry_failure": 0,
|
"messages_parsed": 21613,
|
"num_processed_events": 20160,
|
"on_delete_failure": 0,
|
"on_delete_success": 0,
|
"on_update_failure": 0,
|
"on_update_success": 20160,
|
"processed_events_size": 13934248,
|
"timer_cancel_counter": 0,
|
"timer_create_counter": 20160,
|
"timer_create_failure": 0,
|
"timer_msg_counter": 0,
|
"timer_responses_sent": 0,
|
"timestamp": {
|
"108035": "2021-05-04T08:04:20Z",
|
"108044": "2021-05-04T08:04:20Z",
|
"108052": "2021-05-04T08:04:20Z"
|
},
|
"uv_msg_parse_failure": 0,
|
"uv_try_write_failure_counter": 0
|
},
|
"failure_stats": {
|
"app_worker_setting_events_lost": 0,
|
"bkt_ops_cas_mismatch_count": 0,
|
"bucket_op_exception_count": 0,
|
"checkpoint_failure_count": 0,
|
"curl_non_200_response": 0,
|
"dcp_events_lost": 0,
|
"debugger_events_lost": 0,
|
"delete_events_lost": 0,
|
"mutation_events_lost": 0,
|
"n1ql_op_exception_count": 0,
|
"timeout_count": 0,
|
"timer_callback_missing_counter": 0,
|
"timer_context_size_exceeded_counter": 0,
|
"timer_events_lost": 0,
|
"timestamp": {
|
"108035": "2021-05-04T08:04:20Z",
|
"108044": "2021-05-04T08:04:20Z",
|
"108052": "2021-05-04T08:04:20Z"
|
},
|
"v8worker_events_lost": 0
|
},
|
"function_id": 4271354540,
|
"function_name": "timers",
|
"gocb_creds_request_counter": 12,
|
"internal_vb_distribution_stats": {
|
"worker_timers_0": "[0-341]",
|
"worker_timers_1": "[342-682]",
|
"worker_timers_2": "[683-1023]"
|
},
|
"latency_percentile_stats": {
|
"100": 22000,
|
"50": 1000,
|
"80": 1400,
|
"90": 1700,
|
"95": 2200,
|
"99": 4300
|
},
|
"lcb_creds_request_counter": 36,
|
"lcb_exception_stats": {},
|
"metastore_stats": {
|
"metastore_delete_err": 0,
|
"metastore_deletes": 0,
|
"metastore_not_found": 0,
|
"metastore_scan": 0,
|
"metastore_scan_due": 0,
|
"metastore_scan_err": 0,
|
"metastore_set": 0,
|
"metastore_set_err": 0
|
},
|
"planner_stats": [
|
{
|
"host_name": "172.23.106.73:8096",
|
"start_vb": 0,
|
"vb_count": 1024
|
}
|
],
|
"vb_distribution_stats_from_metadata": {},
|
"worker_pids": {
|
"worker_timers_0": 108035,
|
"worker_timers_1": 108044,
|
"worker_timers_2": 108052
|
}
|
}
|
]
|
Stats after upgrade
[
|
{
|
"dcp_feed_boundary": "everything",
|
"event_processing_stats": {
|
"adhoc_timer_response_received": 4,
|
"agg_messages_sent_to_worker": 29936,
|
"agg_queue_memory": 0,
|
"agg_queue_memory_cap": 53477376,
|
"agg_queue_size": 0,
|
"agg_queue_size_cap": 100000,
|
"agg_timer_feedback_queue_cap": 500,
|
"dcp_mutation": 20160,
|
"dcp_mutation_sent_to_worker": 20160,
|
"dcp_snapshot": 1024,
|
"dcp_stream_req_counter": 1024,
|
"dcp_streamreq": 1024,
|
"execution_stats": 1749,
|
"failure_stats": 1749,
|
"latency_stats": 1749,
|
"lcb_exception_stats": 1749,
|
"log_level": 1,
|
"num_processed_events": 20160,
|
"processed_events_size": 14516812,
|
"thr_count": 1,
|
"thr_map": 1,
|
"v8_init": 1,
|
"v8_load": 1
|
},
|
"events_remaining": {
|
"dcp_backlog": 0
|
},
|
"execution_stats": {
|
"agg_queue_memory": 0,
|
"agg_queue_size": 0,
|
"curl": {
|
"delete": 0,
|
"get": 0,
|
"head": 0,
|
"post": 0,
|
"put": 0
|
},
|
"curl_success_count": 0,
|
"dcp_delete_msg_counter": 0,
|
"dcp_delete_parse_failure": 0,
|
"dcp_mutation_msg_counter": 20160,
|
"dcp_mutation_parse_failure": 0,
|
"enqueued_dcp_delete_msg_counter": 0,
|
"enqueued_dcp_mutation_msg_counter": 20160,
|
"enqueued_timer_msg_counter": 0,
|
"feedback_queue_size": 0,
|
"filtered_dcp_delete_counter": 0,
|
"filtered_dcp_mutation_counter": 0,
|
"lcb_retry_failure": 0,
|
"messages_parsed": 29932,
|
"no_op_counter": 0,
|
"num_processed_events": 20160,
|
"on_delete_failure": 0,
|
"on_delete_success": 0,
|
"on_update_failure": 0,
|
"on_update_success": 20160,
|
"processed_events_size": 14516812,
|
"timer_callback_failure": 0,
|
"timer_callback_success": 0,
|
"timer_cancel_counter": 0,
|
"timer_create_counter": 0,
|
"timer_create_failure": 0,
|
"timer_msg_counter": 0,
|
"timer_responses_sent": 0,
|
"timestamp": {
|
"109559": "2021-05-04T08:40:41Z"
|
},
|
"uv_msg_parse_failure": 0,
|
"uv_try_write_failure_counter": 0
|
},
|
"failure_stats": {
|
"app_worker_setting_events_lost": 0,
|
"bkt_ops_cas_mismatch_count": 0,
|
"bucket_op_exception_count": 0,
|
"checkpoint_failure_count": 0,
|
"curl_failure_count": 0,
|
"curl_max_resp_size_exceeded": 0,
|
"curl_non_200_response": 0,
|
"curl_timeout_count": 0,
|
"dcp_events_lost": 0,
|
"debugger_events_lost": 0,
|
"delete_events_lost": 0,
|
"mutation_events_lost": 0,
|
"n1ql_op_exception_count": 0,
|
"timeout_count": 0,
|
"timer_callback_missing_counter": 0,
|
"timer_context_size_exceeded_counter": 0,
|
"timer_events_lost": 0,
|
"timestamp": {
|
"109559": "2021-05-04T08:40:41Z"
|
},
|
"v8worker_events_lost": 0
|
},
|
"function_name": "bucket_op",
|
"gocb_creds_request_counter": 53,
|
"function_id": 628199521,
|
"internal_vb_distribution_stats": {
|
"worker_bucket_op_0": "[0-1023]"
|
},
|
"latency_percentile_stats": {
|
"50": 300,
|
"80": 400,
|
"90": 500,
|
"95": 800,
|
"99": 1600,
|
"100": 12000
|
},
|
"lcb_creds_request_counter": 20,
|
"lcb_exception_stats": {},
|
"planner_stats": [
|
{
|
"host_name": "172.23.106.73:8096",
|
"start_vb": 0,
|
"vb_count": 1024
|
}
|
],
|
"metastore_stats": {},
|
"vb_distribution_stats_from_metadata": {
|
"172.23.106.73:8096": {
|
"worker_bucket_op_0": "[0-1023]"
|
}
|
},
|
"worker_pids": {
|
"worker_bucket_op_0": 109559
|
}
|
},
|
{
|
"dcp_feed_boundary": "everything",
|
"event_processing_stats": {
|
"adhoc_timer_response_received": 2,
|
"agg_messages_sent_to_worker": 17251,
|
"agg_queue_memory": 0,
|
"agg_queue_memory_cap": 63963136,
|
"agg_queue_size": 0,
|
"agg_queue_size_cap": 100000,
|
"agg_timer_feedback_queue_cap": 500,
|
"dcp_mutation": 6761,
|
"dcp_mutation_sent_to_worker": 6761,
|
"dcp_snapshot": 342,
|
"dcp_stream_req_counter": 342,
|
"dcp_streamreq": 342,
|
"execution_stats": 2028,
|
"failure_stats": 2028,
|
"latency_stats": 2028,
|
"lcb_exception_stats": 2028,
|
"log_level": 1,
|
"num_processed_events": 6761,
|
"processed_events_size": 4867464,
|
"reb_vb_remaining_to_own": 682,
|
"reb_vb_remaining_to_stream_req": 682,
|
"thr_count": 1,
|
"thr_map": 1,
|
"timer_events": 6732,
|
"timer_responses_received": 6761,
|
"v8_init": 1,
|
"v8_load": 1
|
},
|
"events_remaining": {
|
"dcp_backlog": 0
|
},
|
"execution_stats": {
|
"agg_queue_memory": 0,
|
"agg_queue_size": 0,
|
"curl": {
|
"delete": 0,
|
"get": 0,
|
"head": 0,
|
"post": 0,
|
"put": 0
|
},
|
"curl_success_count": 0,
|
"dcp_delete_msg_counter": 0,
|
"dcp_delete_parse_failure": 0,
|
"dcp_mutation_msg_counter": 6761,
|
"dcp_mutation_parse_failure": 0,
|
"enqueued_dcp_delete_msg_counter": 0,
|
"enqueued_dcp_mutation_msg_counter": 6761,
|
"enqueued_timer_msg_counter": 0,
|
"feedback_queue_size": 0,
|
"filtered_dcp_delete_counter": 0,
|
"filtered_dcp_mutation_counter": 0,
|
"lcb_retry_failure": 0,
|
"messages_parsed": 17242,
|
"no_op_counter": 0,
|
"num_processed_events": 6761,
|
"on_delete_failure": 0,
|
"on_delete_success": 0,
|
"on_update_failure": 0,
|
"on_update_success": 6761,
|
"processed_events_size": 4867464,
|
"timer_callback_failure": 0,
|
"timer_callback_success": 6732,
|
"timer_cancel_counter": 0,
|
"timer_create_counter": 6761,
|
"timer_create_failure": 0,
|
"timer_msg_counter": 6732,
|
"timer_responses_sent": 0,
|
"timestamp": {
|
"109254": "2021-05-04T08:40:41Z"
|
},
|
"uv_msg_parse_failure": 0,
|
"uv_try_write_failure_counter": 0
|
},
|
"failure_stats": {
|
"app_worker_setting_events_lost": 0,
|
"bkt_ops_cas_mismatch_count": 0,
|
"bucket_op_exception_count": 0,
|
"checkpoint_failure_count": 0,
|
"curl_failure_count": 0,
|
"curl_max_resp_size_exceeded": 0,
|
"curl_non_200_response": 0,
|
"curl_timeout_count": 0,
|
"dcp_events_lost": 0,
|
"debugger_events_lost": 0,
|
"delete_events_lost": 0,
|
"mutation_events_lost": 0,
|
"n1ql_op_exception_count": 0,
|
"timeout_count": 0,
|
"timer_callback_missing_counter": 0,
|
"timer_context_size_exceeded_counter": 0,
|
"timer_events_lost": 0,
|
"timestamp": {
|
"109254": "2021-05-04T08:40:41Z"
|
},
|
"v8worker_events_lost": 0
|
},
|
"function_name": "timers",
|
"gocb_creds_request_counter": 53,
|
"function_id": 4271354540,
|
"internal_vb_distribution_stats": {
|
"worker_timers_0": "[0-341]"
|
},
|
"latency_percentile_stats": {
|
"50": 700,
|
"80": 1000,
|
"90": 1200,
|
"95": 1800,
|
"99": 5000,
|
"100": 23700
|
},
|
"lcb_creds_request_counter": 20,
|
"lcb_exception_stats": {},
|
"planner_stats": [
|
{
|
"host_name": "172.23.106.73:8096",
|
"start_vb": 0,
|
"vb_count": 1024
|
}
|
],
|
"metastore_stats": {
|
"metastore_delete_err": 0,
|
"metastore_deletes": 0,
|
"metastore_not_found": 0,
|
"metastore_scan": 0,
|
"metastore_scan_due": 0,
|
"metastore_scan_err": 0,
|
"metastore_set": 0,
|
"metastore_set_err": 0
|
},
|
"vb_distribution_stats_from_metadata": {
|
"172.23.106.73:8096": {
|
"worker_timers_0": "[0-341]",
|
"worker_timers_1": "[342-682]",
|
"worker_timers_2": "[683-1023]"
|
}
|
},
|
"worker_pids": {
|
"worker_timers_0": 109254
|
}
|
}
|
]
|
Another problem: the vb distribution is
"172.23.106.73:8096":
{ "worker_timers_0": "[0-341]", "worker_timers_1": "[342-682]", "worker_timers_2": "[683-1023]" } but only 1 worker is spawned:
"worker_timers_0": 109254
Attachments
Issue Links
- relates to
-
MB-45973 Upgrade 6.6.1-> 7.0.0 : Timer not firing after upgrade, worker count change and service crash
- Closed