Details
-
Bug
-
Resolution: Fixed
-
Critical
-
7.1.0
-
Enterprise Edition 7.1.0 build 2383
-
Untriaged
-
Centos 64-bit
-
-
1
-
Yes
Description
Data mismatch is being observed for timers handler in 2 separate swap rebalance tests.
TEST 1
./testrunner -i /tmp/win10-bucket-ops.ini -p get-cbcollect-info=True -t eventing.eventing_rebalance_collection.EventingRebalanceCollection.test_eventing_rebalance_swap_delete_recreate_collections,nodes_init=5,services_init=kv-eventing-eventing-index-n1ql,dataset=default,groups=simple,reset_services=True,doc-per-day=20,GROUP=bucket_op,java_sdk_client=True |
Test passed both times on 7.1.0-2335 and failed both times on 7.1.0-2383 after couple of runs on both the builds.
test_log_7.1.0-2335.txt
test_log_7.1.0-2383.txt
FAILURE
Traceback (most recent call last):
|
File "pytests/eventing/eventing_rebalance_collection.py", line 583, in test_eventing_rebalance_swap_delete_recreate_collections |
self.verify_all_handler(0) |
File "pytests/eventing/eventing_rebalance_collection.py", line 80, in verify_all_handler |
self.verify_doc_count_collections("dst_bucket.scope_1.coll_1", number_of_docs) |
File "./pytests/eventing/eventing_base.py", line 1068, in verify_doc_count_collections |
actual_count, expected_count, total_dcp_backlog, timers_in_past, lcb))
|
Exception: duplicated data in destination bucket which is not expected. Current : 311 Expected : 0 dcp_backlog : 40320 TIMERS_IN_PAST : 0 lcb_exceptions : {'172.23.106.77': {}} |
Stats for timers handler
{
|
"dcp_feed_boundary": "everything", |
"event_processing_stats": { |
"adhoc_timer_response_received": 3, |
"agg_messages_sent_to_worker": 45867, |
"agg_queue_memory": 0, |
"agg_queue_memory_cap": 106954752, |
"agg_queue_size": 0, |
"agg_queue_size_cap": 100000, |
"agg_timer_feedback_queue_cap": 500, |
"dcp_deletion": 40320, |
"dcp_deletion_sent_to_worker": 40320, |
"dcp_seqno_advanced": 1024, |
"dcp_snapshot": 1024, |
"dcp_stream_req_counter": 1024, |
"dcp_streamreq": 1024, |
"execution_stats": 698, |
"failure_stats": 698, |
"latency_stats": 698, |
"lcb_exception_stats": 698, |
"log_level": 1, |
"num_processed_events": 41344, |
"processed_events_size": 10576476, |
"thr_count": 1, |
"thr_map": 1, |
"timer_events": 50118, |
"timer_responses_received": 40320, |
"v8_init": 1, |
"v8_load": 1 |
},
|
"events_remaining": { |
"dcp_backlog": 0 |
},
|
"execution_stats": { |
"agg_queue_memory": 0, |
"agg_queue_size": 0, |
"curl": { |
"delete": 0, |
"get": 0, |
"head": 0, |
"post": 0, |
"put": 0 |
},
|
"curl_success_count": 0, |
"dcp_delete_msg_counter": 40320, |
"dcp_delete_parse_failure": 0, |
"dcp_mutation_msg_counter": 0, |
"dcp_mutation_parse_failure": 0, |
"enqueued_dcp_delete_msg_counter": 40320, |
"enqueued_dcp_mutation_msg_counter": 0, |
"enqueued_timer_msg_counter": 0, |
"feedback_queue_size": 0, |
"filtered_dcp_delete_counter": 0, |
"filtered_dcp_mutation_counter": 0, |
"lcb_retry_failure": 0, |
"messages_parsed": 45856, |
"no_op_counter": 1024, |
"num_processed_events": 41344, |
"on_delete_failure": 0, |
"on_delete_success": 40320, |
"on_update_failure": 0, |
"on_update_success": 0, |
"processed_events_size": 10576476, |
"timer_callback_failure": 0, |
"timer_callback_success": 50118, |
"timer_cancel_counter": 0, |
"timer_create_counter": 40320, |
"timer_create_failure": 0, |
"timer_msg_counter": 50118, |
"timer_responses_sent": 0, |
"timestamp": { |
"126872": "2022-03-02T07:07:48Z" |
},
|
"uv_msg_parse_failure": 0, |
"uv_try_write_failure_counter": 0 |
},
|
"failure_stats": { |
"app_worker_setting_events_lost": 0, |
"bkt_ops_cas_mismatch_count": 0, |
"bucket_cache_overflow_count": 0, |
"bucket_op_cache_miss_count": 0, |
"bucket_op_exception_count": 0, |
"checkpoint_failure_count": 0, |
"curl_failure_count": 0, |
"curl_max_resp_size_exceeded": 0, |
"curl_non_200_response": 0, |
"curl_timeout_count": 0, |
"dcp_events_lost": 0, |
"debugger_events_lost": 0, |
"delete_events_lost": 0, |
"mutation_events_lost": 0, |
"n1ql_op_exception_count": 0, |
"timeout_count": 0, |
"timer_callback_missing_counter": 0, |
"timer_context_size_exceeded_counter": 0, |
"timer_events_lost": 0, |
"timestamp": { |
"126872": "2022-03-02T07:07:48Z" |
},
|
"v8worker_events_lost": 0 |
},
|
"function_id": 3391603128, |
"function_name": "timers", |
"function_scope": { |
"bucket": "src_bucket", |
"scope": "_default" |
},
|
"gocb_creds_request_counter": 1, |
"internal_vb_distribution_stats": { |
"worker_src_bucket/_default/timers_0": "[0-1023]" |
},
|
"latency_percentile_stats": { |
"100": 20400, |
"50": 800, |
"80": 1100, |
"90": 1300, |
"95": 1700, |
"99": 3600 |
},
|
"lcb_creds_request_counter": 16, |
"lcb_exception_stats": {}, |
"metastore_stats": { |
"metastore_delete_err": 0, |
"metastore_deletes": 0, |
"metastore_not_found": 0, |
"metastore_scan": 0, |
"metastore_scan_due": 0, |
"metastore_scan_err": 0, |
"metastore_set": 0, |
"metastore_set_err": 0 |
},
|
"planner_stats": [ |
{
|
"host_name": "172.23.106.77:8096", |
"start_vb": 0, |
"vb_count": 1024 |
}
|
],
|
"vb_distribution_stats_from_metadata": { |
"172.23.106.77:8096": { |
"worker_src_bucket/_default/timers_0": "[0-1023]" |
}
|
},
|
"worker_pids": { |
"worker_src_bucket/_default/timers_0": 126872 |
}
|
}
|
TEST 2
./testrunner -i /tmp/win10-bucket-ops.ini -p get-cbcollect-info=True -t eventing.eventing_multihandler.EventingMultiHandler.test_multiple_handle_multiple_collections_swap_rebalance_kv,nodes_init=5,services_init=kv-eventing-eventing-index-n1ql,dataset=default,groups=simple,reset_services=True,skip_cleanup=True,num_src_buckets=12,num_dst_buckets=12,num_handlers=12,deploy_handler=12,worker_count=1,handler_code=handler_code/ABO/insert_timer.js,sequential=False,num_pause=1,GROUP=multihandler_multicollection_rebalance,global_function_scope=True |
Ran this test couple of times on 7.1.0-2335, passed both times.
Test failed 4/5 times on 7.1.0-2383.
Attachments
Issue Links
- backports to
-
MB-51340 [BP-MB-51275]: Eventing: Data mismatch for timers handler in couple of swap rebalance tests
- Closed