Details
-
Bug
-
Resolution: Cannot Reproduce
-
Critical
-
7.6.0
-
7.6.0-1568
-
Untriaged
-
Linux x86_64
-
0
-
Unknown
Description
Steps To Recreate:
- Create a 4-node cluster: 172.23.104.231 [KV], 172.23.107.168 [n1ql], 172.23.107.52 [eventing], 172.23.109.84 [index]
- Create three buckets(src_bucket, dst_bucket, metadata)
- All buckets have storageBackend=magma and replicas=1
- Create a primary index on each of the three buckets
- Deploy the following code
Deploying the following handler code : Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations with
bindings: {'buckets': [{'alias': 'dst_bucket', 'bucket_name': 'dst_bucket', 'access': 'rw'}], 'metadata_bucket': 'metadata', 'source_bucket': 'src_bucket', 'curl': []} and
settings: {'checkpoint_interval': 20000, 'cleanup_timers': False, 'dcp_stream_boundary': 'everything', 'deployment_status': False, 'description': 'Sample Description', 'log_level': 'INFO', 'skip_timer_threshold': 86400, 'sock_batch_size': 1, 'tick_duration': 5000, 'timer_processing_tick_interval': 500, 'timer_worker_pool_size': 3, 'worker_count': 3, 'processing_status': False, 'cpp_worker_thread_count': 1, 'execution_timeout': 20, 'data_chan_size': 10000, 'worker_queue_cap': 100000, 'use_memory_manager': True, 'deadline_timeout': 21, 'timer_storage_chan_size': 10000, 'dcp_gen_chan_size': 10000}
[2023-10-01 00:14:55,855] - [eventing_base:438] INFO -
function OnUpdate(doc, meta) {
var expiry = new Date();
expiry.setSeconds(expiry.getSeconds() + 30);
var context = {docID : meta.id, random_text : "e6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6"};
createTimer(timerCallback, expiry, meta.id, context);
}
// Delete handler: for each deletion, schedules NDtimerCallback through
// createTimer with an expiry 30 seconds after the current time.
function OnDelete(meta) {
var expiry = new Date();
expiry.setSeconds(expiry.getSeconds() + 30);
// Context carries only the document id; NDtimerCallback uses it as the key to delete.
var context = {docID : meta.id};
createTimer(NDtimerCallback, expiry, meta.id, context);
}
// Timer callback scheduled by OnDelete: deletes context.docID from dst_bucket.
// The delete is retried in an unbounded loop until it completes without
// throwing; every caught exception is logged before the next attempt.
// NOTE(review): there is no retry cap or back-off in this loop, so it ends only
// on success or when something outside this code stops the callback.
function NDtimerCallback(context) {
while(true){
try{
delete dst_bucket[context.docID];
// Reached only when the delete did not throw.
break;
}catch(e){
log(e);
}
}
}
// Timer callback scheduled by OnUpdate: stores context.random_text in
// dst_bucket under the key context.docID.
// The write is retried in an unbounded loop until it completes without
// throwing; every caught exception is logged before the next attempt.
// NOTE(review): there is no retry cap or back-off in this loop, so it ends only
// on success or when something outside this code stops the callback.
function timerCallback(context) {
while(true){
try{
dst_bucket[context.docID] = context.random_text;
// Reached only when the write did not throw.
break;
}catch(e){
log(e);
}
}
}
[2023-10-01 00:14:57,021] - [eventing_base:440] INFO - deploy Application : b'{\n "code": 0,\n "info": "Function: src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations is deploying"\n}'
[2023-10-01 00:14:57,021] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
[2023-10-01 00:15:02,044] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
[2023-10-01 00:15:07,065] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
[2023-10-01 00:15:12,085] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
[2023-10-01 00:15:17,169] - [basetestcase:2702] INFO - list of eventing nodes in cluster: [ip:172.23.107.52 port:8091 ssh_username:root]
[2023-10-01 00:15:17,256] - [eventing_base:673] INFO - Stats for Node 172.23.107.52 is
[
{
"dcp_feed_boundary": "everything",
"event_processing_stats": {
"adhoc_timer_response_received": 1,
"agg_messages_sent_to_worker": 2217,
"agg_queue_memory": 0,
"agg_queue_memory_cap": 106954752,
"agg_queue_size": 0,
"agg_queue_size_cap": 300000,
"agg_timer_feedback_queue_cap": 1500,
"dcp_seqno_advanced": 1024,
"dcp_snapshot": 1024,
"dcp_stream_req_counter": 1024,
"dcp_streamreq": 1024,
"execution_stats": 29,
"failure_stats": 29,
"latency_stats": 29,
"lcb_exception_stats": 29,
"log_level": 3,
"num_processed_events": 1024,
"processed_events_size": 172128,
"thr_count": 3,
"thr_map": 3,
"v8_init": 3,
"v8_load": 3
},
"events_remaining": {
"dcp_backlog": 0
},
"execution_stats": {
"agg_queue_memory": 0,
"agg_queue_size": 0,
"curl": {
"delete": 0,
"get": 0,
"head": 0,
"post": 0,
"put": 0
},
"curl_success_count": 0,
"dcp_delete_msg_counter": 0,
"dcp_delete_parse_failure": 0,
"dcp_mutation_msg_counter": 0,
"dcp_mutation_parse_failure": 0,
"enqueued_dcp_delete_msg_counter": 0,
"enqueued_dcp_mutation_msg_counter": 0,
"enqueued_timer_msg_counter": 0,
"feedback_queue_size": 0,
"filtered_dcp_delete_counter": 0,
"filtered_dcp_mutation_counter": 0,
"lcb_retry_failure": 0,
"messages_parsed": 2205,
"no_op_counter": 1024,
"num_processed_events": 1024,
"on_delete_failure": 0,
"on_delete_success": 0,
"on_update_failure": 0,
"on_update_success": 0,
"processed_events_size": 172128,
"timer_callback_failure": 0,
"timer_callback_success": 0,
"timer_cancel_counter": 0,
"timer_create_counter": 0,
"timer_create_failure": 0,
"timer_msg_counter": 0,
"timer_responses_sent": 0,
"timestamp": {
"767789": "2023-10-01T07:15:17Z",
"767797": "2023-10-01T07:15:17Z",
"767807": "2023-10-01T07:15:16Z"
},
"uv_msg_parse_failure": 0,
"uv_try_write_failure_counter": 0
},
"failure_stats": {
"analytics_op_exception_count": 0,
"app_worker_setting_events_lost": 0,
"bkt_ops_cas_mismatch_count": 0,
"bucket_cache_overflow_count": 0,
"bucket_op_cache_miss_count": 0,
"bucket_op_exception_count": 0,
"checkpoint_failure_count": 0,
"curl_failure_count": 0,
"curl_max_resp_size_exceeded": 0,
"curl_non_200_response": 0,
"curl_timeout_count": 0,
"dcp_events_lost": 0,
"debugger_events_lost": 0,
"delete_events_lost": 0,
"mutation_events_lost": 0,
"n1ql_op_exception_count": 0,
"timeout_count": 0,
"timer_callback_missing_counter": 0,
"timer_context_size_exceeded_counter": 0,
"timer_events_lost": 0,
"timestamp": {
"767789": "2023-10-01T07:15:17Z",
"767797": "2023-10-01T07:15:17Z",
"767807": "2023-10-01T07:15:16Z"
},
"v8worker_events_lost": 0
},
"function_id": 3341595328,
"function_name": "Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations",
"function_scope": {
"bucket": "src_bucket",
"scope": "_default"
},
"gocb_creds_request_counter": 1,
"internal_vb_distribution_stats": {
"worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_0": "[0-341]",
"worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_1": "[342-682]",
"worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_2": "[683-1023]"
},
"latency_percentile_stats": {
"100": 0,
"50": 0,
"80": 0,
"90": 0,
"95": 0,
"99": 0
},
"lcb_creds_request_counter": 6,
"lcb_exception_stats": {},
"metastore_stats": {
"metastore_delete_err": 0,
"metastore_deletes": 0,
"metastore_not_found": 0,
"metastore_scan": 0,
"metastore_scan_due": 0,
"metastore_scan_err": 0,
"metastore_set": 0,
"metastore_set_err": 0
},
"planner_stats": [
{
"host_name": "172.23.107.52:8096",
"start_vb": 0,
"vb_count": 1024
}
],
"vb_distribution_stats_from_metadata": {},
"worker_pids": {
"worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_0": 767789,
"worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_1": 767797,
"worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_2": 767807
}
}
]
- Start loading 4032 docs to source bucket
- SIGKILL beam.smp (pkill beam.smp) on node 172.23.107.52 (the eventing node), then restart Couchbase Server.
- Observed that the Eventing consumer crashed at 0x000000000042542b in main (argc=<optimized out>, argv=<optimized out>) at /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/eventing/v8_consumer/src/client.cc:1300
The following core dump was found on node 172.23.107.52 (1c779c6b-51a0-4eaf-05e4eb93-6f5bdd9c.dmp):
BackTrace:
(gdb) bt full
|
#0 __pthread_clockjoin_ex (threadid=139882694608640, thread_return=0x0, clockid=<optimized out>, abstime=<optimized out>, block=<optimized out>)
|
at pthread_join_common.c:145
|
__oldtype = 0
|
__err = <optimized out>
|
tid = 771447
|
_buffer = {__routine = 0x7f38fbf56250 <cleanup>, __arg = 0x7f38fa528d28, __canceltype = 0, __prev = 0x0}
|
pd = 0x7f38fa528700
|
self = <optimized out>
|
result = 0
|
pd_result = <optimized out>
|
#1 0x00007f38fc1beda7 in __gthread_join (__value_ptr=0x0, __threadid=<optimized out>)
|
at /tmp/deploy/objdir/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:669
|
No locals.
|
#2 std::thread::join (this=this@entry=0x560b10 <AppWorker::GetAppWorker()::worker+16>) at /tmp/deploy/objdir/../gcc-13.2.0/libstdc++-v3/src/c++11/thread.cc:134
|
__e = 22
|
#3 0x000000000042542b in main (argc=<optimized out>, argv=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/eventing/v8_consumer/src/client.cc:1300
|
app_location = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x26cc3d0 <error: Cannot access memory at address 0x26cc3d0>}, _M_string_length = 99, {
|
_M_local_buf = "c\000\000\000\000\000\000\000\r\000\000\000\000\000\000", _M_allocated_capacity = 99}}
|
ipc_type = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48ad0 "af_unix"},
|
_M_string_length = 7, {_M_local_buf = "af_unix\000\200\210n\000\000\000\000", _M_allocated_capacity = 33892920341784161}}
|
port = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x26cc440 <error: Cannot access memory at address 0x26cc440>}, _M_string_length = 37, {
|
_M_local_buf = "%\000\000\000\000\000\000\000`\214\304\006\374\177\000", _M_allocated_capacity = 37}}
|
feedback_port = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x26cc470 <error: Cannot access memory at address 0x26cc470>}, _M_string_length = 39, {_M_local_buf = "'", '\000' <repeats 14 times>,
|
_M_allocated_capacity = 39}}
|
worker_id = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x26cc4a0 <error: Cannot access memory at address 0x26cc4a0>}, _M_string_length = 108, {_M_local_buf = "l", '\000' <repeats 14 times>,
|
_M_allocated_capacity = 108}}
|
batch_size = 1
|
feedback_batch_size = 100
|
diag_dir = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x26cc520 <error: Cannot access memory at address 0x26cc520>}, _M_string_length = 38, {_M_local_buf = "&", '\000' <repeats 14 times>,
|
_M_allocated_capacity = 38}}
|
ip_type = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x7ffc06c48b70 "ipv4"}, _M_string_length = 4, {_M_local_buf = "ipv4\000\000\000\000\f\336\337\373\070\177\000", _M_allocated_capacity = 880177257}}
|
breakpad_on = {static npos = 18446744073709551615,
|
--Type <RET> for more, q to quit, c to continue without paging--
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48b90 "true"},
|
_M_string_length = 4, {_M_local_buf = "true", '\000' <repeats 11 times>, _M_allocated_capacity = 1702195828}}
|
function_id = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48bb0 "2528875726"},
|
_M_string_length = 10, {_M_local_buf = "2528875726\000\000\000\000\000", _M_allocated_capacity = 3978146560452212018}}
|
user_prefix = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48bd0 "eventing"},
|
_M_string_length = 8, {_M_local_buf = "eventing\000\000\000\000|\000\000", _M_allocated_capacity = 7453010382134015589}}
|
ns_server_port = {static npos = 18446744073709551615,
|
_M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48bf0 "8091"},
|
_M_string_length = 4, {_M_local_buf = "8091", '\000' <repeats 11 times>, _M_allocated_capacity = 825831480}}
|
num_vbuckets = <optimized out>
|
user = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x7ffc06c48c10 "Administrator"}, _M_string_length = 13, {_M_local_buf = "Administrator\000\377\377", _M_allocated_capacity = 8391166453657461825}}
|
domain = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
|
_M_p = 0x7ffc06c48c30 "admin"}, _M_string_length = 5, {_M_local_buf = "admin\000\000\000\244\355\337\373\070\177\000",
|
_M_allocated_capacity = 474215179361}}
|
worker = 0x560b00 <AppWorker::GetAppWorker()::worker>
|
QE-TEST:
./testrunner -i /data/workspace/debian-p0-eventing-vset00-00-recovery_timers/testexec.18938.ini -p get-cbcollect-info=True,GROUP=recovery_timers,java_sdk_client=True,default_bucket=False,get-cbcollect-info=True,sirius_url=http://172.23.120.103:4000 -t eventing.eventing_recovery.EventingRecovery.test_killing_erlang_when_eventing_is_processing_mutations,nodes_init=4,services_init=kv-eventing-index-n1ql,dataset=default,groups=simple,reset_services=True,skip_cleanup=True,doc-per-day=2,handler_code=bucket_op_with_timers,GROUP=recovery_timers
|
Job: http://qa.sc.couchbase.com/job/test_suite_executor/623157/consoleFull
Job-Name: recovery_timers (eventing)