Details
-
Bug
-
Resolution: Fixed
-
Critical
-
7.6.0
-
7.6.0-1568
-
Untriaged
-
Linux x86_64
-
0
-
Unknown
Description
Steps To Recreate:
- Create a 3-node cluster (172.23.107.1 [KV], 172.23.97.199 [index:n1ql], 172.23.106.105 [eventing])
- Create three buckets (src_bucket, dst_bucket, metadata)
- All buckets have storageBackend=magma and replicas=1
- Create a primary index on each of the three buckets
- Create the eventing function shown below
Retry Rebalance settings changed to : {'enabled': True, 'afterTimePeriod': 150, 'maxAttempts': 1}
[2023-10-01 00:57:56,474] - [basetestcase:2702] INFO - list of eventing nodes in cluster: [ip:172.23.106.105 port:8091 ssh_username:root]
[2023-10-01 00:57:57,558] - [eventing_base:266] INFO - saving function b'{\n "code": 0,\n "info": "Function: src_bucket/_default/Function_391749709_test_auto_retry_of_failed_rebalance_when_producer_killed stored in temp store"\n}'
[2023-10-01 00:57:57,559] - [eventing_base:437] INFO - Deploying the following handler code : Function_391749709_test_auto_retry_of_failed_rebalance_when_producer_killed with
bindings: {'buckets': [{'alias': 'dst_bucket', 'bucket_name': 'dst_bucket', 'access': 'rw'}], 'metadata_bucket': 'metadata', 'source_bucket': 'src_bucket', 'curl': []} and
settings: {'checkpoint_interval': 20000, 'cleanup_timers': False, 'dcp_stream_boundary': 'everything', 'deployment_status': False, 'description': 'Sample Description', 'log_level': 'INFO', 'skip_timer_threshold': 86400, 'sock_batch_size': 1, 'tick_duration': 5000, 'timer_processing_tick_interval': 500, 'timer_worker_pool_size': 3, 'worker_count': 3, 'processing_status': False, 'cpp_worker_thread_count': 1, 'execution_timeout': 20, 'data_chan_size': 10000, 'worker_queue_cap': 100000, 'use_memory_manager': True, 'deadline_timeout': 21, 'timer_storage_chan_size': 10000, 'dcp_gen_chan_size': 10000}
[2023-10-01 00:57:57,559] - [eventing_base:438] INFO -
function OnUpdate(doc, meta) {
var expiry = new Date();
expiry.setSeconds(expiry.getSeconds() + 300);
var context = {docID : meta.id, random_text : "e6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6"};
createTimer(timerCallback, expiry, meta.id, context);
}
function OnDelete(meta) {
var expiry = new Date();
expiry.setSeconds(expiry.getSeconds() + 300);
var context = {docID : meta.id};
createTimer(NDtimerCallback, expiry, meta.id, context);
}
function NDtimerCallback(context) {
var docID = context.docID;
while (true) {
try {
var query = DELETE FROM dst_bucket where meta().id = $docID;
break;
} catch (e) {
log(e);
}
}
}
function timerCallback(context) {
var docID = context.docID;
while (true) {
try {
var query = UPSERT INTO dst_bucket ( KEY, VALUE ) VALUES ( $docID ,'timerCallback');
break;
} catch (e) {
log(e);
}
}
}
- Start loading 10800 docs to source bucket
- While data loading is in progress, add 172.23.105.187 to the cluster and trigger a rebalance
- The test is supposed to kill eventing-producer on node 172.23.106.105, but because of an infra issue that did not happen
- Observed that memcached crashed in AuditImpl::create_audit_event (this=0x7f826f0d5300, event_id=4097, payload=...)
The core below was found on node 172.23.107.1 (6058416b-5a54-4a7a-c8af14aa-4c937752.dmp). Node 172.23.107.1 runs Debian 11.
Note: A core with the same backtrace was also found on node 172.23.106.105.
bt-full:
https://gist.github.com/ankushsharma29/2ed7ec5f239a9c52a4f6f9d489f93581
Backtrace:
(gdb) bt
|
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
|
#1 0x00007f827035e537 in __GI_abort () at abort.c:79
|
#2 0x00007f82707239ab in __gnu_cxx::__verbose_terminate_handler () at /tmp/deploy/objdir/../gcc-13.2.0/libstdc++-v3/libsupc++/vterminate.cc:95
|
#3 0x0000000000c58236 in backtrace_terminate_handler () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/utilities/terminate_handler.cc:88
|
#4 0x00007f82707332fa in __cxxabiv1::__terminate (handler=<optimized out>) at /tmp/deploy/objdir/../gcc-13.2.0/libstdc++-v3/libsupc++/eh_terminate.cc:48
|
#5 0x00007f8270732379 in __cxa_call_terminate (ue_header=0x1e4f590) at /tmp/deploy/objdir/../gcc-13.2.0/libstdc++-v3/libsupc++/eh_call.cc:54
|
#6 0x00007f8270732a86 in __cxxabiv1::__gxx_personality_v0 (version=<optimized out>, actions=6, exception_class=5138137972254386944, ue_header=<optimized out>,
|
context=0x7ffea35e2ca0) at /tmp/deploy/objdir/../gcc-13.2.0/libstdc++-v3/libsupc++/eh_personality.cc:688
|
#7 0x00007f827052a6b9 in _Unwind_RaiseException_Phase2 (exc=0x1e4f590, context=0x7ffea35e2ca0, frames_p=0x7ffea35e2ba8)
|
at /tmp/deploy/objdir/../gcc-13.2.0/libgcc/unwind.inc:64
|
#8 0x00007f827052b1ad in _Unwind_Resume (exc=exc@entry=0x1e4f590) at /tmp/deploy/objdir/../gcc-13.2.0/libgcc/unwind.inc:242
|
#9 0x000000000053a92e in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string (this=<optimized out>,
|
__in_chrg=<optimized out>) at /opt/gcc-13.2.0/include/c++/13.2.0/bits/basic_string.h:792
|
#10 checked_snprintf (str=<optimized out>, size=<optimized out>, format=format@entry=0xe39292 "-%02u:%02u")
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/platform/src/checked_snprintf.cc:46
|
#11 0x0000000000a12c2e in ISOTime::generatetimestamp (destination=..., now=<optimized out>, frac_of_second=198962)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/time/isotime.cc:65
|
#12 0x0000000000a12e65 in ISOTime::generatetimestamp (destination=...) at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/time/isotime.cc:84
|
#13 ISOTime::generatetimestamp[abi:cxx11]() () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/time/isotime.cc:96
|
#14 0x0000000000709a32 in AuditImpl::create_audit_event (this=0x7f826f0d5300, event_id=4097, payload=...)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/auditd/src/audit.cc:77
|
#15 0x000000000070d396 in AuditImpl::~AuditImpl (this=0x7f826f0d5300, __in_chrg=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/auditd/src/audit.cc:58
|
#16 0x000000000070da69 in AuditImpl::~AuditImpl (this=0x7f826f0d5300, __in_chrg=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/auditd/src/audit.cc:73
|
#17 0x000000000063da10 in std::default_delete<cb::audit::Audit>::operator() (__ptr=<optimized out>, this=<optimized out>)
|
at /opt/gcc-13.2.0/include/c++/13.2.0/bits/unique_ptr.h:99
|
#18 std::__uniq_ptr_impl<cb::audit::Audit, std::default_delete<cb::audit::Audit> >::reset (__p=0x0, this=0x113cbf0 <getAuditHandle()::handle>)
|
at /opt/gcc-13.2.0/include/c++/13.2.0/bits/unique_ptr.h:211
|
#19 std::unique_ptr<cb::audit::Audit, std::default_delete<cb::audit::Audit> >::reset (__p=0x0, this=0x113cbf0 <getAuditHandle()::handle>)
|
at /opt/gcc-13.2.0/include/c++/13.2.0/bits/unique_ptr.h:509
|
#20 shutdown_audit () at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/mcaudit.cc:582
|
#21 0x0000000000580c5f in memcached_main (argc=<optimized out>, argv=<optimized out>)
|
at /home/couchbase/jenkins/workspace/couchbase-server-unix/kv_engine/daemon/memcached.cc:1107
|
#22 0x00007f827035fd0a in __libc_start_main (main=0x54c9b0 <main(int, char**)>, argc=3, argv=0x7ffea35e3c58, init=<optimized out>, fini=<optimized out>,
|
rtld_fini=<optimized out>, stack_end=0x7ffea35e3c48) at ../csu/libc-start.c:308
|
#23 0x0000000000577634 in _start ()
|
QE-TEST:
./testrunner -i /data/workspace/debian-p0-eventing-vset00-00-recovery_n1ql/testexec.26977.ini -p get-cbcollect-info=True,GROUP=recovery_n1ql,java_sdk_client=True,skip_log_scan=True,default_bucket=False,get-cbcollect-info=True,sirius_url=http://172.23.120.103:4000 -t eventing.eventing_recovery.EventingRecovery.test_auto_retry_of_failed_rebalance_when_producer_killed,nodes_init=3,services_init=kv-eventing-index:n1ql,dataset=default,groups=simple,reset_services=True,skip_cleanup=True,doc-per-day=5,handler_code=n1ql_op_with_timers,GROUP=recovery_n1ql
|
Job:: http://qa.sc.couchbase.com/job/test_suite_executor/623153/consoleFull
Job_Name:: eventing-recovery_n1ql