Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-58900

Eventing Consumer crashed at 0x000000000042542b in main (argc=<optimized out>, argv=<optimized out>) at /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/eventing/v8_consumer/src/client.cc:1300

    XMLWordPrintable

Details

    • Bug
    • Resolution: Cannot Reproduce
    • Critical
    • 7.6.0
    • 7.6.0
    • eventing
    • 7.6.0-1568

    Description

      Steps To Recreate:

      1. Create a 4-node cluster (172.23.104.231[KV], 172.23.107.168[n1ql], 172.23.107.52[eventing], 172.23.109.84[index])
      2. Create three buckets(src_bucket, dst_bucket, metadata)
      3. All buckets have storageBackend=magma and replicas=1
      4. Create a primary index on all three buckets
      5. Deploy the following code

        Deploying the following handler code : Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations with 
        bindings: {'buckets': [{'alias': 'dst_bucket', 'bucket_name': 'dst_bucket', 'access': 'rw'}], 'metadata_bucket': 'metadata', 'source_bucket': 'src_bucket', 'curl': []} and 
        settings: {'checkpoint_interval': 20000, 'cleanup_timers': False, 'dcp_stream_boundary': 'everything', 'deployment_status': False, 'description': 'Sample Description', 'log_level': 'INFO', 'skip_timer_threshold': 86400, 'sock_batch_size': 1, 'tick_duration': 5000, 'timer_processing_tick_interval': 500, 'timer_worker_pool_size': 3, 'worker_count': 3, 'processing_status': False, 'cpp_worker_thread_count': 1, 'execution_timeout': 20, 'data_chan_size': 10000, 'worker_queue_cap': 100000, 'use_memory_manager': True, 'deadline_timeout': 21, 'timer_storage_chan_size': 10000, 'dcp_gen_chan_size': 10000}
        [2023-10-01 00:14:55,855] - [eventing_base:438] INFO - 
        function OnUpdate(doc, meta) {
            var expiry = new Date();
            expiry.setSeconds(expiry.getSeconds() + 30);
         
            var context = {docID : meta.id, random_text : "e6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh0R7Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6cZZGHuh07Aumoe6"};
            createTimer(timerCallback,  expiry, meta.id, context);
        }
        function OnDelete(meta) {
            var expiry = new Date();
            expiry.setSeconds(expiry.getSeconds() + 30);
         
            var context = {docID : meta.id};
            createTimer(NDtimerCallback,  expiry, meta.id, context);
        }
        function NDtimerCallback(context) {
            while(true){
                try{
                delete dst_bucket[context.docID];
                break;
                }catch(e){
                    log(e);
                }
            }
        }
        function timerCallback(context) {
            while(true){
                try{
                dst_bucket[context.docID] = context.random_text;
                break;
                }catch(e){
                    log(e);
                }
            }
        }
        [2023-10-01 00:14:57,021] - [eventing_base:440] INFO - deploy Application : b'{\n "code": 0,\n "info": "Function: src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations is deploying"\n}'
        [2023-10-01 00:14:57,021] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
        [2023-10-01 00:15:02,044] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
        [2023-10-01 00:15:07,065] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
        [2023-10-01 00:15:12,085] - [basetestcase:885] INFO - sleep for 5 secs. Waiting for Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations to deployed... ...
        [2023-10-01 00:15:17,169] - [basetestcase:2702] INFO - list of eventing nodes in cluster: [ip:172.23.107.52 port:8091 ssh_username:root]
        [2023-10-01 00:15:17,256] - [eventing_base:673] INFO - Stats for Node 172.23.107.52 is 
        [
            {
                "dcp_feed_boundary": "everything",
                "event_processing_stats": {
                    "adhoc_timer_response_received": 1,
                    "agg_messages_sent_to_worker": 2217,
                    "agg_queue_memory": 0,
                    "agg_queue_memory_cap": 106954752,
                    "agg_queue_size": 0,
                    "agg_queue_size_cap": 300000,
                    "agg_timer_feedback_queue_cap": 1500,
                    "dcp_seqno_advanced": 1024,
                    "dcp_snapshot": 1024,
                    "dcp_stream_req_counter": 1024,
                    "dcp_streamreq": 1024,
                    "execution_stats": 29,
                    "failure_stats": 29,
                    "latency_stats": 29,
                    "lcb_exception_stats": 29,
                    "log_level": 3,
                    "num_processed_events": 1024,
                    "processed_events_size": 172128,
                    "thr_count": 3,
                    "thr_map": 3,
                    "v8_init": 3,
                    "v8_load": 3
                },
                "events_remaining": {
                    "dcp_backlog": 0
                },
                "execution_stats": {
                    "agg_queue_memory": 0,
                    "agg_queue_size": 0,
                    "curl": {
                        "delete": 0,
                        "get": 0,
                        "head": 0,
                        "post": 0,
                        "put": 0
                    },
                    "curl_success_count": 0,
                    "dcp_delete_msg_counter": 0,
                    "dcp_delete_parse_failure": 0,
                    "dcp_mutation_msg_counter": 0,
                    "dcp_mutation_parse_failure": 0,
                    "enqueued_dcp_delete_msg_counter": 0,
                    "enqueued_dcp_mutation_msg_counter": 0,
                    "enqueued_timer_msg_counter": 0,
                    "feedback_queue_size": 0,
                    "filtered_dcp_delete_counter": 0,
                    "filtered_dcp_mutation_counter": 0,
                    "lcb_retry_failure": 0,
                    "messages_parsed": 2205,
                    "no_op_counter": 1024,
                    "num_processed_events": 1024,
                    "on_delete_failure": 0,
                    "on_delete_success": 0,
                    "on_update_failure": 0,
                    "on_update_success": 0,
                    "processed_events_size": 172128,
                    "timer_callback_failure": 0,
                    "timer_callback_success": 0,
                    "timer_cancel_counter": 0,
                    "timer_create_counter": 0,
                    "timer_create_failure": 0,
                    "timer_msg_counter": 0,
                    "timer_responses_sent": 0,
                    "timestamp": {
                        "767789": "2023-10-01T07:15:17Z",
                        "767797": "2023-10-01T07:15:17Z",
                        "767807": "2023-10-01T07:15:16Z"
                    },
                    "uv_msg_parse_failure": 0,
                    "uv_try_write_failure_counter": 0
                },
                "failure_stats": {
                    "analytics_op_exception_count": 0,
                    "app_worker_setting_events_lost": 0,
                    "bkt_ops_cas_mismatch_count": 0,
                    "bucket_cache_overflow_count": 0,
                    "bucket_op_cache_miss_count": 0,
                    "bucket_op_exception_count": 0,
                    "checkpoint_failure_count": 0,
                    "curl_failure_count": 0,
                    "curl_max_resp_size_exceeded": 0,
                    "curl_non_200_response": 0,
                    "curl_timeout_count": 0,
                    "dcp_events_lost": 0,
                    "debugger_events_lost": 0,
                    "delete_events_lost": 0,
                    "mutation_events_lost": 0,
                    "n1ql_op_exception_count": 0,
                    "timeout_count": 0,
                    "timer_callback_missing_counter": 0,
                    "timer_context_size_exceeded_counter": 0,
                    "timer_events_lost": 0,
                    "timestamp": {
                        "767789": "2023-10-01T07:15:17Z",
                        "767797": "2023-10-01T07:15:17Z",
                        "767807": "2023-10-01T07:15:16Z"
                    },
                    "v8worker_events_lost": 0
                },
                "function_id": 3341595328,
                "function_name": "Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations",
                "function_scope": {
                    "bucket": "src_bucket",
                    "scope": "_default"
                },
                "gocb_creds_request_counter": 1,
                "internal_vb_distribution_stats": {
                    "worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_0": "[0-341]",
                    "worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_1": "[342-682]",
                    "worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_2": "[683-1023]"
                },
                "latency_percentile_stats": {
                    "100": 0,
                    "50": 0,
                    "80": 0,
                    "90": 0,
                    "95": 0,
                    "99": 0
                },
                "lcb_creds_request_counter": 6,
                "lcb_exception_stats": {},
                "metastore_stats": {
                    "metastore_delete_err": 0,
                    "metastore_deletes": 0,
                    "metastore_not_found": 0,
                    "metastore_scan": 0,
                    "metastore_scan_due": 0,
                    "metastore_scan_err": 0,
                    "metastore_set": 0,
                    "metastore_set_err": 0
                },
                "planner_stats": [
                    {
                        "host_name": "172.23.107.52:8096",
                        "start_vb": 0,
                        "vb_count": 1024
                    }
                ],
                "vb_distribution_stats_from_metadata": {},
                "worker_pids": {
                    "worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_0": 767789,
                    "worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_1": 767797,
                    "worker_src_bucket/_default/Function_457399420_test_killing_erlang_when_eventing_is_processing_mutations_2": 767807
                }
            }
        ] 
        

      6. Start loading 4032 docs to source bucket
      7. SIGKILL beam.smp (pkill beam.smp) on node 172.23.107.52 (eventing node), and restart Couchbase Server.
      8. Observed that the eventing consumer crashed at 0x000000000042542b in main (argc=<optimized out>, argv=<optimized out>) at /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/eventing/v8_consumer/src/client.cc:1300

      The core dump below was found on node 172.23.107.52 (1c779c6b-51a0-4eaf-05e4eb93-6f5bdd9c.dmp):

      BackTrace:

      (gdb) bt full
      #0  __pthread_clockjoin_ex (threadid=139882694608640, thread_return=0x0, clockid=<optimized out>, abstime=<optimized out>, block=<optimized out>)
          at pthread_join_common.c:145
              __oldtype = 0
              __err = <optimized out>
              tid = 771447
              _buffer = {__routine = 0x7f38fbf56250 <cleanup>, __arg = 0x7f38fa528d28, __canceltype = 0, __prev = 0x0}
              pd = 0x7f38fa528700
              self = <optimized out>
              result = 0
              pd_result = <optimized out>
      #1  0x00007f38fc1beda7 in __gthread_join (__value_ptr=0x0, __threadid=<optimized out>)
          at /tmp/deploy/objdir/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:669
      No locals.
      #2  std::thread::join (this=this@entry=0x560b10 <AppWorker::GetAppWorker()::worker+16>) at /tmp/deploy/objdir/../gcc-13.2.0/libstdc++-v3/src/c++11/thread.cc:134
              __e = 22
      #3  0x000000000042542b in main (argc=<optimized out>, argv=<optimized out>)
          at /home/couchbase/jenkins/workspace/couchbase-server-unix/goproj/src/github.com/couchbase/eventing/v8_consumer/src/client.cc:1300
              app_location = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x26cc3d0 <error: Cannot access memory at address 0x26cc3d0>}, _M_string_length = 99, {
                  _M_local_buf = "c\000\000\000\000\000\000\000\r\000\000\000\000\000\000", _M_allocated_capacity = 99}}
              ipc_type = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48ad0 "af_unix"},
                _M_string_length = 7, {_M_local_buf = "af_unix\000\200\210n\000\000\000\000", _M_allocated_capacity = 33892920341784161}}
              port = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x26cc440 <error: Cannot access memory at address 0x26cc440>}, _M_string_length = 37, {
                  _M_local_buf = "%\000\000\000\000\000\000\000`\214\304\006\374\177\000", _M_allocated_capacity = 37}}
              feedback_port = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x26cc470 <error: Cannot access memory at address 0x26cc470>}, _M_string_length = 39, {_M_local_buf = "'", '\000' <repeats 14 times>,
                  _M_allocated_capacity = 39}}
              worker_id = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x26cc4a0 <error: Cannot access memory at address 0x26cc4a0>}, _M_string_length = 108, {_M_local_buf = "l", '\000' <repeats 14 times>,
                  _M_allocated_capacity = 108}}
              batch_size = 1
              feedback_batch_size = 100
              diag_dir = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x26cc520 <error: Cannot access memory at address 0x26cc520>}, _M_string_length = 38, {_M_local_buf = "&", '\000' <repeats 14 times>,
                  _M_allocated_capacity = 38}}
              ip_type = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x7ffc06c48b70 "ipv4"}, _M_string_length = 4, {_M_local_buf = "ipv4\000\000\000\000\f\336\337\373\070\177\000", _M_allocated_capacity = 880177257}}
              breakpad_on = {static npos = 18446744073709551615,
      --Type <RET> for more, q to quit, c to continue without paging--
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48b90 "true"},
                _M_string_length = 4, {_M_local_buf = "true", '\000' <repeats 11 times>, _M_allocated_capacity = 1702195828}}
              function_id = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48bb0 "2528875726"},
                _M_string_length = 10, {_M_local_buf = "2528875726\000\000\000\000\000", _M_allocated_capacity = 3978146560452212018}}
              user_prefix = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48bd0 "eventing"},
                _M_string_length = 8, {_M_local_buf = "eventing\000\000\000\000|\000\000", _M_allocated_capacity = 7453010382134015589}}
              ns_server_port = {static npos = 18446744073709551615,
                _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>}, _M_p = 0x7ffc06c48bf0 "8091"},
                _M_string_length = 4, {_M_local_buf = "8091", '\000' <repeats 11 times>, _M_allocated_capacity = 825831480}}
              num_vbuckets = <optimized out>
              user = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x7ffc06c48c10 "Administrator"}, _M_string_length = 13, {_M_local_buf = "Administrator\000\377\377", _M_allocated_capacity = 8391166453657461825}}
              domain = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<std::__new_allocator<char>> = {<No data fields>}, <No data fields>},
                  _M_p = 0x7ffc06c48c30 "admin"}, _M_string_length = 5, {_M_local_buf = "admin\000\000\000\244\355\337\373\070\177\000",
                  _M_allocated_capacity = 474215179361}}
              worker = 0x560b00 <AppWorker::GetAppWorker()::worker>
      

      QE-TEST:

      ./testrunner -i /data/workspace/debian-p0-eventing-vset00-00-recovery_timers/testexec.18938.ini -p get-cbcollect-info=True,GROUP=recovery_timers,java_sdk_client=True,default_bucket=False,get-cbcollect-info=True,sirius_url=http://172.23.120.103:4000 -t eventing.eventing_recovery.EventingRecovery.test_killing_erlang_when_eventing_is_processing_mutations,nodes_init=4,services_init=kv-eventing-index-n1ql,dataset=default,groups=simple,reset_services=True,skip_cleanup=True,doc-per-day=2,handler_code=bucket_op_with_timers,GROUP=recovery_timers
      

      Job: http://qa.sc.couchbase.com/job/test_suite_executor/623157/consoleFull

      Job-Name: recovery_timers (eventing)

      Attachments

        No reviews matched the request. Check your Options in the drop-down menu of this section's header.

        Activity

          People

            ankush.sharma Ankush Sharma
            ankush.sharma Ankush Sharma
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty