Uploaded image for project: 'Couchbase C client library libcouchbase'
  1. Couchbase C client library libcouchbase
  2. CCBC-5

abort in do_read_data (c=0x7b09bf0) at src/event.c:105

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: None
    • Component/s: None
    • Security Level: Public
    • Labels:
      None
    • Environment:
      built with debug on
      libcouchbase: b9796af Fixed compilation of the packet_debug extension
      memcachetest: 0.8.3-1-geaf0f9f (my master branch)

      Description

      GDB seems to think libcouchbase is in the middle of a recv() call, but I don't think that's actually correct. I did a packet capture, and I see that libcouchbase hangs right after the noop request, but somehow slips in another request that's not in serial order right before it.

      Some gdb output:
      (gdb) run -h 10.1.5.7:8091 -L 5 -i 1000 -M 8165 -F
      Starting program: /root/debugmemcachetest/bin/memcachetest -h 10.1.5.7:8091 -L 5 -i 1000 -M 8165 -F
      [Thread debugging using libthread_db enabled]
      [New Thread 47907415978192 (LWP 2195)]
      [New Thread 1094388032 (LWP 2198)]
      [Thread 1094388032 (LWP 2198) exited]
      Failed to get server stats
      [New Thread 1094388032 (LWP 2199)]

      Program received signal SIGABRT, Aborted.
      [Switching to Thread 1094388032 (LWP 2199)]
      0x000000322ec30155 in raise () from /lib64/libc.so.6
      (gdb) where
      #0 0x000000322ec30155 in raise () from /lib64/libc.so.6
      #1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
      #2 0x00002b924ffe98ec in do_read_data (c=0x108b0bf0) at src/event.c:105
      #3 0x00002b924ffe9aa7 in libcouchbase_server_event_handler (sock=10, which=2,
      arg=0x108b0bf0) at src/event.c:152
      #4 0x00002b925020b59a in event_base_loop () from /usr/lib64/libevent-1.4.so.2
      #5 0x00002b924ffe9d4d in libcouchbase_execute (instance=0x108608c0)
      at src/execute.c:39
      #6 0x00000000004055a8 in memcached_get_wrapper (connection=0x1085bf90,
      key=0x413affb0 "335", nkey=3, size=0x413affa8, data=0x413affa0)
      at main.c:448
      #7 0x00000000004052ec in test (ctx=0x1085df70) at main.c:727
      #8 0x000000000040567e in test_thread_main (arg=0x1085df70) at main.c:759
      #9 0x000000322f806307 in start_thread () from /lib64/libpthread.so.0
      #10 0x000000322ecd1ded in clone () from /lib64/libc.so.6
      (gdb) down
      Bottom (innermost) frame selected; you cannot go down.
      (gdb) up
      #1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
      (gdb) up
      #2 0x00002b924ffe98ec in do_read_data (c=0x108b0bf0) at src/event.c:105
      105 src/event.c: No such file or directory.
      in src/event.c
      (gdb) p/x req->request.magic
      $1 = 0x81
      (gdb) p req
      $2 = (protocol_binary_request_header *) 0x1087e1f0
      (gdb) p req[0]
      $3 = {request =

      {magic = 129 '\201', opcode = 9 '\t', keylen = 0, extlen = 4 '\004', datatype = 0 '\0', vbucket = 0, bodylen = 3911122944, opaque = 1002, cas = 14723677855292588032}

      ,
      bytes = "\201\t\000\000\004\000\000\000\000\000\037??\003\000\000\000\000\000\000\000\003U?"}
      (gdb) p operations
      $4 = 2
      (gdb) p nr
      $5 = 0
      (gdb) p c->cmd_log.avail
      $6 = 51
      (gdb) p c[0]
      $7 = {hostname = 0x1085de90 "10.1.5.7", port = 0x1085de99 "11210", sock = 10,
      root_ai = 0x1085ea00, curr_ai = 0x1085ea00, output =

      { data = 0x108759c0 "\200\t", size = 16384, avail = 0}

      , output_cookies =

      { data = 0x108799d0 "\020?:A", size = 2048, avail = 16}

      , cmd_log =

      { data = 0x1087a1e0 "\200\t", size = 16384, avail = 51}

      , pending =

      { data = 0x108707d0 "\200\001", size = 16384, avail = 0}

      , pending_cookies =

      { data = 0x1086ffc0 "\220?:A", size = 2048, avail = 0}

      ,
      current_packet = 18446744073709551615, input =

      {data = 0x1087e1f0 "\201\t", size = 8192, avail = 8192}

      , sasl_conn = 0x108747e0, ev_event = {ev_next =

      { tqe_next = 0x0, tqe_prev = 0x108608f0}

      , ev_active_next =

      { tqe_next = 0x0, tqe_prev = 0x1085ca10}

      , ev_signal_next =

      { tqe_next = 0x0, tqe_prev = 0x0}

      , min_heap_idx = 4294967295,
      ev_base = 0x1085bff0, ev_fd = 10, ev_events = 18, ev_ncalls = 0,
      ev_pncalls = 0x413afe2e, ev_timeout =

      {tv_sec = 0, tv_usec = 0},
      ev_pri = 0,
      ev_callback = 0x2b924ffe9a78 <libcouchbase_server_event_handler>,
      ev_arg = 0x108b0bf0, ev_res = 2, ev_flags = 130}, ev_flags = 2,
      connected = true,
      ev_handler = 0x2b924ffe9a78 <libcouchbase_server_event_handler>,
      instance = 0x108608c0}


      Though, interestingly, if I bump up the -i, it gets to 8335 operations before hitting an abort:
      (gdb) run -h 10.1.5.7:8091 -L 5 -i 10000 -M 8165 -F
      Starting program: /root/debugmemcachetest/bin/memcachetest -h 10.1.5.7:8091 -L 5 -i 10000 -M 8165 -F
      [Thread debugging using libthread_db enabled]
      [New Thread 47268074524880 (LWP 2825)]
      [New Thread 1107622208 (LWP 2828)]
      [Thread 1107622208 (LWP 2828) exited]
      Failed to get server stats
      [New Thread 1107622208 (LWP 2829)]

      Program received signal SIGABRT, Aborted.
      [Switching to Thread 1107622208 (LWP 2829)]
      0x000000322ec30155 in raise () from /lib64/libc.so.6
      (gdb) where
      #0 0x000000322ec30155 in raise () from /lib64/libc.so.6
      #1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
      #2 0x00002afd744638ec in do_read_data (c=0x8163cb0) at src/event.c:105
      #3 0x00002afd74463aa7 in libcouchbase_server_event_handler (sock=10, which=2,
      arg=0x8163cb0) at src/event.c:152
      #4 0x00002afd7468559a in event_base_loop () from /usr/lib64/libevent-1.4.so.2
      #5 0x00002afd74463d4d in libcouchbase_execute (instance=0x80aa200)
      at src/execute.c:39
      #6 0x00000000004055a8 in memcached_get_wrapper (connection=0x80a58d0,
      key=0x4204efb0 "8335", nkey=4, size=0x4204efa8, data=0x4204efa0)
      at main.c:448
      #7 0x00000000004052ec in test (ctx=0x80a78b0) at main.c:727
      #8 0x000000000040567e in test_thread_main (arg=0x80a78b0) at main.c:759
      #9 0x000000322f806307 in start_thread () from /lib64/libpthread.so.0
      #10 0x000000322ecd1ded in clone () from /lib64/libc.so.6
      (gdb) p c[0]
      cannot subscript something of type `<data variable, no debug info>'
      (gdb) up
      #1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
      (gdb) up
      #2 0x00002afd744638ec in do_read_data (c=0x8163cb0) at src/event.c:105
      105 src/event.c: No such file or directory.
      in src/event.c
      (gdb) p c[0]
      $1 = {hostname = 0x80a77d0 "10.1.5.7", port = 0x80a77d9 "11210", sock = 10,
      root_ai = 0x80a8340, curr_ai = 0x80a8340, output = { data = 0x8128a80 "\200\t", size = 16384, avail = 0}, output_cookies = { data = 0x812ca90 "\020?\004B", size = 2048, avail = 16}, cmd_log = { data = 0x812d2a0 "\200\t", size = 16384, avail = 52}, pending = { data = 0x8123890 "\200\001", size = 16384, avail = 0}, pending_cookies = { data = 0x8123080 "\220?\004B", size = 2048, avail = 0},
      current_packet = 18446744073709551615, input = {data = 0x81312b0 "\201\t", size = 8192, avail = 8192}, sasl_conn = 0x81278a0, ev_event = {ev_next = { tqe_next = 0x0, tqe_prev = 0x80aa230}, ev_active_next = {tqe_next = 0x0, tqe_prev = 0x80a6350}, ev_signal_next = {tqe_next = 0x0, tqe_prev = 0x0}, min_heap_idx = 4294967295, ev_base = 0x80a5930,
      ev_fd = 10, ev_events = 18, ev_ncalls = 0, ev_pncalls = 0x4204ee2e,
      ev_timeout = {tv_sec = 0, tv_usec = 0}

      , ev_pri = 0,
      ev_callback = 0x2afd74463a78 <libcouchbase_server_event_handler>,
      ev_arg = 0x8163cb0, ev_res = 2, ev_flags = 130}, ev_flags = 2,
      connected = true,
      ev_handler = 0x2afd74463a78 <libcouchbase_server_event_handler>,
      instance = 0x80aa200}
      (gdb) p req[0]
      $2 = {request =

      {magic = 129 '\201', opcode = 9 '\t', keylen = 0, extlen = 4 '\004', datatype = 0 '\0', vbucket = 0, bodylen = 3911122944, opaque = 10002, cas = 477384859036155904}

      ,
      bytes = "\201\t\000\000\004\000\000\000\000\000\037?\022'\000\000\000\000\000\000\000\003?\006"}

      Context from my libcouchbase:

      58 case PROTOCOL_BINARY_RES:
      59 libcouchbase_server_purge_implicit_responses(c, res->response.opaque);
      60 c->instance->response_handler[res->response.opcode](c,
      61 command_cookie,
      62 res);
      63 req = (protocol_binary_request_header*)c->cmd_log.data;
      64 processed = ntohl(req->request.bodylen) + sizeof(*req);
      65 assert(c->cmd_log.avail >= processed);
      66 memmove(c->cmd_log.data, c->cmd_log.data + processed,
      67 c->cmd_log.avail - processed);
      68 c->cmd_log.avail -= processed;
      69 req = (protocol_binary_request_header*)c->input.data;
      70
      71 memmove(c->output_cookies.data, cptr, (size_t)(end - cptr));
      72 c->output_cookies.avail -= sizeof(command_cookie);
      73 break;
      74 default:
      75 abort();
      76 }
      77 }
      78
      79 processed = ntohl(req->request.bodylen) + sizeof(*req);
      80 memmove(c->input.data, c->input.data + processed,
      81 c->input.avail - processed);
      82 c->input.avail -= processed;
      83 }
      84
      85 if (operations == operations_per_call)

      { 86 // allow some other connections to process some data as well 87 return ; 88 }

      89
      90 nr = recv(c->sock,
      91 c->input.data + c->input.avail,
      92 c->input.size - c->input.avail,
      93 0);
      94
      95 if (nr == -1) {
      96 switch (errno)

      { 97 case EINTR: 98 break; 99 case EWOULDBLOCK: 100 return; 101 default: 102 abort(); 103 }

      104 } else if (nr == 0)

      { 105 abort(); 106 }

      else

      { 107 c->input.avail += (size_t)nr; 108 }

      109 } while (true);

      No reviews matched the request. Check your Options in the drop-down menu of this sections header.

        Activity

        Hide
        avsej Sergey Avseyev added a comment -

        Added SHA1 of the fix fb42840

        Show
        avsej Sergey Avseyev added a comment - Added SHA1 of the fix fb42840

          People

          • Assignee:
            trond Trond Norbye
            Reporter:
            ingenthr Matt Ingenthron
          • Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

            Dates

            • Created:
              Updated:
              Resolved:

              Gerrit Reviews

              There are no open Gerrit changes