Details
-
Bug
-
Resolution: Fixed
-
Blocker
-
None
-
None
-
None
-
Security Level: Public
-
None
-
built with debug on
libcouchbase: b9796af Fixed compilation of the packet_debug extension
memcachetest: 0.8.3-1-geaf0f9f (my master branch)
Description
GDB seems to think libcouchbase is in the middle of a recv() call, but I don't think that's actually correct. I did a packet capture, and I see that libcouchbase hangs right after the noop request, but somehow slips in another request that's not in serial order right before it.
Some gdb output:
(gdb) run -h 10.1.5.7:8091 -L 5 -i 1000 -M 8165 -F
Starting program: /root/debugmemcachetest/bin/memcachetest -h 10.1.5.7:8091 -L 5 -i 1000 -M 8165 -F
[Thread debugging using libthread_db enabled]
[New Thread 47907415978192 (LWP 2195)]
[New Thread 1094388032 (LWP 2198)]
[Thread 1094388032 (LWP 2198) exited]
Failed to get server stats
[New Thread 1094388032 (LWP 2199)]
Program received signal SIGABRT, Aborted.
[Switching to Thread 1094388032 (LWP 2199)]
0x000000322ec30155 in raise () from /lib64/libc.so.6
(gdb) where
#0 0x000000322ec30155 in raise () from /lib64/libc.so.6
#1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
#2 0x00002b924ffe98ec in do_read_data (c=0x108b0bf0) at src/event.c:105
#3 0x00002b924ffe9aa7 in libcouchbase_server_event_handler (sock=10, which=2,
arg=0x108b0bf0) at src/event.c:152
#4 0x00002b925020b59a in event_base_loop () from /usr/lib64/libevent-1.4.so.2
#5 0x00002b924ffe9d4d in libcouchbase_execute (instance=0x108608c0)
at src/execute.c:39
#6 0x00000000004055a8 in memcached_get_wrapper (connection=0x1085bf90,
key=0x413affb0 "335", nkey=3, size=0x413affa8, data=0x413affa0)
at main.c:448
#7 0x00000000004052ec in test (ctx=0x1085df70) at main.c:727
#8 0x000000000040567e in test_thread_main (arg=0x1085df70) at main.c:759
#9 0x000000322f806307 in start_thread () from /lib64/libpthread.so.0
#10 0x000000322ecd1ded in clone () from /lib64/libc.so.6
(gdb) down
Bottom (innermost) frame selected; you cannot go down.
(gdb) up
#1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
(gdb) up
#2 0x00002b924ffe98ec in do_read_data (c=0x108b0bf0) at src/event.c:105
105 src/event.c: No such file or directory.
in src/event.c
(gdb) p/x req->request.magic
$1 = 0x81
(gdb) p req
$2 = (protocol_binary_request_header *) 0x1087e1f0
(gdb) p req[0]
$3 = {request =
,
bytes = "\201\t\000\000\004\000\000\000\000\000\037??\003\000\000\000\000\000\000\000\003U?"}
(gdb) p operations
$4 = 2
(gdb) p nr
$5 = 0
(gdb) p c->cmd_log.avail
$6 = 51
(gdb) p c[0]
$7 = {hostname = 0x1085de90 "10.1.5.7", port = 0x1085de99 "11210", sock = 10,
root_ai = 0x1085ea00, curr_ai = 0x1085ea00, output =
, output_cookies =
{ data = 0x108799d0 "\020?:A", size = 2048, avail = 16}, cmd_log =
{ data = 0x1087a1e0 "\200\t", size = 16384, avail = 51}, pending =
{ data = 0x108707d0 "\200\001", size = 16384, avail = 0}, pending_cookies =
{ data = 0x1086ffc0 "\220?:A", size = 2048, avail = 0},
current_packet = 18446744073709551615, input =
, sasl_conn = 0x108747e0, ev_event = {ev_next =
{ tqe_next = 0x0, tqe_prev = 0x108608f0}, ev_active_next =
{ tqe_next = 0x0, tqe_prev = 0x1085ca10}, ev_signal_next =
{ tqe_next = 0x0, tqe_prev = 0x0}, min_heap_idx = 4294967295,
ev_base = 0x1085bff0, ev_fd = 10, ev_events = 18, ev_ncalls = 0,
ev_pncalls = 0x413afe2e, ev_timeout =
ev_pri = 0,
ev_callback = 0x2b924ffe9a78 <libcouchbase_server_event_handler>,
ev_arg = 0x108b0bf0, ev_res = 2, ev_flags = 130}, ev_flags = 2,
connected = true,
ev_handler = 0x2b924ffe9a78 <libcouchbase_server_event_handler>,
instance = 0x108608c0}
Though, interestingly, if I bump up the -i, it gets to 8335 operations before hitting an abort:
(gdb) run -h 10.1.5.7:8091 -L 5 -i 10000 -M 8165 -F
Starting program: /root/debugmemcachetest/bin/memcachetest -h 10.1.5.7:8091 -L 5 -i 10000 -M 8165 -F
[Thread debugging using libthread_db enabled]
[New Thread 47268074524880 (LWP 2825)]
[New Thread 1107622208 (LWP 2828)]
[Thread 1107622208 (LWP 2828) exited]
Failed to get server stats
[New Thread 1107622208 (LWP 2829)]
Program received signal SIGABRT, Aborted.
[Switching to Thread 1107622208 (LWP 2829)]
0x000000322ec30155 in raise () from /lib64/libc.so.6
(gdb) where
#0 0x000000322ec30155 in raise () from /lib64/libc.so.6
#1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
#2 0x00002afd744638ec in do_read_data (c=0x8163cb0) at src/event.c:105
#3 0x00002afd74463aa7 in libcouchbase_server_event_handler (sock=10, which=2,
arg=0x8163cb0) at src/event.c:152
#4 0x00002afd7468559a in event_base_loop () from /usr/lib64/libevent-1.4.so.2
#5 0x00002afd74463d4d in libcouchbase_execute (instance=0x80aa200)
at src/execute.c:39
#6 0x00000000004055a8 in memcached_get_wrapper (connection=0x80a58d0,
key=0x4204efb0 "8335", nkey=4, size=0x4204efa8, data=0x4204efa0)
at main.c:448
#7 0x00000000004052ec in test (ctx=0x80a78b0) at main.c:727
#8 0x000000000040567e in test_thread_main (arg=0x80a78b0) at main.c:759
#9 0x000000322f806307 in start_thread () from /lib64/libpthread.so.0
#10 0x000000322ecd1ded in clone () from /lib64/libc.so.6
(gdb) p c[0]
cannot subscript something of type `<data variable, no debug info>'
(gdb) up
#1 0x000000322ec31bf0 in abort () from /lib64/libc.so.6
(gdb) up
#2 0x00002afd744638ec in do_read_data (c=0x8163cb0) at src/event.c:105
105 src/event.c: No such file or directory.
in src/event.c
(gdb) p c[0]
$1 = {hostname = 0x80a77d0 "10.1.5.7", port = 0x80a77d9 "11210", sock = 10,
root_ai = 0x80a8340, curr_ai = 0x80a8340, output = { data = 0x8128a80 "\200\t", size = 16384, avail = 0}, output_cookies = { data = 0x812ca90 "\020?\004B", size = 2048, avail = 16}, cmd_log = { data = 0x812d2a0 "\200\t", size = 16384, avail = 52}, pending = { data = 0x8123890 "\200\001", size = 16384, avail = 0}, pending_cookies = { data = 0x8123080 "\220?\004B", size = 2048, avail = 0},
current_packet = 18446744073709551615, input = {data = 0x81312b0 "\201\t", size = 8192, avail = 8192}, sasl_conn = 0x81278a0, ev_event = {ev_next = { tqe_next = 0x0, tqe_prev = 0x80aa230}, ev_active_next = {tqe_next = 0x0, tqe_prev = 0x80a6350}, ev_signal_next = {tqe_next = 0x0, tqe_prev = 0x0}, min_heap_idx = 4294967295, ev_base = 0x80a5930,
ev_fd = 10, ev_events = 18, ev_ncalls = 0, ev_pncalls = 0x4204ee2e,
ev_timeout = {tv_sec = 0, tv_usec = 0}
, ev_pri = 0,
ev_callback = 0x2afd74463a78 <libcouchbase_server_event_handler>,
ev_arg = 0x8163cb0, ev_res = 2, ev_flags = 130}, ev_flags = 2,
connected = true,
ev_handler = 0x2afd74463a78 <libcouchbase_server_event_handler>,
instance = 0x80aa200}
(gdb) p req[0]
$2 = {request =
,
bytes = "\201\t\000\000\004\000\000\000\000\000\037?\022'\000\000\000\000\000\000\000\003?\006"}
Context from my libcouchbase:
58 case PROTOCOL_BINARY_RES:
59 libcouchbase_server_purge_implicit_responses(c, res->response.opaque);
60 c->instance->response_handler[res->response.opcode](c,
61 command_cookie,
62 res);
63 req = (protocol_binary_request_header*)c->cmd_log.data;
64 processed = ntohl(req->request.bodylen) + sizeof(*req);
65 assert(c->cmd_log.avail >= processed);
66 memmove(c->cmd_log.data, c->cmd_log.data + processed,
67 c->cmd_log.avail - processed);
68 c->cmd_log.avail -= processed;
69 req = (protocol_binary_request_header*)c->input.data;
70
71 memmove(c->output_cookies.data, cptr, (size_t)(end - cptr));
72 c->output_cookies.avail -= sizeof(command_cookie);
73 break;
74 default:
75 abort();
76 }
77 }
78
79 processed = ntohl(req->request.bodylen) + sizeof(*req);
80 memmove(c->input.data, c->input.data + processed,
81 c->input.avail - processed);
82 c->input.avail -= processed;
83 }
84
85 if (operations == operations_per_call)
89
90 nr = recv(c->sock,
91 c->input.data + c->input.avail,
92 c->input.size - c->input.avail,
93 0);
94
95 if (nr == -1) {
96 switch (errno)
104 } else if (nr == 0)
{ 105 abort(); 106 }else
{ 107 c->input.avail += (size_t)nr; 108 }109 } while (true);