Details
Description
The cluster can be accessed at http://10.3.121.206:8091/index.html#sec=monitor_servers
Setup:
1. 14-node cluster [a previous rebalance had failed with a time-out error - MB-5360]
2. Add new nodes [214] to the cluster
3. Restart servers [226, 232]
4. After the servers are restarted, Start Rebalance
5. Within a minute, stop rebalance
6. Issue a rebalance on the cluster
Output
Rebalance exited with reason
Successive attempts to rebalance also fail
Logs from all the servers are attached at https://s3.amazonaws.com/bugdb/jira/MB-5364/logs3.tar
Sample output from 232
[root@rvm-0115 ~]# ps -e -o f,s,pid,uid,ppid,pgid,sid,size,stackp,sz,vsz,rss,maj_flt,psr,time,args --forest
F S PID UID PPID PGID SID SZ STACKP SZ VSZ RSS MAJFL PSR TIME COMMAND
1 S 2 0 0 0 0 0 00000000 0 0 0 0 3 00:00:00 [kthreadd]
1 S 3 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [migration/0]
1 S 4 0 2 0 0 0 00000000 0 0 0 0 0 00:00:03 _ [ksoftirqd/0]
1 S 5 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [migration/0]
5 S 6 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [watchdog/0]
1 S 7 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [migration/1]
1 S 8 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [migration/1]
1 S 9 0 2 0 0 0 00000000 0 0 0 0 1 00:00:04 _ [ksoftirqd/1]
5 S 10 0 2 0 0 0 00000000 0 0 0 0 1 00:00:01 _ [watchdog/1]
1 S 11 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [migration/2]
1 S 12 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [migration/2]
1 S 13 0 2 0 0 0 00000000 0 0 0 0 2 00:00:03 _ [ksoftirqd/2]
5 S 14 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [watchdog/2]
1 S 15 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [migration/3]
1 S 16 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [migration/3]
1 S 17 0 2 0 0 0 00000000 0 0 0 0 3 00:00:02 _ [ksoftirqd/3]
5 S 18 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [watchdog/3]
5 S 19 0 2 0 0 0 00000000 0 0 0 0 0 00:00:02 _ [events/0]
1 S 20 0 2 0 0 0 00000000 0 0 0 0 1 00:06:05 _ [events/1]
1 S 21 0 2 0 0 0 00000000 0 0 0 0 2 00:00:22 _ [events/2]
1 S 22 0 2 0 0 0 00000000 0 0 0 0 3 00:00:03 _ [events/3]
1 S 23 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [cpuset]
1 S 24 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [khelper]
1 S 25 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [netns]
1 S 26 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [async/mgr]
1 S 27 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [pm]
1 S 28 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [sync_supers]
1 S 29 0 2 0 0 0 00000000 0 0 0 0 2 00:00:06 _ [bdi-default]
1 S 30 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kintegrityd/0]
1 S 31 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [kintegrityd/1]
1 S 32 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [kintegrityd/2]
1 S 33 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [kintegrityd/3]
1 S 34 0 2 0 0 0 00000000 0 0 0 0 0 00:00:14 _ [kblockd/0]
1 S 35 0 2 0 0 0 00000000 0 0 0 0 1 00:00:02 _ [kblockd/1]
1 S 36 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [kblockd/2]
1 S 37 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [kblockd/3]
1 S 38 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kacpid]
1 S 39 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kacpi_notify]
1 S 40 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kacpi_hotplug]
1 S 41 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [ata/0]
1 S 42 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [ata/1]
1 S 43 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [ata/2]
1 S 44 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [ata/3]
1 S 45 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [ata_aux]
1 S 46 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [ksuspend_usbd]
1 S 47 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [khubd]
5 S 48 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kseriod]
1 S 49 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [md/0]
1 S 50 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [md/1]
1 S 51 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [md/2]
1 S 52 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [md/3]
1 S 53 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [md_misc/0]
1 S 54 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [md_misc/1]
1 S 55 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [md_misc/2]
1 S 56 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [md_misc/3]
1 S 57 0 2 0 0 0 00000000 0 0 0 0 0 00:00:01 _ [khungtaskd]
1 S 58 0 2 0 0 0 00000000 0 0 0 0 2 00:00:45 _ [kswapd0]
1 S 59 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [ksmd]
1 S 60 0 2 0 0 0 00000000 0 0 0 0 3 00:00:48 _ [khugepaged]
1 S 61 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [aio/0]
1 S 62 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [aio/1]
1 S 63 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [aio/2]
1 S 64 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [aio/3]
1 S 65 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [crypto/0]
1 S 66 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [crypto/1]
1 S 67 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [crypto/2]
1 S 68 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [crypto/3]
1 S 73 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kthrotld/0]
1 S 74 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [kthrotld/1]
1 S 75 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [kthrotld/2]
1 S 76 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [kthrotld/3]
1 S 78 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [kpsmoused]
1 S 79 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [usbhid_resumer]
1 S 110 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [kstriped]
1 S 282 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [scsi_eh_0]
1 S 283 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [scsi_eh_1]
1 S 353 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [virtio-blk]
1 S 411 0 2 0 0 0 00000000 0 0 0 0 0 00:00:09 _ [kdmflush]
1 S 413 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [kdmflush]
1 S 432 0 2 0 0 0 00000000 0 0 0 0 0 00:02:24 _ [kjournald]
1 S 903 0 2 0 0 0 00000000 0 0 0 0 0 00:00:10 _ [kdmflush]
1 S 938 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [kjournald]
1 S 939 0 2 0 0 0 00000000 0 0 0 0 0 00:02:53 _ [kjournald]
1 S 986 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [kauditd]
1 S 1286 0 2 0 0 0 00000000 0 0 0 0 2 00:00:42 _ [flush-253:0]
1 S 1360 0 2 0 0 0 00000000 0 0 0 0 0 00:00:00 _ [rpciod/0]
1 S 1361 0 2 0 0 0 00000000 0 0 0 0 1 00:00:00 _ [rpciod/1]
1 S 1362 0 2 0 0 0 00000000 0 0 0 0 2 00:00:00 _ [rpciod/2]
1 S 1363 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [rpciod/3]
1 S 18403 0 2 0 0 0 00000000 0 0 0 0 3 00:00:00 _ [flush-253:2]
4 S 1 0 0 1 1 416 f338ae00 4850 19400 1020 183 1 00:00:01 /sbin/init
5 S 520 0 1 520 520 804 b6e05ca0 2780 11120 252 1 2 00:00:00 /sbin/udevd -d
5 S 1807 0 520 520 520 2120 b6e05ca0 3109 12436 320 0 0 00:00:00 _ /sbin/udevd -d
5 S 1808 0 520 520 520 2120 b6e05ca0 3109 12436 320 0 2 00:00:00 _ /sbin/udevd -d
1 S 1200 0 1 1200 1200 604 6a089370 2293 9172 464 149 1 00:00:00 /sbin/dhclient -1 -q -lf /var/lib/dhclient/dhclient
5 S 1244 0 1 1244 1244 76088 dec8a5a0 23306 93224 604 37 0 00:00:01 auditd
5 S 1269 0 1 1266 987 227824 3f0adaa0 63771 255084 1020 111 0 00:00:00 /sbin/rsyslogd -i /var/run/syslogd.pid -c 4
5 S 1300 0 1 1300 1300 304 beb4dda0 2301 9204 484 5 1 00:01:23 irqbalance
5 S 1314 32 1 1314 1314 304 8097de60 4756 19024 532 5 2 00:00:01 rpcbind
5 S 1332 29 1 1332 1332 328 5fe781a0 5800 23200 668 1 0 00:00:00 rpc.statd
1 S 1367 0 1 1367 1367 316 3a82fcc0 7377 29508 236 1 1 00:00:00 rpc.idmapd
5 S 1458 81 1 1458 1458 76204 51ad0bd0 24337 97348 588 136 3 00:00:00 dbus-daemon --system
4 S 1470 0 1 1470 1470 712 8f4006f0 47286 189144 860 1 0 00:00:00 cupsd -C /etc/cups/cupsd.conf
1 S 1495 0 1 1495 1495 268 51a06ed0 1033 4132 452 1 1 00:00:00 /usr/sbin/acpid
5 S 1504 68 1 1504 1504 804 9cc43a20 6295 25180 1280 45 1 00:00:05 hald
0 S 1505 0 1504 1504 1504 296 275e7f70 4540 18160 632 1 0 00:00:00 _ hald-runner
0 S 1533 0 1505 1504 1504 292 47a75700 5069 20276 592 2 3 00:00:00 _ hald-addon-input: Listening on /dev/input/e
4 S 1546 68 1505 1504 1504 296 d56313a0 4465 17860 680 2 2 00:00:00 _ hald-addon-acpi: listening on acpid socket
5 S 1565 0 1 1565 1565 350124 c5387eb0 96440 385760 864 1 0 00:00:06 automount --pid-file /var/run/autofs.pid
1 S 1581 0 1 1581 1581 784 0db446b0 1704 6816 268 1 3 00:00:00 /usr/sbin/mcelog --daemon
5 S 1592 0 1 1592 1592 608 92cfede0 16017 64068 500 82 0 00:00:00 /usr/sbin/sshd
4 S 18409 0 1592 18409 18409 792 318cc660 24454 97816 3892 25 0 00:00:00 _ sshd: root@pts/0
4 S 18417 0 18409 18417 18417 424 7fc6ccf0 27098 108392 1784 4 0 00:00:00 _ -bash
4 R 18435 0 18417 18435 18417 1196 8b07a5f0 27074 108296 1028 2 0 00:00:00 _ ps -e -o f,s,pid,uid,ppid,pgid,sid,size
4 S 1668 0 1 1668 1668 596 9dc730c0 19669 78676 1024 5 2 00:00:12 /usr/libexec/postfix/master
4 S 1682 89 1668 1668 1668 704 75d058a0 19732 78928 988 5 1 00:00:01 _ qmgr -l -t fifo -u
4 S 16992 89 1668 1668 1668 600 b6a58e30 19689 78756 2468 36 1 00:00:00 _ pickup -l -t fifo -u
1 S 1692 0 1 1692 1692 292 4d687c70 29710 118840 704 1 0 00:00:00 /usr/sbin/abrtd
0 S 1700 0 1 1700 1700 268 90eac5a0 2304 9216 496 46 1 00:00:00 abrt-dump-oops -d /var/spool/abrt -rwx /var/log/mes
1 S 1711 498 1 1711 1711 379996 6190c740 121031 484124 1728 11 0 00:01:42 /usr/sbin/qpidd --data-dir /var/lib/qpidd --daemon
1 S 1746 0 1 1746 1746 1420 182329f0 29311 117244 784 0 2 00:00:06 crond
5 S 1757 0 1 1757 1757 480 f71c08c0 5373 21492 280 7 2 00:00:00 /usr/sbin/atd
1 S 1773 0 1 1773 1773 268 ebc61ca0 1028 4112 232 14 0 00:00:00 /usr/bin/rhsmcertd 240 1440
1 S 1774 0 1773 1773 1773 268 ebc61ca0 1028 4112 228 16 1 00:00:00 _ /usr/bin/rhsmcertd 240 1440
4 S 1790 0 1 1790 1790 700 593d12d0 19284 77136 1024 7 0 00:00:00 login -- root
4 S 1881 0 1790 1881 1881 420 36dc99a0 27097 108388 1060 1 0 00:00:00 _ -bash
4 S 1792 0 1 1792 1792 268 61130be0 1029 4116 448 0 1 00:00:00 /sbin/mingetty /dev/tty2
4 S 1794 0 1 1794 1794 268 c72eb1f0 1029 4116 448 1 2 00:00:00 /sbin/mingetty /dev/tty3
4 S 1796 0 1 1796 1796 268 568ffd90 1029 4116 448 1 1 00:00:00 /sbin/mingetty /dev/tty4
4 S 1798 0 1 1798 1798 268 36f6c4d0 1029 4116 448 1 1 00:00:00 /sbin/mingetty /dev/tty5
4 S 1800 0 1 1800 1800 268 5522fc50 1029 4116 448 1 2 00:00:00 /sbin/mingetty /dev/tty6
4 S 1815 0 1 1458 1458 4078148 bb71c960 1028479 4113916 868 167 1 00:00:00 /usr/sbin/console-kit-daemon --no-daemon
1 S 17492 497 1 17491 17491 300 37f3a580 2720 10880 324 0 1 00:00:00 /opt/couchbase/lib/erlang/erts-5.8.4/bin/epmd -daem
0 S 17507 497 1 17506 17506 1563840 b2caeee0 396309 1585236 135728 4 3 00:04:22 /opt/couchbase/lib/erlang/erts-5.8.4/bin/beam.smp -
0 S 17536 497 17507 17536 17536 292 4f218220 26539 106156 1228 0 0 00:00:00 _ sh -s disksup
0 S 17538 497 17507 17538 17538 264 3eccf990 1027 4108 552 0 3 00:00:00 _ /opt/couchbase/lib/erlang/lib/os_mon-2.2.6/priv
0 S 17541 497 17507 17541 17541 264 a75ba820 1026 4104 408 1 0 00:00:00 _ /opt/couchbase/lib/erlang/lib/os_mon-2.2.6/priv
0 S 17542 497 17507 17542 17542 268 be857a00 2711 10844 536 0 1 00:00:00 _ inet_gethost 4
1 S 17543 497 17542 17542 17542 268 be857a00 2711 10844 432 0 3 00:00:00 | _ inet_gethost 4
1 S 18404 497 17542 17542 17542 268 be857a00 2711 10844 412 0 3 00:00:00 | _ inet_gethost 4
0 S 17546 497 17507 17546 17546 402676 fdb20730 103525 414100 4592 3 1 00:00:01 _ /opt/couchbase/bin/moxi -Z port_listen=11211,de
0 S 17547 497 17507 17547 17547 3719992 42858900 938264 3753056 3600168 380 0 00:03:09 _ /opt/couchbase/bin/memcached -X /opt/couchbase
0 S 17548 497 17507 17548 17548 272 c6abfa10 1046 4184 532 1 3 00:00:00 _ portsigar for ns_1@10.3.121.232