Details
-
Bug
-
Resolution: Fixed
-
Critical
-
7.1.0
-
Untriaged
-
-
1
-
Yes
Description
Build : 7.1.0-2534
Test : Couchstore longevity
Scale : 3
Iteration : 2nd
A rebalance to add a data node, together with a hard failover and removal of another data node, has been stuck at the indexer phase for 20+ hrs because 2 indexes are stuck in the Moving state.
The following is the rebalance operation that got stuck:
[2022-03-29T18:28:58-07:00, sequoiatools/couchbase-cli:7.1:8df955] server-add -c 172.23.108.103:8091 --server-add https://172.23.121.117 -u Administrator -p password --server-add-username Administrator --server-add-password password --services data
|
[2022-03-29T18:29:24-07:00, sequoiatools/couchbase-cli:7.1:50299f] failover -c 172.23.108.103:8091 --server-failover 172.23.106.100:8091 -u Administrator -p password --hard
|
[2022-03-29T18:29:36-07:00, sequoiatools/couchbase-cli:7.1:e14a29] rebalance -c 172.23.108.103:8091 -u Administrator -p password
|
The indexes that are in the Moving state are:
{
|
"bucket" : "bucket4",
|
"collection" : "coll_3",
|
"completion" : 50,
|
"definition" : "CREATE INDEX `idx4_276TmUrb` ON `bucket4`.`scope_4`.`coll_3`(`price`,`city`,`name`) WITH { \"defer_build\":true, \"nodes\":[ \"172.23.104.155:8091\",\"172.23.104.70:8091\",\"172.23.120.245:8091\",\"172.23.96.251:8091\",\"172.23.96.252:8091\" ], \"num_replica\":3 }",
|
"defnId" : 13575203599725531269,
|
"hosts" : [
|
"172.23.104.155:8091",
|
"172.23.120.245:8091"
|
],
|
"indexName" : "idx4_276TmUrb",
|
"indexType" : "plasma",
|
"instId" : 1310356575091214700,
|
"lastScanTime" : "Wed Mar 30 14:51:25 PDT 2022",
|
"name" : "idx4_276TmUrb",
|
"numPartition" : 2,
|
"numReplica" : 3,
|
"partitionMap" : {
|
"172.23.104.155:8091" : [
|
0
|
],
|
"172.23.120.245:8091" : [
|
0
|
]
|
},
|
"partitioned" : false,
|
"progress" : 50.132661182764,
|
"replicaId" : 0,
|
"scheduled" : false,
|
"scope" : "scope_4",
|
"secExprs" : [
|
"`price`",
|
"`city`",
|
"`name`"
|
],
|
"stale" : false,
|
"status" : "Moving"
|
}
|
{
|
"bucket" : "bucket4",
|
"collection" : "coll_4",
|
"completion" : 93,
|
"definition" : "CREATE INDEX `idx9_jUJd3mr` ON `bucket4`.`scope_4`.`coll_4`((all (array flatten_keys((`r`.`author`), ((`r`.`ratings`).`Rooms`)) for `r` in `reviews` end)),`free_parking`) PARTITION BY hash((meta().`id`)) WITH { \"defer_build\":true, \"nodes\":[ \"172.23.104.70:8091\",\"172.23.120.245:8091\",\"172.23.123.28:8091\",\"172.23.96.251:8091\",\"172.23.96.252:8091\" ], \"num_replica\":1, \"num_partition\":5 }",
|
"defnId" : 7568660132294045479,
|
"hosts" : [
|
"172.23.104.70:8091",
|
"172.23.120.245:8091",
|
"172.23.123.28:8091",
|
"172.23.96.251:8091",
|
"172.23.96.252:8091"
|
],
|
"indexName" : "idx9_jUJd3mr",
|
"indexType" : "plasma",
|
"instId" : 15176737760513757860,
|
"lastScanTime" : "Wed Mar 30 14:51:15 PDT 2022",
|
"name" : "idx9_jUJd3mr",
|
"numPartition" : 6,
|
"numReplica" : 1,
|
"partitionMap" : {
|
"172.23.104.70:8091" : [
|
1
|
],
|
"172.23.120.245:8091" : [
|
4
|
],
|
"172.23.123.28:8091" : [
|
3,
|
2
|
],
|
"172.23.96.251:8091" : [
|
5
|
],
|
"172.23.96.252:8091" : [
|
4
|
]
|
},
|
"partitioned" : true,
|
"progress" : 93.7666482291136,
|
"replicaId" : 0,
|
"scheduled" : false,
|
"scope" : "scope_4",
|
"secExprs" : [
|
"(all (array flatten_keys((`r`.`author`), ((`r`.`ratings`).`Rooms`)) for `r` in `reviews` end))",
|
"`free_parking`"
|
],
|
"stale" : false,
|
"status" : "Moving"
|
}
|
The attached set of logs were collected a couple of hours after the rebalance started. If you need logs from a later time, please let me know.
Marking this as a regression, since we have not seen this kind of issue in the last few weeks.
Attachments
Issue Links
- relates to
-
MB-52396 Add cinfo lite in KVSender and Projector
- Open