Details
-
Bug
-
Resolution: Not a Bug
-
Major
-
7.2.0
-
Enterprise Edition 7.2.0 build 5318
-
Untriaged
-
Centos 64-bit
-
0
-
Unknown
Description
Steps
- Run the 7.2 longevity test for 7 days on Magma buckets with deduplication turned off.
./sequoia -client 172.23.104.254:2375 -provider file:centos_third_cluster.yml -test tests/integration/7.2/test_7.2.yml -scope tests/integration/7.2/scope_7.2_magma.yml -scale 3 -repeat 0 -log_level 0 -version 7.2.0-5318 -skip_setup=false -skip_test=false -skip_teardown=true -skip_cleanup=false -continue=false -collect_on_error=false -stop_on_error=false -duration=604800 -show_topology=true
- The test was stuck at the FTS rebalance step due to MB-56272 while rebalancing out a failed-over KV node.
- Stopped and restarted the rebalance operation by applying the workaround mentioned in MB-56272.
- After restarting the rebalance operation, the GSI rebalance has been stuck at 10% for more than 4 hours, with 2 of the indexes stuck in the "Moving" state.
curl -s -u Administrator:password http://172.23.108.139:8091/pools/default/rebalanceProgress | jq
{
"status": "running",
"ns_1@172.23.105.210": {
"progress": 0
},
"ns_1@172.23.108.140": {
"progress": 0
},
"ns_1@172.23.108.141": {
"progress": 1
},
"ns_1@172.23.108.61": {
"progress": 0.1
},
"ns_1@172.23.105.91": {
"progress": 0.1
},
"ns_1@172.23.108.132": {
"progress": 0.1
},
"ns_1@172.23.107.142": {
"progress": 0
},
"ns_1@172.23.106.32": {
"progress": 0
},
"ns_1@172.23.108.143": {
"progress": 1
},
"ns_1@172.23.108.144": {
"progress": 0
},
"ns_1@172.23.108.134": {
"progress": 1
},
"ns_1@172.23.108.34": {
"progress": 0.1
},
"ns_1@172.23.105.134": {
"progress": 0.1
},
"ns_1@172.23.108.145": {
"progress": 1
},
"ns_1@172.23.104.216": {
"progress": 0
},
"ns_1@172.23.108.136": {
"progress": 1
},
"ns_1@172.23.107.236": {
"progress": 1
},
"ns_1@172.23.108.146": {
"progress": 0
},
"ns_1@172.23.106.37": {
"progress": 1
},
"ns_1@172.23.105.38": {
"progress": 0
},
"ns_1@172.23.108.148": {
"progress": 1
},
"ns_1@172.23.104.249": {
"progress": 0
},
"ns_1@172.23.108.129": {
"progress": 0
},
"ns_1@172.23.108.139": {
"progress": 1
},
"ns_1@172.23.105.39": {
"progress": 0
}
}
{
"defnId": 5213508093030662000,
"instId": 13356109758857568000,
"name": "idx11_761ZF (replica 1)",
"bucket": "bucket4",
"scope": "scope_0",
"collection": "coll_4",
"secExprs": [
"(all (array flatten_keys(((`r`.`ratings`).`Rooms`), ((`r`.`ratings`).`Cleanliness`)) for `r` in `reviews` end))",
"`email`",
"`free_parking`"
],
"indexType": "plasma",
"status": "Moving",
"definition": "CREATE INDEX `idx11_761ZF` ON `bucket4`.`scope_0`.`coll_4`((all (array flatten_keys(((`r`.`ratings`).`Rooms`), ((`r`.`ratings`).`Cleanliness`)) for `r` in `reviews` end)),`email`,`free_parking`) PARTITION BY hash((meta().`id`)) WITH { \"defer_build\":true, \"nodes\":[ \"172.23.105.134:8091\",\"172.23.105.91:8091\",\"172.23.108.132:8091\",\"172.23.108.34:8091\",\"172.23.108.61:8091\" ], \"num_replica\":2, \"num_partition\":5 }",
"hosts": [
"172.23.105.134:8091"
],
"completion": 0,
"progress": 0.0033070144392672862,
"scheduled": false,
"partitioned": true,
"numPartition": 1,
"partitionMap": {
"172.23.105.134:8091": [
4
]
},
"numReplica": 2,
"indexName": "idx11_761ZF",
"replicaId": 1,
"stale": false,
"lastScanTime": "NA"
},
{
"defnId": 7459528310031783000,
"instId": 3274665186802382300,
"name": "idx4_GkBg1 (replica 3)",
"bucket": "bucket4",
"scope": "scope_5",
"collection": "coll_2",
"secExprs": [
"`price`",
"`city`",
"`name`"
],
"indexType": "plasma",
"status": "Moving",
"definition": "CREATE INDEX `idx4_GkBg1` ON `bucket4`.`scope_5`.`coll_2`(`price`,`city`,`name`) PARTITION BY hash((meta().`id`)) WITH { \"defer_build\":true, \"nodes\":[ \"172.23.105.134:8091\",\"172.23.105.91:8091\",\"172.23.108.132:8091\",\"172.23.108.34:8091\",\"172.23.108.61:8091\" ], \"num_replica\":3, \"num_partition\":5 }",
"hosts": [
"172.23.105.134:8091"
],
"completion": 0,
"progress": 0.0032544311315507723,
"scheduled": false,
"partitioned": true,
"numPartition": 1,
"partitionMap": {
"172.23.105.134:8091": [
3
]
},
"numReplica": 3,
"indexName": "idx4_GkBg1",
"replicaId": 3,
"stale": false,
"lastScanTime": "NA"
}
The problem appears to be on indexing node 172.23.105.134 in both cases: both index instances stuck in the "Moving" state are hosted on that node.