Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-56705

[System Test] Indexing rebalance is stuck at 10% as KV node became unresponsive while rebalance was ongoing

    XMLWordPrintable

Details

    • Untriaged
    • Centos 64-bit
    • 0
    • Unknown

    Description

      Steps

      1. Run the 7.2 longevity test for 7 days on Magma buckets with deduplication turned off.

        ./sequoia -client 172.23.104.254:2375 -provider file:centos_third_cluster.yml -test tests/integration/7.2/test_7.2.yml -scope tests/integration/7.2/scope_7.2_magma.yml -scale 3 -repeat 0 -log_level 0 -version 7.2.0-5318 -skip_setup=false -skip_test=false -skip_teardown=true -skip_cleanup=false -continue=false -collect_on_error=false -stop_on_error=false -duration=604800 -show_topology=true
        

      2. The test was stuck at the FTS rebalance step due to MB-56272 while rebalancing out a failed-over KV node.
      3. Stopped and restarted the rebalance operation by applying the workaround mentioned in MB-56272.
      4. After restarting the rebalance operation, the GSI rebalance has been stuck at 10% for more than 4 hours, with 2 of the indexes stuck in the "Moving" state.

        curl -s -u Administrator:password http://172.23.108.139:8091/pools/default/rebalanceProgress | jq
        {
          "status": "running",
          "ns_1@172.23.105.210": {
            "progress": 0
          },
          "ns_1@172.23.108.140": {
            "progress": 0
          },
          "ns_1@172.23.108.141": {
            "progress": 1
          },
          "ns_1@172.23.108.61": {
            "progress": 0.1
          },
          "ns_1@172.23.105.91": {
            "progress": 0.1
          },
          "ns_1@172.23.108.132": {
            "progress": 0.1
          },
          "ns_1@172.23.107.142": {
            "progress": 0
          },
          "ns_1@172.23.106.32": {
            "progress": 0
          },
          "ns_1@172.23.108.143": {
            "progress": 1
          },
          "ns_1@172.23.108.144": {
            "progress": 0
          },
          "ns_1@172.23.108.134": {
            "progress": 1
          },
          "ns_1@172.23.108.34": {
            "progress": 0.1
          },
          "ns_1@172.23.105.134": {
            "progress": 0.1
          },
          "ns_1@172.23.108.145": {
            "progress": 1
          },
          "ns_1@172.23.104.216": {
            "progress": 0
          },
          "ns_1@172.23.108.136": {
            "progress": 1
          },
          "ns_1@172.23.107.236": {
            "progress": 1
          },
          "ns_1@172.23.108.146": {
            "progress": 0
          },
          "ns_1@172.23.106.37": {
            "progress": 1
          },
          "ns_1@172.23.105.38": {
            "progress": 0
          },
          "ns_1@172.23.108.148": {
            "progress": 1
          },
          "ns_1@172.23.104.249": {
            "progress": 0
          },
          "ns_1@172.23.108.129": {
            "progress": 0
          },
          "ns_1@172.23.108.139": {
            "progress": 1
          },
          "ns_1@172.23.105.39": {
            "progress": 0
          }
        }
        

        {
              "defnId": 5213508093030662000,
              "instId": 13356109758857568000,
              "name": "idx11_761ZF (replica 1)",
              "bucket": "bucket4",
              "scope": "scope_0",
              "collection": "coll_4",
              "secExprs": [
                "(all (array flatten_keys(((`r`.`ratings`).`Rooms`), ((`r`.`ratings`).`Cleanliness`)) for `r` in `reviews` end))",
                "`email`",
                "`free_parking`"
              ],
              "indexType": "plasma",
              "status": "Moving",
              "definition": "CREATE INDEX `idx11_761ZF` ON `bucket4`.`scope_0`.`coll_4`((all (array flatten_keys(((`r`.`ratings`).`Rooms`), ((`r`.`ratings`).`Cleanliness`)) for `r` in `reviews` end)),`email`,`free_parking`) PARTITION BY hash((meta().`id`)) WITH {  \"defer_build\":true, \"nodes\":[ \"172.23.105.134:8091\",\"172.23.105.91:8091\",\"172.23.108.132:8091\",\"172.23.108.34:8091\",\"172.23.108.61:8091\" ], \"num_replica\":2, \"num_partition\":5 }",
              "hosts": [
                "172.23.105.134:8091"
              ],
              "completion": 0,
              "progress": 0.0033070144392672862,
              "scheduled": false,
              "partitioned": true,
              "numPartition": 1,
              "partitionMap": {
                "172.23.105.134:8091": [
                  4
                ]
              },
              "numReplica": 2,
              "indexName": "idx11_761ZF",
              "replicaId": 1,
              "stale": false,
              "lastScanTime": "NA"
        },
        {
              "defnId": 7459528310031783000,
              "instId": 3274665186802382300,
              "name": "idx4_GkBg1 (replica 3)",
              "bucket": "bucket4",
              "scope": "scope_5",
              "collection": "coll_2",
              "secExprs": [
                "`price`",
                "`city`",
                "`name`"
              ],
              "indexType": "plasma",
              "status": "Moving",
              "definition": "CREATE INDEX `idx4_GkBg1` ON `bucket4`.`scope_5`.`coll_2`(`price`,`city`,`name`) PARTITION BY hash((meta().`id`)) WITH {  \"defer_build\":true, \"nodes\":[ \"172.23.105.134:8091\",\"172.23.105.91:8091\",\"172.23.108.132:8091\",\"172.23.108.34:8091\",\"172.23.108.61:8091\" ], \"num_replica\":3, \"num_partition\":5 }",
              "hosts": [
                "172.23.105.134:8091"
              ],
              "completion": 0,
              "progress": 0.0032544311315507723,
              "scheduled": false,
              "partitioned": true,
              "numPartition": 1,
              "partitionMap": {
                "172.23.105.134:8091": [
                  3
                ]
              },
              "numReplica": 3,
              "indexName": "idx4_GkBg1",
              "replicaId": 3,
              "stale": false,
              "lastScanTime": "NA"
        }
        

      In both cases the problem seems to be on indexing node 172.23.105.134, which hosts both of the index instances stuck in the "Moving" state.

      Attachments

        No reviews matched the request. Check your Options in the drop-down menu of this section's header.

        Activity

          People

            Balakumaran.Gopal Balakumaran Gopal
            sujay.gad Sujay Gad
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty