Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-60118

[Rebalance] : /pools/default/tasks endpoint returns stale status during current rebalance

    XMLWordPrintable

Details

    • Bug
    • Resolution: Duplicate
    • Major
    • None
    • 7.6.0
    • ns_server
    • Couchbase Enterprise Edition build 7.6.0-1907

    Description

      Steps to reproduce

      1. Created a 4-node KV cluster
      2. Created 10 buckets with different configurations
      3. Created 5 scopes per bucket and 20 collections per scope
      4. Loaded data into each collection (around 4000 docs per collection)
      5. Added another KV node and started a rebalance

      During the rebalance, a request was made to /pools/default/tasks.

      While vBucket movement is in progress, the endpoint returns the expected results:

      $ curl -u Administrator:password http://172.23.96.168:8091/pools/default/tasks | jq
       
                 
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                       Dload  Upload   Total   Spent    Left  Speed
      100  5830  100  5830    0     0   8309      0 --:--:-- --:--:-- --:--:--  8376
      [
        {
          "statusId": "26a88b4996bff08e44df51b8c029e9be",
          "type": "rebalance",
          "subtype": "rebalance",
          "recommendedRefreshPeriod": 0.25,
          "status": "running",
          "progress": 15.56955154907165,
          "perNode": {
            "ns_1@172.23.96.220": {
              "progress": 14.34878587196468
            },
            "ns_1@172.23.96.221": {
              "progress": 17.84269662921348
            },
            "ns_1@172.23.96.196": {
              "progress": 14.44444444444444
            },
            "ns_1@172.23.97.78": {
              "progress": 18.08823529411765
            },
            "ns_1@172.23.96.168": {
              "progress": 13.12359550561798
            }
          },
          "detailedProgress": {
            "bucket": "bucket3",
            "bucketNumber": 2,
            "bucketsCount": 10,
            "perNode": {
              "ns_1@172.23.97.78": {
                "ingoing": {
                  "docsTotal": 0,
                  "docsTransferred": 0,
                  "activeVBucketsLeft": 0,
                  "replicaVBucketsLeft": 0
                },
                "outgoing": {
                  "docsTotal": 58400,
                  "docsTransferred": 42100,
                  "activeVBucketsLeft": 39,
                  "replicaVBucketsLeft": 432
                }
              },
              "ns_1@172.23.96.221": {
                "ingoing": {
                  "docsTotal": 70200,
                  "docsTransferred": 16200,
                  "activeVBucketsLeft": 0,
                  "replicaVBucketsLeft": 142
                },
                "outgoing": {
                  "docsTotal": 57438,
                  "docsTransferred": 37138,
                  "activeVBucketsLeft": 0,
                  "replicaVBucketsLeft": 0
                }
              },
              "ns_1@172.23.96.220": {
                "ingoing": {
                  "docsTotal": 72638,
                  "docsTransferred": 27638,
                  "activeVBucketsLeft": 12,
                  "replicaVBucketsLeft": 115
                },
                "outgoing": {
                  "docsTotal": 53502,
                  "docsTransferred": 8902,
                  "activeVBucketsLeft": 0,
                  "replicaVBucketsLeft": 12
                }
              },
              "ns_1@172.23.96.196": {
                "ingoing": {
                  "docsTotal": 69900,
                  "docsTransferred": 30200,
                  "activeVBucketsLeft": 15,
                  "replicaVBucketsLeft": 104
                },
                "outgoing": {
                  "docsTotal": 53300,
                  "docsTransferred": 9700,
                  "activeVBucketsLeft": 0,
                  "replicaVBucketsLeft": 15
                }
              },
              "ns_1@172.23.96.168": {
                "ingoing": {
                  "docsTotal": 67902,
                  "docsTransferred": 26702,
                  "activeVBucketsLeft": 12,
                  "replicaVBucketsLeft": 110
                },
                "outgoing": {
                  "docsTotal": 58000,
                  "docsTransferred": 2900,
                  "activeVBucketsLeft": 0,
                  "replicaVBucketsLeft": 12
                }
              }
            }
          },
          "stageInfo": {
            "data": {
              "totalProgress": 15.56955154907165,
              "perNodeProgress": {
                "ns_1@172.23.96.220": 0.1434878587196468,
                "ns_1@172.23.96.221": 0.1784269662921348,
                "ns_1@172.23.96.196": 0.1444444444444444,
                "ns_1@172.23.97.78": 0.1808823529411765,
                "ns_1@172.23.96.168": 0.1312359550561798
              },
              "startTime": "2023-12-12T23:58:32.815-08:00",
              "completedTime": false,
              "timeTaken": 197376,
              "details": {
                "bucket7": {
                  "compactionInfo": {
                    "perNode": {
                      "ns_1@172.23.96.168": {
                        "averageTime": 0
                      },
                      "ns_1@172.23.96.220": {
                        "averageTime": 3
                      },
                      "ns_1@172.23.96.196": {
                        "averageTime": 0
                      },
                      "ns_1@172.23.97.78": {
                        "averageTime": 0.25
                      },
                      "ns_1@172.23.96.221": {
                        "averageTime": 0
                      }
                    }
                  },
                  "vbucketLevelInfo": {
                    "move": {
                      "averageTime": 485.487684729064,
                      "totalCount": 812,
                      "remainingCount": 0
                    },
                    "backfill": {
                      "averageTime": 136.2204433497537
                    },
                    "takeover": {
                      "averageTime": 23.51470588235294
                    },
                    "persistence": {
                      "averageTime": 36.0218818380744
                    }
                  },
                  "replicationInfo": {
                    "ns_1@172.23.96.220": {
                      "inDocsTotal": 42800,
                      "inDocsLeft": 0,
                      "outDocsTotal": 30500,
                      "outDocsLeft": 0
                    },
                    "ns_1@172.23.96.221": {
                      "inDocsTotal": 50400,
                      "inDocsLeft": 0,
                      "outDocsTotal": 15700,
                      "outDocsLeft": 0
                    },
                    "ns_1@172.23.96.196": {
                      "inDocsTotal": 40800,
                      "inDocsLeft": 0,
                      "outDocsTotal": 32000,
                      "outDocsLeft": 0
                    },
                    "ns_1@172.23.96.168": {
                      "inDocsTotal": 33300,
                      "inDocsLeft": 0,
                      "outDocsTotal": 39100,
                      "outDocsLeft": 0
                    },
                    "ns_1@172.23.97.78": {
                      "inDocsTotal": 0,
                      "inDocsLeft": 0,
                      "outDocsTotal": 50000,
                      "outDocsLeft": 0
                    }
                  },
                  "startTime": "2023-12-12T23:58:36.343-08:00",
                  "completedTime": "2023-12-12T23:59:45.276-08:00",
                  "timeTaken": 68933
                },
                "bucket3": {
                  "compactionInfo": {
                    "perNode": {
                      "ns_1@172.23.97.78": {
                        "averageTime": 0
                      }
                    }
                  },
                  "vbucketLevelInfo": {
                    "move": {
                      "averageTime": 1218.529636711281,
                      "totalCount": 994,
                      "remainingCount": 471
                    },
                    "backfill": {
                      "averageTime": 863.6615678776291
                    },
                    "takeover": {
                      "averageTime": 47.83636363636364
                    },
                    "persistence": {
                      "averageTime": 66.6804293971924
                    }
                  },
                  "replicationInfo": {
                    "ns_1@172.23.96.220": {
                      "inDocsTotal": 72638,
                      "inDocsLeft": 45000,
                      "outDocsTotal": 53502,
                      "outDocsLeft": 44600
                    },
                    "ns_1@172.23.96.221": {
                      "inDocsTotal": 70200,
                      "inDocsLeft": 54000,
                      "outDocsTotal": 57438,
                      "outDocsLeft": 20300
                    },
                    "ns_1@172.23.96.196": {
                      "inDocsTotal": 69900,
                      "inDocsLeft": 39700,
                      "outDocsTotal": 53300,
                      "outDocsLeft": 43600
                    },
                    "ns_1@172.23.96.168": {
                      "inDocsTotal": 67902,
                      "inDocsLeft": 41200,
                      "outDocsTotal": 58000,
                      "outDocsLeft": 55100
                    },
                    "ns_1@172.23.97.78": {
                      "inDocsTotal": 0,
                      "inDocsLeft": 0,
                      "outDocsTotal": 58400,
                      "outDocsLeft": 16300
                    }
                  },
                  "startTime": "2023-12-12T23:59:45.765-08:00",
                  "completedTime": false,
                  "timeTaken": 124425
                }
              }
            }
          },
          "rebalanceId": "34aa0967b4c57dc656d1759043abf365",
          "nodesInfo": {
            "active_nodes": [
              "ns_1@172.23.96.168",
              "ns_1@172.23.96.196",
              "ns_1@172.23.96.220",
              "ns_1@172.23.96.221",
              "ns_1@172.23.97.78"
            ],
            "keep_nodes": [
              "ns_1@172.23.96.168",
              "ns_1@172.23.96.196",
              "ns_1@172.23.96.220",
              "ns_1@172.23.96.221"
            ],
            "eject_nodes": [
              "ns_1@172.23.97.78"
            ],
            "delta_nodes": [],
            "failed_nodes": []
          },
          "masterNode": "ns_1@172.23.96.168"
        },
        {
          "node": "ns_1@172.23.96.168",
          "type": "clusterLogsCollection",
          "perNode": {
            "ns_1@172.23.96.168": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.168.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.168.zip"
            },
            "ns_1@172.23.96.196": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.196.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.196.zip"
            },
            "ns_1@172.23.96.220": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.220.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.220.zip"
            },
            "ns_1@172.23.96.221": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.221.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.221.zip"
            },
            "ns_1@172.23.97.78": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.97.78.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.97.78.zip"
            }
          },
          "progress": 100,
          "ts": "2023-12-12 17:48:19",
          "status": "completed"
        }
      ] 

      But once vBucket movement for one bucket has finished and vBucket movement for the next bucket has not yet started, the endpoint returns a stale result from the previous rebalance:

      curl -u Administrator:password http://172.23.96.168:8091/pools/default/tasks | jq
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                       Dload  Upload   Total   Spent    Left  Speed
      100  1647  100  1647    0     0    153      0  0:00:10  0:00:10 --:--:--   371
      [
        {
          "statusId": "26a88b4996bff08e44df51b8c029e9be",
          "type": "rebalance",
          "subtype": "rebalance",
          "status": "notRunning",
          "statusIsStale": true,
          "masterRequestTimedOut": true,
          "lastReportURI": "/logs/rebalanceReport?reportID=fcf767f16744dadb417b2d14e327b13e"
        },
        {
          "node": "ns_1@172.23.96.168",
          "type": "clusterLogsCollection",
          "perNode": {
            "ns_1@172.23.96.168": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.168.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.168.zip"
            },
            "ns_1@172.23.96.196": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.196.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.196.zip"
            },
            "ns_1@172.23.96.220": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.220.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.220.zip"
            },
            "ns_1@172.23.96.221": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.96.221.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.96.221.zip"
            },
            "ns_1@172.23.97.78": {
              "path": "/opt/couchbase/var/lib/couchbase/tmp/collectinfo-2023-12-12T174819-ns_1@172.23.97.78.zip",
              "status": "uploaded",
              "url": "https://cb-engineering.s3.amazonaws.com/MB-59828/collectinfo-2023-12-12T174819-ns_1%40172.23.97.78.zip"
            }
          },
          "progress": 100,
          "ts": "2023-12-12 17:48:19",
          "status": "completed"
        }
      ] 

       

      Seconds later, when vBucket movement for the next bucket starts, the endpoint returns the expected results again.

       

      A screen recording of this behavior has been attached: Screen Recording 2023-12-12 at 10.47.27 PM-1.mov

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this section's header.

          Activity

            People

              peter.searby Peter Searby
              raghav.sk Raghav S K
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty