Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-20131

KV+XDCR rebalance-in gets stuck

    XMLWordPrintable

Details

    • Bug
    • Resolution: Cannot Reproduce
    • Major
    • 4.1.2
    • 4.1.2
    • None
    • Untriaged
    • Centos 64-bit
    • Unknown

    Description

      Build: 4.1.2-6070.

      Initial configuration: source cluster with 4 nodes, destination cluster with 5 nodes.
      Target configuration: source cluster with 5 nodes, destination cluster with 5 nodes.

      Test configuration: 1B documents, 800 bytes, unidirectional replication, 10K mixed front-end operations.

      Rebalance has been in the same state for 4 hours now:

      [
          {
              "detailedProgress": {
                  "bucket": "bucket-1",
                  "bucketNumber": 1,
                  "bucketsCount": 1,
                  "perNode": {
                      "ns_1@172.23.96.100": {
                          "ingoing": {
                              "activeVBucketsLeft": 0,
                              "docsTotal": 979589,
                              "docsTransferred": 979220,
                              "replicaVBucketsLeft": 0
                          },
                          "outgoing": {
                              "activeVBucketsLeft": 5,
                              "docsTotal": 99744878,
                              "docsTransferred": 99358428,
                              "replicaVBucketsLeft": 0
                          }
                      },
                      "ns_1@172.23.96.101": {
                          "ingoing": {
                              "activeVBucketsLeft": 5,
                              "docsTotal": 1444,
                              "docsTransferred": 1255,
                              "replicaVBucketsLeft": 0
                          },
                          "outgoing": {
                              "activeVBucketsLeft": 0,
                              "docsTotal": 100407181,
                              "docsTransferred": 100407181,
                              "replicaVBucketsLeft": 5
                          }
                      },
                      "ns_1@172.23.96.102": {
                          "ingoing": {
                              "activeVBucketsLeft": 0,
                              "docsTotal": 1677,
                              "docsTransferred": 1677,
                              "replicaVBucketsLeft": 0
                          },
                          "outgoing": {
                              "activeVBucketsLeft": 28,
                              "docsTotal": 100720865,
                              "docsTransferred": 76111652,
                              "replicaVBucketsLeft": 0
                          }
                      },
                      "ns_1@172.23.96.103": {
                          "ingoing": {
                              "activeVBucketsLeft": 0,
                              "docsTotal": 1639,
                              "docsTransferred": 1639,
                              "replicaVBucketsLeft": 0
                          },
                          "outgoing": {
                              "activeVBucketsLeft": 0,
                              "docsTotal": 99742782,
                              "docsTransferred": 99742782,
                              "replicaVBucketsLeft": 0
                          }
                      },
                      "ns_1@172.23.96.104": {
                          "ingoing": {
                              "activeVBucketsLeft": 28,
                              "docsTotal": 399631357,
                              "docsTransferred": 374636252,
                              "replicaVBucketsLeft": 5
                          },
                          "outgoing": {
                              "activeVBucketsLeft": 0,
                              "docsTotal": 0,
                              "docsTransferred": 0,
                              "replicaVBucketsLeft": 0
                          }
                      }
                  }
              },
              "perNode": {
                  "ns_1@172.23.96.100": {
                      "progress": 96.73202614379085
                  },
                  "ns_1@172.23.96.101": {
                      "progress": 96.73202614379085
                  },
                  "ns_1@172.23.96.102": {
                      "progress": 81.69934640522875
                  },
                  "ns_1@172.23.96.103": {
                      "progress": 100
                  },
                  "ns_1@172.23.96.104": {
                      "progress": 86.27450980392157
                  }
              },
              "progress": 92.2875816993464,
              "recommendedRefreshPeriod": 0.25,
              "status": "running",
              "subtype": "rebalance",
              "type": "rebalance"
          },
          {
              "cancelURI": "/controller/cancelXDCR/395b40d8f9bc8ee1813be741db51441b%2Fbucket-1%2Fbucket-1",
              "changesLeft": 1,
              "continuous": true,
              "docsChecked": 1075571792,
              "docsWritten": 41887544,
              "errors": [],
              "filterExpression": "",
              "id": "395b40d8f9bc8ee1813be741db51441b/bucket-1/bucket-1",
              "maxVBReps": null,
              "pauseRequested": false,
              "recommendedRefreshPeriod": 10,
              "replicationType": "xmem",
              "settingsURI": "/settings/replications/395b40d8f9bc8ee1813be741db51441b%2Fbucket-1%2Fbucket-1",
              "source": "bucket-1",
              "status": "running",
              "target": "/remoteClusters/395b40d8f9bc8ee1813be741db51441b/buckets/bucket-1",
              "type": "xdcr"
          },
          {
              "bucket": "bucket-1",
              "changesDone": 408,
              "progress": 33,
              "recommendedRefreshPeriod": 2,
              "status": "running",
              "totalChanges": 1235,
              "type": "bucket_compaction"
          }
      ]
      

      According to the master events, the following vbuckets are currently being moved:

      65, 69, 673, 677, 681, 53, 57, 61

      All of them are stuck at the same stage (the events for vbucket 65 are shown as an example):

      {"vbucket":65,"type":"updateMap","ts":1468314271.927393,"bucket":"bucket-1","chainBefore":[],"chainAfter":["172.23.96.100:11209","172.23.96.101:11209"]}
      {"vbucket":65,"type":"vbucketStateChange","ts":1468314272.058645,"state":"replica","host":"172.23.96.101:11209","bucket":"bucket-1"}
      {"vbucket":65,"type":"vbucketStateChange","ts":1468314272.060222,"state":"active","host":"172.23.96.100:11209","bucket":"bucket-1"}
      {"vbucket":65,"type":"dcpAddStream","ts":1468314272.087532,"streamType":"add","side":"consumer","pid":"<17648.1469.0>","opaque":65,"node":"ns_1@172.23.96.101","connectionName":"replication:ns_1@172.23.96.100->ns_1@172.23.96.101:bucket-1","bucket":"bucket-1"}
      {"vbucket":65,"type":"dcpAddStreamResponse","ts":1468314272.116021,"success":true,"status":"success","side":"consumer","rawStatus":0,"pid":"<17648.1469.0>","opaque":65,"node":"ns_1@172.23.96.101","connectionName":"replication:ns_1@172.23.96.100->ns_1@172.23.96.101:bucket-1","bucket":"bucket-1"}
      {"vbucket":65,"type":"updateFastForwardMap","ts":1468330462.311486,"bucket":"bucket-1","chainBefore":[],"chainAfter":["172.23.96.101:11209","172.23.96.104:11209"]}
      {"vbucket":65,"type":"vbucketMoveStart","ts":1468335339.624153,"pid":"<0.16734.9>","node":"ns_1@172.23.96.100","bucket":"bucket-1","chainBefore":["172.23.96.100:11209","172.23.96.101:11209"],"chainAfter":["172.23.96.101:11209","172.23.96.104:11209"]}
      {"vbucket":65,"type":"vbucketStateChange","ts":1468335339.625756,"state":"replica","host":"172.23.96.104:11209","bucket":"bucket-1"}
      {"vbucket":65,"type":"vbucketStateChange","ts":1468335339.625773,"state":"replica","host":"172.23.96.101:11209","bucket":"bucket-1"}
      {"vbucket":65,"type":"dcpAddStream","ts":1468335339.626258,"streamType":"add","side":"consumer","pid":"<19800.20809.2>","opaque":65,"node":"ns_1@172.23.96.104","connectionName":"replication:ns_1@172.23.96.100->ns_1@172.23.96.104:bucket-1","bucket":"bucket-1"}
      {"vbucket":65,"type":"dcpAddStreamResponse","ts":1468335339.6276,"success":true,"status":"success","side":"consumer","rawStatus":0,"pid":"<19800.20809.2>","opaque":65,"node":"ns_1@172.23.96.104","connectionName":"replication:ns_1@172.23.96.100->ns_1@172.23.96.104:bucket-1","bucket":"bucket-1"}
      {"vbucket":65,"type":"indexingInitiated","ts":1468335339.629042,"node":"172.23.96.101:11209","bucket":"bucket-1"}
      {"vbucket":65,"type":"backfillPhaseEnded","ts":1468335392.253436,"bucket":"bucket-1"}
      {"vbucket":65,"type":"seqnoWaitingStarted","ts":1468335392.253771,"seqno":1005267,"node":"172.23.96.101:11209","bucket":"bucket-1"}
      {"vbucket":65,"type":"seqnoWaitingStarted","ts":1468335392.253771,"seqno":1005267,"node":"172.23.96.104:11209","bucket":"bucket-1"}
      

      It looks like this issue has nothing to do with XDCR, though.

      Attachments

        No reviews matched the request. Check your Options in the drop-down menu of this section's header.

        Activity

          People

            pavelpaulau Pavel Paulau (Inactive)
            pavelpaulau Pavel Paulau (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty