Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-60342

Getting fewer results than expected for KNN queries with large values of k

    XMLWordPrintable

Details

    Description

       

      7.6.0 build 1995 

      1. Create a cluster
      2. Load SIFTSMALL dataset
      3. Run a query with k=10
      4. Get expected results
      5. Run a query with k=10000
      6. This returns only 1299 results
      7. Run a query with k=1500
      8. This also returns only 1299 results

      Index Definition :

      {
        "type": "fulltext-index",
        "name": "bucket_sift_bucket_aman_idx_vtriy-sname-vector_data",
        "uuid": "b3099e607d994a22",
        "sourceType": "gocbcore",
        "sourceName": "sift_bucket_aman",
        "sourceUUID": "133e47c638c8c4aa71a59535b45926a1",
        "planParams": {
          "maxPartitionsPerPIndex": 147,
          "indexPartitions": 7
        },
        "params": {
          "doc_config": {
            "docid_prefix_delim": "",
            "docid_regexp": "",
            "mode": "scope.collection.type_field",
            "type_field": "type"
          },
          "mapping": {
            "analysis": {},
            "default_analyzer": "standard",
            "default_datetime_parser": "dateTimeOptional",
            "default_field": "_all",
            "default_mapping": {
              "dynamic": true,
              "enabled": false
            },
            "default_type": "_default",
            "docvalues_dynamic": true,
            "index_dynamic": true,
            "store_dynamic": false,
            "type_field": "_type",
            "types": {
              "sift_scope.sift_collection": {
                "dynamic": false,
                "enabled": true,
                "properties": {
                  "sname": {
                    "dynamic": false,
                    "enabled": true,
                    "fields": [
                      {
                        "index": true,
                        "name": "sname",
                        "type": "text"
                      }
                    ]
                  },
                  "vector_data": {
                    "dynamic": false,
                    "enabled": true,
                    "fields": [
                      {
                        "dims": 128,
                        "index": true,
                        "name": "vector_data",
                        "similarity": "dot_product",
                        "type": "vector"
                      }
                    ]
                  }
                }
              }
            }
          },
          "store": {
            "indexType": "scorch",
            "segmentVersion": 16
          }
        },
        "sourceParams": {}
      } 

      Query:

      {    "query":{       "match_none":{        }    },    "explain":true,    "knn":[{       "field":"vector_data",       "k":10000,       "vector":[16.0,34.0,46.0,3.0,1.0,6.0,27.0,44.0,0.0,9.0,77.0,32.0,37.0,83.0,41.0,3.0,19.0,14.0,15.0,17.0,18.0,34.0,22.0,22.0,12.0,23.0,52.0,12.0,0.0,0.0,0.0,1.0,8.0,22.0,15.0,16.0,16.0,35.0,92.0,94.0,1.0,3.0,9.0,13.0,22.0,120.0,120.0,12.0,120.0,15.0,5.0,0.0,0.0,39.0,74.0,120.0,115.0,24.0,14.0,7.0,2.0,7.0,4.0,34.0,1.0,18.0,36.0,120.0,41.0,10.0,2.0,0.0,1.0,8.0,93.0,120.0,35.0,21.0,7.0,0.0,120.0,104.0,57.0,7.0,0.0,1.0,2.0,20.0,120.0,51.0,2.0,13.0,18.0,12.0,3.0,20.0,7.0,6.0,9.0,18.0,18.0,24.0,19.0,4.0,5.0,12.0,114.0,71.0,13.0,2.0,0.0,2.0,60.0,30.0,81.0,54.0,0.0,0.0,0.0,13.0,44.0,10.0,1.0,3.0,7.0,15.0,20.0,38.0]    }] } 

      Similar behavior is seen with another dataset, the SIFT 1M dataset:

      Getting only 1 result for k=2, and only 700 results for k=1000.

      {"type": "fulltext-index","name": "ashok-2-siftsmall","uuid": "1c68e55b4219a9c7","sourceType": "gocbcore","sourceName": "ashok-b1","sourceUUID": "2b43948869e688341558a307869a412f","planParams": {"maxPartitionsPerPIndex": 86,"indexPartitions": 12,"numReplicas": 2},"params": {"doc_config": {"docid_prefix_delim": "","docid_regexp": "","mode": "scope.collection.type_field","type_field": "type"},"mapping": {"analysis": {},"default_analyzer": "standard","default_datetime_parser": "dateTimeOptional","default_field": "_all","default_mapping": {"dynamic": true,"enabled": false},"default_type": "_default","docvalues_dynamic": true,"index_dynamic": true,"store_dynamic": true,"type_field": "_type","types": {"ashok-1.ashok-1": {"dynamic": true,"enabled": true,"properties": {"vector_data": {"dynamic": false,"enabled": true,"fields": [{"dims": 128,"index": true,"name": "vector_data","similarity": "l2_norm","store": true,"type": "vector"}]}}}}},"store": {"indexType": "scorch","segmentVersion": 16}},"sourceParams": {}} 

      Attachments

        1. image (3).png
          image (3).png
          84 kB
        2. image-2024-01-11-15-46-45-370.png
          image-2024-01-11-15-46-45-370.png
          483 kB
        3. image-2024-01-13-10-51-45-761.png
          image-2024-01-13-10-51-45-761.png
          211 kB
        4. image-2024-01-13-10-54-35-919.png
          image-2024-01-13-10-54-35-919.png
          146 kB
        5. screenshot-1.png
          screenshot-1.png
          157 kB
        No reviews matched the request. Check your Options in the drop-down menu of this section's header.

        Activity

          People

            sarthak.dua Sarthak Dua
            sarthak.dua Sarthak Dua
            Votes:
            0 Vote for this issue
            Watchers:
            8 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes

                PagerDuty