Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-35418

FTS: For a match_phrase query in one test, FTS doesn't return the same results as ES

    XMLWordPrintable

Details

    Description

      Build: 6.5.0.3143/3883/3274

      1. Load the attached dataset.
      2. Create an index with the following definition:

       
      {
       "mapping": {
       "default_analyzer": "standard", 
       "default_datetime_parser": "dateTimeOptional", 
       "type_field": "type", 
       "default_mapping": {
       "default_analyzer": "", 
       "dynamic": true, 
       "enabled": false
       }, 
       "default_type": "_default", 
       "analysis": {
       "analyzers": {
       "customAnalyzer1": {
       "type": "custom", 
       "char_filters": [
       "mapping"
       ], 
       "tokenizer": "unicode", 
       "token_filters": [
       "shingle", 
       "front_edge_ngram"
       ]
       }
       }, 
       "token_maps": {
       "stopwords": {
       "tokens": [
       "i", 
       "me", 
       "my", 
       "myself", 
       "we", 
       "our", 
       "ours", 
       "ourselves", 
       "you", 
       "your", 
       "yours", 
       "yourself", 
       "yourselves", 
       "he", 
       "him", 
       "his", 
       "himself", 
       "she", 
       "her", 
       "hers", 
       "herself", 
       "it", 
       "its", 
       "itself", 
       "they", 
       "them", 
       "their", 
       "theirs", 
       "themselves", 
       "what", 
       "which", 
       "who", 
       "whom", 
       "this", 
       "that", 
       "these", 
       "those", 
       "am", 
       "is", 
       "are", 
       "was", 
       "were", 
       "be", 
       "been", 
       "being", 
       "have", 
       "has", 
       "had", 
       "having", 
       "do", 
       "does", 
       "did", 
       "doing", 
       "would", 
       "should", 
       "could", 
       "ought", 
       "i'm", 
       "you're", 
       "he's", 
       "she's", 
       "it's", 
       "we're", 
       "they're", 
       "i've", 
       "you've", 
       "we've", 
       "they've", 
       "i'd", 
       "you'd", 
       "he'd", 
       "she'd", 
       "we'd", 
       "they'd", 
       "i'll", 
       "you'll", 
       "he'll", 
       "she'll", 
       "we'll", 
       "they'll", 
       "isn't", 
       "aren't", 
       "wasn't", 
       "weren't", 
       "hasn't", 
       "haven't", 
       "hadn't", 
       "doesn't", 
       "don't", 
       "didn't", 
       "won't", 
       "wouldn't", 
       "shan't", 
       "shouldn't", 
       "can't", 
       "cannot", 
       "couldn't", 
       "mustn't", 
       "let's", 
       "that's", 
       "who's", 
       "what's", 
       "here's", 
       "there's", 
       "when's", 
       "where's", 
       "why's", 
       "how's", 
       "a", 
       "an", 
       "the", 
       "and", 
       "but", 
       "if", 
       "or", 
       "because", 
       "as", 
       "until", 
       "while", 
       "of", 
       "at", 
       "by", 
       "for", 
       "with", 
       "about", 
       "against", 
       "between", 
       "into", 
       "through", 
       "during", 
       "before", 
       "after", 
       "above", 
       "below", 
       "to", 
       "from", 
       "up", 
       "down", 
       "in", 
       "out", 
       "on", 
       "off", 
       "over", 
       "under", 
       "again", 
       "further", 
       "then", 
       "once", 
       "here", 
       "there", 
       "when", 
       "where", 
       "why", 
       "how", 
       "all", 
       "any", 
       "both", 
       "each", 
       "few", 
       "more", 
       "most", 
       "other", 
       "some", 
       "such", 
       "no", 
       "nor", 
       "not", 
       "only", 
       "own", 
       "same", 
       "so", 
       "than", 
       "too", 
       "very"
       ], 
       "type": "custom"
       }
       }, 
       "tokenizers": {
       "alphanumeric": {
       "regexp": "[0-9a-zA-Z_]*", 
       "type": "regexp"
       }
       }, 
       "char_filters": {
       "mapping": {
       "regexp": "[f]", 
       "type": "regexp", 
       "replace": "ph"
       }
       }, 
       "token_filters": {
       "dict_compound_en": {
       "dict_token_map": "stop_en", 
       "type": "dict_compound"
       }, 
       "keyword_marker": {
       "type": "keyword_marker", 
       "keywords_token_map": "stopwords"
       }, 
       "truncate": {
       "length": 10, 
       "type": "truncate_token"
       }, 
       "dict_compound_fr": {
       "dict_token_map": "articles_fr", 
       "type": "dict_compound"
       }, 
       "ngram": {
       "max": 5, 
       "type": "ngram", 
       "min": 3
       }, 
       "front_edge_ngram": {
       "max": 5, 
       "type": "edge_ngram", 
       "back": false, 
       "min": 3
       }, 
       "back_edge_ngram": {
       "max": 5, 
       "type": "edge_ngram", 
       "back": true, 
       "min": 3
       }, 
       "shingle": {
       "output_original": "false", 
       "min": 2, 
       "filler": "", 
       "max": 5, 
       "separator": "", 
       "type": "shingle"
       }, 
       "length": {
       "max": 5, 
       "type": "length", 
       "min": 3
       }, 
       "stopwords": {
       "stop_token_map": "stopwords", 
       "type": "stop_tokens"
       }
       }
       }, 
       "default_field": "_all", 
       "types": {
       "emp": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {
       "dept": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {}, 
       "fields": [
       {
       "index": true, 
       "name": "dept", 
       "include_in_all": true, 
       "type": "text", 
       "analyzer": "customAnalyzer1", 
       "include_term_vectors": true, 
       "store": false
       }
       ]
       }, 
       "join_date": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {}, 
       "fields": [
       {
       "index": true, 
       "name": "join_date", 
       "include_in_all": true, 
       "type": "datetime", 
       "analyzer": "", 
       "include_term_vectors": true, 
       "store": false
       }
       ]
       }, 
       "manages": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {
       "team_size": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {}, 
       "fields": [
       {
       "index": true, 
       "name": "team_size", 
       "include_in_all": true, 
       "type": "number", 
       "analyzer": "", 
       "include_term_vectors": true, 
       "store": false
       }
       ]
       }, 
       "reports": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {}, 
       "fields": [
       {
       "index": true, 
       "name": "reports", 
       "include_in_all": true, 
       "type": "text", 
       "analyzer": "customAnalyzer1", 
       "include_term_vectors": true, 
       "store": false
       }
       ]
       }
       }, 
       "fields": []
       }, 
       "languages_known": {
       "enabled": true, 
       "dynamic": false, 
       "properties": {}, 
       "fields": [
       {
       "index": true, 
       "name": "languages_known", 
       "include_in_all": true, 
       "type": "text", 
       "analyzer": "customAnalyzer1", 
       "include_term_vectors": true, 
       "store": false
       }
       ]
       }
       }, 
       "fields": []
       }
       }
       }
      
      

      3. The following query returns 0 results from FTS, whereas ES returns 2 results:

       
      2019-08-02 10:18:31 | INFO | MainProcess | Cluster_Thread | [task.execute] ----------------------------------------------------- Query # 53 ----------------------------------------------------
      2019-08-02 10:18:31 | INFO | MainProcess | Cluster_Thread | [fts_base.run_fts_query] Running query \{"indexName": "custom_index", "from": 0, "fields": [], "explain": false, "ctl": {"timeout": 60000, "consistency": {"vectors": {}, "level": ""}}, "query": \{"field": "manages.reports", "match_phrase": "Balandria Deandra Kory"}, "size": 10000000} on node: 192.168.10.12:
      2019-08-02 10:18:31 | INFO | MainProcess | Cluster_Thread | [task.execute] Status: \{u'successful': 6, u'failed': 0, u'total': 6}
      2019-08-02 10:18:31 | INFO | MainProcess | Cluster_Thread | [task.execute] FTS hits for query: \{"field": "manages.reports", "match_phrase": "Balandria Deandra Kory"} is 0 (took 0.954437ms)
      2019-08-02 10:18:31 | INFO | MainProcess | Cluster_Thread | [es_base.search] ES query '\{u'query': {u'match_phrase': {u'manages.reports': u'Balandria Deandra Kory'}}}' 
      2019-08-02 10:18:31 | INFO | MainProcess | Cluster_Thread | [task.execute] ES hits for query: \{"query": {"match_phrase": {"manages.reports": "Balandria Deandra Kory"}}} on es_index is 2 (took 3ms)
      2019-08-02 10:18:31 | ERROR | MainProcess | Cluster_Thread | [task.execute] FAIL: FTS hits: 0, while ES hits: 2
      2019-08-02 10:26:55 | ERROR | MainProcess | Cluster_Thread | [task.execute] FAIL: Following 2 docs were not returned by FTS, but ES, printing 50: [u'emp10000825', u'emp10000884']
      
      

       

      Job: centos-fts_custom-map-rqg-scorch

      Test: ./testrunner -i test.ini -p get-logs=False,stop-on-failure=False,GROUP=P1,doc-per-day=1,bucket_size=200,scan_consistency=NOT_BOUNDED -t fts.stable_topology_fts.StableTopFTS.index_query_custom_mapping,items=1000,custom_map=True,num_custom_analyzers=1,multiple_filters=true,cm_id=35,num_queries=100,compare_es=True,GROUP=P1,disable_HTP=True,get-logs=False,stop-on-failure=False,index_type=upside_down,cluster=D+F,fts_quota=750

       

      Not sure if this is a regression; it behaves the same with builds 6.5.0-3143/3274.

      Attachments

        Issue Links

          No reviews matched the request. Check your Options in the drop-down menu of this section's header.

          Activity

            People

              girish.benakappa Girish Benakappa
              girish.benakappa Girish Benakappa
              Votes:
              0 Vote for this issue
              Watchers:
              7 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty