Reckonsys
Reckonsys

Reputation: 361

Partial search not working on Elasticsearch+Haystack in spite of using Ngram and Edgengram for building index

I am building the indexes like :

class BookIndex(indexes.SearchIndex,indexes.Indexable):

text= indexes.EdgeNgramField(document=True,use_template=True)
content_auto = indexes.EdgeNgramField(model_attr='title')
isbn_13 = indexes.CharField(model_attr='isbn_13')
validate = indexes.IntegerField(model_attr='validate')
price = indexes.IntegerField(model_attr='price')
authors = indexes.EdgeNgramField()
reviews = indexes.CharField()
publishers = indexes.EdgeNgramField()
institutes = indexes.EdgeNgramField()
sellers = indexes.CharField()
category = indexes.CharField()
sub_category = indexes.CharField()

I even tried using Ngram but partial search is not working.

I am querying it like SearchQuerySet().all().filter(content=query) I also tried SearchQuerySet().filter(content__contains=query) even then it is not showing results for partial match.

Can someone please help me out?

Upvotes: 0

Views: 756

Answers (2)

Siddharth Dagar
Siddharth Dagar

Reputation: 1

Using elasticsearch-2.x with django-haystack versions <2.5 causes this issue. Check if your versions match these.

elasticsearch-2.x onwards, boost is no longer a support meta-data which haystack passes to it. (Please refer to the answer https://stackoverflow.com/a/36847352/5108155)
This issue was fixed in 2.5 verison of haystack.

While building (or updating) your index, elasticsearch never got the ngram analyzer you intended to apply to the field. You can validate this by manually running- curl 'http://<elasticsearch_address>/<index_name>/?pretty' This will show only the types on the fields and no analyzer property.

Interesting thing is that haystack doesn't throw this exception because of an internal silently_fail property in the ElasticSearchBackend class.

Upvotes: 0

Pierre Criulanscy
Pierre Criulanscy

Reputation: 8686

Haystack is not very good with ElasticSearch, you cannot use proper indexing values so you have to provide custom ElasticSearchBackEnd to enable it:

#in a search_backends.py file
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
    ElasticsearchSearchBackend,
    ElasticsearchSearchEngine
)
from haystack.fields import EdgeNgramField as BaseEdgeNgramField, NgramField as BaseNgramField
from haystack.indexes import CharField

#just an example of which degree of configuration could be possible
CUSTOM_FIELD_TYPE = {
    'completion': {
        'type': 'completion',
        'payloads': True,
        'analyzer': 'suggest_analyzer',
        'preserve_separators': True,
        'preserve_position_increments': False
    },
}

# Custom Backend
class CustomElasticBackend(ElasticsearchSearchBackend):

    DEFAULT_ANALYZER = None

    def __init__(self, connection_alias, **connection_options):
        super(CustomElasticBackend, self).__init__(
                                connection_alias, **connection_options)
        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', None)
        self.DEFAULT_ANALYZER = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', "snowball")
        if user_settings:
            setattr(self, 'DEFAULT_SETTINGS', user_settings)

    def build_schema(self, fields):
        content_field_name, mapping = super(CustomElasticBackend,
                                              self).build_schema(fields)

        for field_name, field_class in fields.items():
            field_mapping = mapping[field_class.index_fieldname]

            index_analyzer = getattr(field_class, 'index_analyzer', None)
            search_analyzer = getattr(field_class, 'search_analyzer', None)
            field_analyzer = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            if field_mapping['type'] == 'string' and field_class.indexed:
                field_mapping["term_vector"] = "with_positions_offsets"
                if not hasattr(field_class, 'facet_for') and not field_class.field_type in('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = field_analyzer

            if field_class.field_type in CUSTOM_FIELD_TYPE:
                field_mapping = CUSTOM_FIELD_TYPE.get(field_class.field_type).copy()

            if index_analyzer and search_analyzer:
                field_mapping['index_analyzer'] = index_analyzer
                field_mapping['search_analyzer'] = search_analyzer
                if 'analyzer' in field_mapping:
                    del(field_mapping['analyzer'])

            mapping.update({field_class.index_fieldname: field_mapping})
        return (content_field_name, mapping)


class CustomElasticSearchEngine(ElasticsearchSearchEngine):
    backend = CustomElasticBackend


# Custom fields, just use the ones you need or create yours
class CustomFieldMixin(object):

    def __init__(self, **kwargs):
        self.analyzer = kwargs.pop('analyzer', None)
        self.index_analyzer = kwargs.pop('index_analyzer', None)
        self.search_analyzer = kwargs.pop('search_analyzer', None)
        super(CustomFieldMixin, self).__init__(**kwargs)

class CustomCharField(CustomFieldMixin, CharField):
    pass


class CustomCompletionField(CustomFieldMixin, CharField):
    field_type = 'completion'


class CustomEdgeNgramField(CustomFieldMixin, BaseEdgeNgramField):
    pass


class CustomNgramField(CustomFieldMixin, BaseNgramField):
    pass




#settings.py
ELASTICSEARCH_INDEX_SETTINGS = {
    'settings': {
        "analysis": {
            "analyzer": {
                "custom_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter":  [ "lowercase", "asciifolding" ]
                },
                "str_index_analyzer" : {
                    "type": "custom",
                    "tokenizer" : "haystack_ngram_tokenizer",
                    "filter" : ["stopwords", "asciifolding", "lowercase", "snowball", "elision", "worddelimiter"]
                },
                "str_search_analyzer" : {
                    "type": "custom",
                    "tokenizer" : "standard",
                    "filter" : ["stopwords", "asciifolding", "lowercase", "snowball", "elision", "worddelimiter"]
                },
                "suggest_analyzer": {
                    "type":"custom",
                    "tokenizer":"standard",
                    "filter":[
                        "stopwords",
                        "standard",
                        "lowercase",
                        "asciifolding"
                    ]
                },
            },
            "tokenizer": {
                "haystack_ngram_tokenizer": {
                    "type": "nGram",
                    "min_gram": 2,
                    "max_gram": 20,
                },
            },
            "filter": {
                "elision": {
                    "type": "elision",
                    "articles": ["l", "m", "t", "qu", "n", "s", "j", "d"]
                },
                "stopwords": {
                    "type": "stop",
                    "stopwords": ["_french_", "_english_"],
                    "ignore_case": True
                },
                "worddelimiter": {
                    "type": "word_delimiter"
                }
            }
        }
    }
}

#Haystack settings
HAYSTACK_CONNECTIONS = {
    'default': {
        ...
        'ENGINE': 'path.to.search_backends.CustomElasticSearchEngine',
        ...
    },
}

Upvotes: 1

Related Questions