kampias
kampias

Reputation: 529

Unable to search using elasticsearch in django with django-elasticsearch-dsl-drf (Set fielddata=true on [title.raw])

I have followed the quick start guide shown here, in order to experiment with elasticsearch searching and a sample Django app I am playing with.

Using elasticsearch 6.3.1 and latest django-elasticsearch-dsl-drf

The result is the following error:

RequestError at /search/movies/

RequestError(400, 'search_phase_execution_exception', 'Fielddata is disabled on text fields by default. Set fielddata=true on [title.raw] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead.')

I have added an extra app named search_indexes to the Django project. Here is the documents.py from this app:

# Name of the Elasticsearch index
INDEX = Index('search_movies')

# See Elasticsearch Indices API reference for available settings
# (a single shard with a single replica is configured here).
INDEX.settings(
    number_of_shards=1,
    number_of_replicas=1
)


# Custom analyzer registered under the name 'html_strip': the `html_strip`
# char filter runs first, then the `standard` tokenizer, then the `standard`,
# `lowercase`, `stop` and `snowball` token filters in that order.
html_strip = analyzer(
    'html_strip',
    tokenizer="standard",
    filter=["standard", "lowercase", "stop", "snowball"],
    char_filter=["html_strip"]
)


@INDEX.doc_type
class MovieDocument(DocType):
    """Movie Elasticsearch document.

    Registered on ``INDEX`` through the ``doc_type`` decorator. ``title`` and
    ``summary`` are analyzed with ``html_strip`` and each declares a ``raw``
    sub-field.
    """

    id = fields.IntegerField(attr='id')

    title = fields.StringField(
        analyzer=html_strip,
        fields={
            # NOTE(review): this is the `title.raw` sub-field named in the
            # reported RequestError. It is declared as a StringField with the
            # 'keyword' analyzer, while the error message suggests using a
            # keyword field instead.
            'raw': fields.StringField(analyzer='keyword'),
        }
    )

    summary = fields.StringField(
        analyzer=html_strip,
        fields={
            # Declared the same way as `title.raw`.
            'raw': fields.StringField(analyzer='keyword'),
        }
    )

Now, after running manage.py search_index --rebuild, I can visit the URL http://localhost:9200/search_movies/_search?pretty, where I can see that the index has been created properly and that it contains the data as well.

Moving on to the next part, this is my serializers.py file:

from django_elasticsearch_dsl_drf.serializers import DocumentSerializer

from .documents import MovieDocument


class MovieDocumentSerializer(DocumentSerializer):
    """Serializer for the document.

    Serializes ``MovieDocument`` results, exposing the fields listed in
    ``Meta.fields``.
    """
    class Meta(object):
        """Meta options."""
        # Specify the correspondent document class
        document = MovieDocument

        # List the serializer fields. Note, that the order of the fields
        # is preserved in the ViewSet.
        # NOTE(review): 'people' and 'genres' are not declared in the
        # MovieDocument excerpt shown in documents.py -- confirm they exist
        # on the full document class.
        fields = (
            'id',
            'title',
            'summary',
            'people',
            'genres',
        )


and then my views.py

from django_elasticsearch_dsl_drf.constants import (
    LOOKUP_FILTER_TERMS,
    LOOKUP_FILTER_RANGE,
    LOOKUP_FILTER_PREFIX,
    LOOKUP_FILTER_WILDCARD,
    LOOKUP_QUERY_IN,
    LOOKUP_QUERY_GT,
    LOOKUP_QUERY_GTE,
    LOOKUP_QUERY_LT,
    LOOKUP_QUERY_LTE,
    LOOKUP_QUERY_EXCLUDE,
)
from django_elasticsearch_dsl_drf.filter_backends import (
    FilteringFilterBackend,
    IdsFilterBackend,
    OrderingFilterBackend,
    DefaultOrderingFilterBackend,
    SearchFilterBackend,
)
from django_elasticsearch_dsl_drf.viewsets import BaseDocumentViewSet
from django_elasticsearch_dsl_drf.pagination import PageNumberPagination

from .documents import MovieDocument
from .serializers import MovieDocumentSerializer


class MovieDocumentView(BaseDocumentViewSet):
    """The MovieDocument view.

    Document viewset over ``MovieDocument`` with filtering, ids, ordering
    and search backends and page-number pagination.
    """

    document = MovieDocument
    serializer_class = MovieDocumentSerializer
    pagination_class = PageNumberPagination
    lookup_field = 'id'
    # NOTE(review): the server log shows a UserWarning that
    # SearchFilterBackend is deprecated in favour of
    # `CompoundSearchFilterBackend`.
    filter_backends = [
        FilteringFilterBackend,
        IdsFilterBackend,
        OrderingFilterBackend,
        DefaultOrderingFilterBackend,
        SearchFilterBackend,
    ]
    # Define search fields
    search_fields = (
        'title',
        'summary',
    )
    # Define filter fields
    filter_fields = {
        'id': {
            'field': 'id',
            # Note, that we limit the lookups of id field in this example,
            # to `range`, `in`, `gt`, `gte`, `lt` and `lte` filters.
            'lookups': [
                LOOKUP_FILTER_RANGE,
                LOOKUP_QUERY_IN,
                LOOKUP_QUERY_GT,
                LOOKUP_QUERY_GTE,
                LOOKUP_QUERY_LT,
                LOOKUP_QUERY_LTE,
            ],
        },
        # Filtering on `title` is directed at the `title.raw` sub-field.
        'title': 'title.raw',
        'genres': {
            'field': 'genres',
            # Note, that we limit the lookups of `genres` field
            # to `terms`, `prefix`, `wildcard`, `in` and
            # `exclude` filters.
            'lookups': [
                LOOKUP_FILTER_TERMS,
                LOOKUP_FILTER_PREFIX,
                LOOKUP_FILTER_WILDCARD,
                LOOKUP_QUERY_IN,
                LOOKUP_QUERY_EXCLUDE,
            ],
        },
        # Same lookups as `genres`, but against the `genres.raw` sub-field.
        'genres.raw': {
            'field': 'genres.raw',
            'lookups': [
                LOOKUP_FILTER_TERMS,
                LOOKUP_FILTER_PREFIX,
                LOOKUP_FILTER_WILDCARD,
                LOOKUP_QUERY_IN,
                LOOKUP_QUERY_EXCLUDE,
            ],
        },
    }
    # Define ordering fields
    # (ordering by `title` is mapped onto the `title.raw` sub-field).
    ordering_fields = {
        'id': 'id',
        'title': 'title.raw',
    }
    # Specify default ordering
    # NOTE(review): 'title' is part of the default ordering and maps to
    # `title.raw`, so presumably every list request sorts on that sub-field,
    # which is the field named in the reported RequestError -- confirm.
    ordering = ('id', 'title')

Lastly, my urls.py is the following:

from django.conf.urls import url, include
from rest_framework import routers
from .views import MovieDocumentView


# Router that registers MovieDocumentView under the `movies` prefix.
# NOTE(review): newer Django REST framework releases spell this keyword
# `basename` rather than `base_name` -- confirm against the installed version.
router = routers.DefaultRouter()
router.register(r'movies', MovieDocumentView, base_name='moviedocument')

# Expose every route generated by the router at the root of this URLconf.
urlpatterns = [
    url(r'^', include(router.urls)),
]

So, when I visit a URL like http://localhost:8000/search/movies/ or http://localhost:8000/search/movies/?summary__contains=photography, the error I mentioned above shows up.

Here is the stack trace:

Starting development server at http://127.0.0.1:8000/
Quit the server with CONTROL-C.
/usr/local/lib/python3.7/dist-packages/django_elasticsearch_dsl_drf/filter_backends/search/historical.py:231: UserWarning: SearchFilterBackend is deprecated. Switch to `CompoundSearchFilterBackend`.
  self.__class__.__name__
GET http://localhost:9200/search_movies/doc/_search [status:400 request:0.013s]
Internal Server Error: /search/movies/
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/django/core/handlers/exception.py", line 34, in inner
    response = get_response(request)
  File "/usr/local/lib/python3.7/dist-packages/django/core/handlers/base.py", line 115, in _get_response
    response = self.process_exception_by_middleware(e, request)
  File "/usr/local/lib/python3.7/dist-packages/django/core/handlers/base.py", line 113, in _get_response
    response = wrapped_callback(request, *callback_args, **callback_kwargs)
  File "/usr/local/lib/python3.7/dist-packages/django/views/decorators/csrf.py", line 54, in wrapped_view
    return view_func(*args, **kwargs)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/viewsets.py", line 103, in view
    return self.dispatch(request, *args, **kwargs)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/views.py", line 483, in dispatch
    response = self.handle_exception(exc)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/views.py", line 443, in handle_exception
    self.raise_uncaught_exception(exc)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/views.py", line 480, in dispatch
    response = handler(request, *args, **kwargs)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/mixins.py", line 48, in list
    return Response(serializer.data)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/serializers.py", line 765, in data
    ret = super(ListSerializer, self).data
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/serializers.py", line 262, in data
    self._data = self.to_representation(self.instance)
  File "/home/er/.local/lib/python3.7/site-packages/rest_framework/serializers.py", line 683, in to_representation
    self.child.to_representation(item) for item in iterable
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch_dsl/search.py", line 329, in __iter__
    return iter(self.execute())
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch_dsl/search.py", line 706, in execute
    **self._params
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch/client/utils.py", line 84, in _wrapped
    return func(*args, params=params, **kwargs)
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch/client/__init__.py", line 844, in search
    "GET", _make_path(index, doc_type, "_search"), params=params, body=body
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch/transport.py", line 353, in perform_request
    timeout=timeout,
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch/connection/http_urllib3.py", line 236, in perform_request
    self._raise_error(response.status, raw_data)
  File "/home/er/.local/lib/python3.7/site-packages/elasticsearch/connection/base.py", line 162, in _raise_error
    status_code, error_message, additional_info
elasticsearch.exceptions.RequestError: RequestError(400, 'search_phase_execution_exception', 'Fielddata is disabled on text fields by default. Set fielddata=true on [title.raw] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead.')
[07/Aug/2019 11:03:38] "GET /search/movies/?summary__contains=photography HTTP/1.1" 500 154541

Upvotes: 1

Views: 2124

Answers (1)

Val
Val

Reputation: 217514

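What you're supposed to do is use a KeywordField instead of a StringField with the keyword analyzer. A StringField is still mapped as a text field, and sorting on a text field requires fielddata, which is disabled by default; a KeywordField is mapped as a keyword field, which can be sorted and filtered on directly. Make the change below (and likewise for summary.raw), then rebuild the index so the new mapping takes effect: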

title = fields.StringField(
    analyzer=html_strip,
    fields={
        # Map the raw sub-field as a real keyword field rather than an
        # analyzed string field that merely uses the 'keyword' analyzer, so
        # it can be sorted and filtered on without enabling fielddata.
        'raw': fields.KeywordField(),  # <---- change this
    }
)

Upvotes: 3

Related Questions