Reputation: 378
When trying to index new users in our Django app, Elasticsearch is unable to index them and raises a KeyError for the key 'created'.
Traceback:
Traceback (most recent call last):
File "manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "/home/venv/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 363, in execute_from_command_line
utility.execute()
File "/home/venv/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 355, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home//venv/local/lib/python2.7/site-packages/django/core/management/base.py", line 283, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/venv/local/lib/python2.7/site-packages/django/core/management/base.py", line 330, in execute
output = self.handle(*args, **options)
File "/home/app/management/commands/index_users.py", line 19, in handle
bulk_indexing(User)
File "/home/uapp/management/commands/index_users.py", line 12, in bulk_indexing
bulk(client=es, actions=(m.indexing() for m in model.objects.all()))
File "/home/venv/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
for ok, item in streaming_bulk(client, actions, **kwargs):
File "/home//venv/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
client.transport.serializer):
File "/home/venv/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 58, in _chunk_actions
for action, data in actions:
File "/home/app/management/commands/index_users.py", line 12, in <genexpr>
bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
File "/home/app/models.py", line 137, in indexing
obj.save(index="users")
File "/home/venv/local/lib/python2.7/site-packages/elasticsearch_dsl/document.py", line 418, in save
return meta['created']
KeyError: 'created'
User ES Index:
import logging
import json
from elasticsearch_dsl import Index, DocType, Integer, Text, Date, Completion, GeoPoint, analyzer, Q
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
from django.conf import settings
from app.es.connection import conn
logger = logging.getLogger("app.es")
# The "users" index, configured with five shards and no replicas.
users_index = Index('users')
users_index.settings(number_of_shards=5, number_of_replicas=0)
@users_index.doc_type
class User(DocType):
    """Elasticsearch document type for users, registered on ``users_index``."""

    # Free-text fields.
    email = Text()
    first_name = Text()
    last_name = Text()
    date_joined = Date()
    expertise = Text()
    institution = Text()
    position = Text()

    # Autocomplete: completion field analyzed with the "standard" analyzer.
    name_suggest = Completion(analyzer=analyzer("standard"))
def user_search(query):
    """Search indexed users with a phrase-prefix match and return plain dicts.

    Builds a ``multi_match`` query of type ``phrase_prefix`` over the ``_all``
    field, logs the serialized query and the total hit count, and converts
    every hit into a dictionary.

    Args:
        query: The text to match.

    Returns:
        A list with one dict per hit, holding the keys ``name``, ``email``,
        ``position``, ``institution``, ``expertise`` (values joined with
        ", ") and ``id`` (the hit's meta id).
    """
    s = User.search()
    # NOTE(review): Elasticsearch 6.x does not enable ``_all`` on newly created
    # indices -- confirm the target index still has it, otherwise this query
    # may match nothing.
    s.query = Q(
        'multi_match',
        query=query,
        fields=["_all"],
        type="phrase_prefix",
    )
    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute()
    logger.info("Got {} hits.".format(results.hits.total))
    return [
        {
            "name": hit.name_suggest,
            "email": hit.email,
            "position": hit.position,
            "institution": hit.institution,
            # A hit may carry no expertise at all; join an empty list then.
            "expertise": ", ".join(hit.expertise or []),
            "id": hit.meta.id,
        }
        for hit in results.hits
    ]
User model:
class User(AbstractBaseUser, PermissionsMixin):
    """Custom Django Auth User. We can extend this to include any metadata we want on users."""

    email = models.EmailField(unique=True)
    first_name = models.CharField(max_length=30)
    last_name = models.CharField(max_length=30)
    date_joined = models.DateTimeField(auto_now_add=True)
    is_active = models.BooleanField(default=True)
    is_staff = models.BooleanField(default=False)
    is_superuser = models.BooleanField(default=False)

    # Profile information
    street_address = models.CharField(max_length=200, blank=True, null=True)
    city = models.CharField(max_length=100, blank=True, null=True)
    state = models.CharField(max_length=100, blank=True, null=True)
    zip_code = models.CharField(max_length=10)
    institution = models.CharField(max_length=100, blank=True, null=True)
    phone_number = models.CharField(max_length=100, blank=True, null=True)
    position = models.CharField(max_length=100)
    terms_of_service = models.BooleanField(default=False)
    expertise = models.ManyToManyField("Expertise")
    # Declared once; an identical earlier declaration of this field was
    # overwritten by this one anyway.
    notification_preference = models.CharField(max_length=1, choices=NOTIPREF, null=True)
    backup_email_address = models.EmailField(unique=True, blank=True, null=True)

    USERNAME_FIELD = "email"

    objects = UserManager()

    class Meta:
        verbose_name = 'user'
        verbose_name_plural = 'users'

    @property
    def full_name(self):
        """Return "first last" stripped, or the email when both names are empty."""
        full_name = '%s %s' % (self.first_name, self.last_name) if self.first_name or self.last_name else self.email
        return full_name.strip()

    @property
    def print_queue_size(self):
        """Return the number of ``PrintQueue`` rows belonging to this user."""
        return PrintQueue.objects.filter(user=self).count()

    @property
    def notifications(self):
        """Return all notifications of this user."""
        # NOTE(review): this property reads ``self.notifications`` -- the very
        # name it defines. Unless something not visible here rebinds
        # ``notifications`` on the class (e.g. a reverse relation), this
        # recurses; confirm.
        return self.notifications.all()

    @property
    def num_unread_notifications(self):
        """Return how many unread notifications this user has."""
        return len(self.notifications.unread())

    @property
    def expertise_str(self):
        """Return the string form of every related expertise, as a list."""
        return [str(t) for t in self.expertise.all()]

    def get_short_name(self):
        """Required by django admin"""
        return self.first_name

    def email_user(self, subject, message, from_email=None, **kwargs):
        """Sends an email to this User."""
        send_mail(subject, message, from_email, [self.email], **kwargs)

    def update(self, **kwargs):
        """Set each keyword argument as an attribute, then save once."""
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.save()

    def indexing(self):
        """Save this user to the "users" index and return the document dict.

        Returns:
            The ``UserDoc`` serialized with ``to_dict(include_meta=True)``.
        """
        obj = UserDoc(
            meta={"id": self.id},
            email=self.email,
            first_name=self.first_name,
            last_name=self.last_name,
            date_joined=self.date_joined,
            expertise=self.expertise_str,
            institution=self.institution,
            position=self.position,
            name_suggest=self.full_name,
        )
        obj.save(index="users")
        return obj.to_dict(include_meta=True)
Indexing Command:
from django.core.management.base import BaseCommand
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from app.models import User, Resource
def bulk_indexing(model):
    """Send every instance of ``model`` to Elasticsearch via the bulk helper.

    Args:
        model: A model class whose instances provide an ``indexing()`` method
            returning the action to submit.
    """
    client = Elasticsearch()
    # Generator expression: each action is built only when ``bulk`` asks for it.
    actions = (instance.indexing() for instance in model.objects.all().iterator())
    bulk(client=client, actions=actions)
class Command(BaseCommand):
    """Management command that indexes every ``User`` through ``bulk_indexing``."""

    help = 'Index all users'

    def handle(self, *args, **options):
        """Run the bulk indexing, then write a success message to stdout."""
        bulk_indexing(User)
        success_message = self.style.SUCCESS("Indexed all users.")
        self.stdout.write(success_message)
The problem seems to be with the iterator. Even more odd, the auto suggest seems to work on my staging site, but not on production. Both sites are using the EXACT same code. I'm at a total loss for words.
production mapping:
{
"users" : {
"mappings" : {
"user" : {
"properties" : {
"date_joined" : {
"type" : "date"
},
"email" : {
"type" : "text"
},
"expertise" : {
"type" : "text"
},
"first_name" : {
"type" : "text"
},
"institution" : {
"type" : "text"
},
"last_name" : {
"type" : "text"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"position" : {
"type" : "text"
}
}
}
}
}
}
staging mapping:
"users" : {
"mappings" : {
"doc" : {
"_all" : {
"enabled" : true
},
"properties" : {
"date_joined" : {
"type" : "date"
},
"email" : {
"type" : "text"
},
"expertise" : {
"type" : "text"
},
"first_name" : {
"type" : "text"
},
"institution" : {
"type" : "text"
},
"last_name" : {
"type" : "text"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"position" : {
"type" : "text"
}
}
},
"user" : {
"_all" : {
"enabled" : true
},
"properties" : {
"date_joined" : {
"type" : "date"
},
"email" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"expertise" : {
"type" : "text"
},
"first_name" : {
"type" : "text"
},
"institution" : {
"type" : "text"
},
"last_name" : {
"type" : "text"
},
"name_suggest" : {
"type" : "completion",
"analyzer" : "standard",
"preserve_separators" : true,
"preserve_position_increments" : true,
"max_input_length" : 50
},
"position" : {
"type" : "text"
}
}
}
}
}
new error:
Traceback (most recent call last):
File "manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 363, in execute_from_command_line
utility.execute()
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 355, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/base.py", line 283, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/ubuntu/local/lib/python2.7/site-packages/django/core/management/base.py", line 330, in execute
output = self.handle(*args, **options)
File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 17, in handle
bulk_indexing(User)
File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 10, in bulk_indexing
bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
for ok, item in streaming_bulk(client, actions, **kwargs):
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
client.transport.serializer):
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 58, in _chunk_actions
for action, data in actions:
File "/home/ubuntu/sixnexus/app/management/commands/index_users.py", line 10, in <genexpr>
bulk(client=es, actions=(m.indexing() for m in model.objects.all().iterator()))
File "/home/ubuntu/sixnexus/app/models.py", line 137, in indexing
obj.save(index="users")
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch_dsl/document.py", line 419, in save
**doc_meta
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 76, in _wrapped
return func(*args, params=params, **kwargs)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 300, in index
_make_path(index, doc_type, id), params=params, body=body)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/transport.py", line 314, in perform_request
status, headers_response, data = connection.perform_request(method, url, params, body, headers=headers, ignore=ignore, timeout=timeout)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 163, in perform_request
self._raise_error(response.status, raw_data)
File "/home/ubuntu/local/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 125, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, u'illegal_argument_exception', u'Rejecting mapping update to [users] as the final mapping would have more than 1 type: [doc, user]')
Upvotes: 2
Views: 2369
Reputation: 1547
Found it. It is a bug reported here: https://github.com/elastic/elasticsearch-dsl-py/issues/793 Seems like your version of the library breaks in ES6 and above.
See, they have updated the line to:
https://github.com/elastic/elasticsearch-dsl-py/blob/75ee4e36027cd64c128094b3ff279ec332e8a784/elasticsearch_dsl/document.py#L426
from what it was originally. It was breaking in its original form: return meta['created'].
Upgrading the library should fix it.
Edit: The new error is because of the fact that multiple type mappings are disallowed in ES6 and above. Note that indices migrated from 5.x with multiple mappings work in ES6. You might want to separate the types into their indices, or whatever suits your needs. This document should help there: https://www.elastic.co/guide/en/elasticsearch/reference/6.x/removal-of-types.html
Upvotes: 1