cidthecoatrack
cidthecoatrack

Reputation: 1441

Nest for ElasticSearch is unusably slow

We recently updated from ElasticSearch 1.5 to 5.6, resolving a number of things such as field explosion and other issues. However, now that we are on the newer version, we are seeing unacceptable (and frankly ridiculous) performance concerns.

Hits | Took | Nest 1.5 | Nest 5.6
---------------------------------
0    | 1ms  | 100ms    | 1190ms
1    | 1ms  | 100ms    | 720ms
2    | 4ms  | 100ms    | 350ms
42   | 10ms | 1100ms   | 3270ms
63   | 9ms  | 1700ms   | 4700ms
100  | 25ms | 2800ms   | 7400ms

We have a static Nest client using a SingleNodeConnectionPool. The queries are very simple, and we are paging larger results (usually no more than 100). When we were on 1.5, all of these queries came back within 3 seconds. Why are the Nest requests now 3 to 4 times slower than they were before?

Build Index

// Analyzer that splits text on every character NOT in the listed alphanumeric/accented set
// and lowercases the resulting tokens.
// NOTE(review): the upper-case accented run contains a character that looks out of place
// ("ÏΟÇ" where the lower-case run has "ïîç") - confirm the pattern against the intended alphabet.
var alphanumericAnalyzer = new PatternAnalyzer
{
    Lowercase = true,
    Pattern = "[^a-zA-Z0-9áéíñóúüÁÉÍÑÓÚÜàâäôéèëêïîçùûüÿæœÀÂÄÔÉÈËÊÏΟÇÙÛÜÆŒäöüßÄÖÜẞàèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ]"
};

// Whole-value (keyword tokenizer) analyzer with a lowercase filter.
var lowercaseKeywordAnalyzer = new CustomAnalyzer
{
    Tokenizer = "keyword",
    Filter = new List<string> { "lowercase" }
};

// Shard/replica counts come from configuration; the analysis containers start empty and are filled below.
var indexSettings = new IndexSettings
{
    NumberOfReplicas = NestClient.Config.Replicas,
    NumberOfShards = NestClient.Config.Shards,
    Analysis = new Analysis
    {
        Analyzers = new Analyzers(),
        Tokenizers = new Tokenizers()
    }
};

var analysis = indexSettings.Analysis;

analysis.Analyzers.Add("alphanumeric_analyzer", alphanumericAnalyzer);
analysis.Analyzers.Add("keyword_analyzer", lowercaseKeywordAnalyzer);

// Edge n-gram tokenizer over letters and digits; gram sizes come from configuration.
analysis.Tokenizers.Add("ngrams_tokenizer", new EdgeNGramTokenizer
{
    MaxGram = NestClient.Config.MaxGram,
    MinGram = NestClient.Config.MinGram,
    TokenChars = new List<TokenChar> { TokenChar.Letter, TokenChar.Digit }
});

// Analyzer that pairs the edge n-gram tokenizer with a lowercase filter.
analysis.Analyzers.Add("ngrams_analyzer", new CustomAnalyzer
{
    Filter = new List<string> { "lowercase" },
    Tokenizer = "ngrams_tokenizer"
});

// Create the index with the settings above, then push the SearchAsset mapping with the _all field disabled.
var createRequest = new CreateIndexRequest(IndexName) { Settings = indexSettings };
var createResponse = Client.CreateIndex(createRequest).Log(isIndexRebuild: true);

var mapResult = Client
    .Map<SearchAsset>(m => m.AllField(x => x.Enabled(false)).AutoMap())
    .Log(isIndexRebuild: true);

Search Asset

/// <summary>
/// Document type stored in the search index. The mapping attributes on each property name the
/// analyzer(s) to use for that field; several read-only flags are computed from the counters
/// held on the same object.
/// </summary>
/// <remarks>
/// The analyzer names used below ("keyword_analyzer", "alphanumeric_analyzer", "ngrams_analyzer")
/// are plain strings - presumably they must match analyzers registered in the index settings.
/// </remarks>
[ElasticsearchType(IdProperty = "assetID")]
public class SearchAsset
{
    /// <summary>Initializes every list-typed property to an empty list.</summary>
    public SearchAsset()
    {
        Extensions = new List<string>();
        Metadata = new List<MetadataValue>();
        Notes_Alphanumeric = new List<string>();
        Notes_Ngrams = new List<string>();
        UserFlags = new List<long>();
        AssetTypes = new List<string>();
    }

    // Backing field for Filename.
    private string filename;

    public long AssetID { get; set; }
    public long JobID { get; set; }
    public long JobFolderID { get; set; }
    public int Width { get; set; }
    public int Height { get; set; }

    [Text(Analyzer = "keyword_analyzer", Fielddata = true)]
    public string StorageFolderPath { get; set; }

    public bool Selected { get; set; }
    public long? SelectUserID { get; set; }
    public DateTime? SelectDateTime { get; set; }

    [Text(Analyzer = "keyword_analyzer", Fielddata = true)]
    public string JobFolderName { get; set; }

    /// <summary>
    /// File name. Setting it also writes the same value to <see cref="Filename_Alphanumeric"/> and
    /// <see cref="Filename_Ngrams"/>, so the one value is indexed under three differently-analyzed fields.
    /// </summary>
    [Text(Analyzer = "keyword_analyzer", Fielddata = true)]
    public string Filename
    {
        get { return filename; }
        set { filename = Filename_Alphanumeric = Filename_Ngrams = value; }
    }

    // Copy of Filename (assigned only by the Filename setter), analyzed with alphanumeric_analyzer.
    [Text(Analyzer = "alphanumeric_analyzer", SearchAnalyzer = "alphanumeric_analyzer")]
    public string Filename_Alphanumeric { get; private set; }

    // Copy of Filename (assigned only by the Filename setter), indexed with ngrams_analyzer
    // and searched with alphanumeric_analyzer.
    [Text(Analyzer = "ngrams_analyzer", SearchAnalyzer = "alphanumeric_analyzer")]
    public string Filename_Ngrams { get; private set; }

    [Text(Analyzer = "keyword_analyzer", Fielddata = true)]
    public string OriginalTypeCd { get; set; }
    public int NoteCount { get; set; }
    public int PageCount { get; set; }

    public long Color { get; set; }
    public bool HasMarkup { get; set; }
    public long Status { get; set; }
    public int TotalGalleryCount { get; set; }
    public int ClosedGalleryCount { get; set; }

    //HACK: We would ideally script these in ES, but Nest/Painless has poor documentation, and we have yet to get something working within that framework.
    //Doing it here actually works, so relying on that instead.
    // True when the asset belongs to no galleries.
    public bool NoStatus { get { return TotalGalleryCount == 0; } }
    // True when at least one user flag is present. Reads UserFlags.Count, so UserFlags must not be null.
    public bool Flagged { get { return UserFlags.Count > 0; } }
    // True when the asset is in at least one gallery and every one of them is closed.
    public bool NotPending { get { return TotalGalleryCount > 0 && TotalGalleryCount == ClosedGalleryCount; } }
    // True when the total gallery count exceeds the closed gallery count.
    public bool Pending { get { return TotalGalleryCount > ClosedGalleryCount; } }
    // True when the asset has at least one note.
    public bool Notes { get { return NoteCount > 0; } }

    public long ByteCount { get; set; }
    public DateTime AddedOn { get; set; }

    // Marked Ignore = true in the mapping attribute.
    [Object(Ignore = true)]
    public IndexItemType IndexItemType { get; set; }

    [Text(Analyzer = "keyword_analyzer", Fielddata = true)]
    public List<string> Extensions { get; set; }

    // Values compared against a user id by the selectors - presumably the ids of users who flagged the asset.
    [Number]
    public List<long> UserFlags { get; set; }

    // Mapped as a nested field; the list instance is created in the constructor and has a private setter.
    [Nested]
    public List<MetadataValue> Metadata { get; private set; }

    [Text]
    public List<string> AssetTypes { get; set; }

    [Text(Analyzer = "ngrams_analyzer", SearchAnalyzer = "alphanumeric_analyzer")]
    public List<string> Notes_Ngrams { get; private set; }

    [Text(Analyzer = "alphanumeric_analyzer", SearchAnalyzer = "alphanumeric_analyzer")]
    public List<string> Notes_Alphanumeric { get; private set; }
}

/// <summary>
/// One metadata entry of a <see cref="SearchAsset"/> (held in its nested Metadata list).
/// Carries a metadata id, two differently-analyzed text fields and a date.
/// </summary>
public class MetadataValue
{
    public long MetadataID { get; set; }
    // Text indexed with ngrams_analyzer and searched with alphanumeric_analyzer.
    [Text(Analyzer = "ngrams_analyzer", SearchAnalyzer = "alphanumeric_analyzer")]
    public string Ngrams { get; set; }
    // Text indexed and searched with alphanumeric_analyzer.
    // NOTE(review): presumably holds the same text as Ngrams - nothing here enforces that; confirm with the indexer.
    [Text(Analyzer = "alphanumeric_analyzer", SearchAnalyzer = "alphanumeric_analyzer")]
    public string Alphanumeric { get; set; }
    public DateTime Date { get; set; }
}

Nest query code

/// <summary>
/// Runs a paged search for <paramref name="collection"/> on behalf of <paramref name="user"/>,
/// logs the response, and converts every hit into a business-level asset.
/// </summary>
/// <param name="user">User passed to the query/sort builders and recorded in the search log entry.</param>
/// <param name="collection">Input for the query/sort builders; its FirstIndex/LastIndex define the page (From / Size).</param>
/// <param name="aggregations">
/// Optional. When null, or when its CalculateTotalCount is false, <c>TotalCount</c> is reported as 0.
/// </param>
/// <param name="selectors">
/// Required even though the signature defaults it to null (kept for source compatibility):
/// it supplies the doc-value field list and performs the hit-to-asset conversion.
/// </param>
/// <returns>The converted assets for the requested page, plus the total hit count when requested.</returns>
/// <exception cref="ArgumentNullException"><paramref name="selectors"/> is null.</exception>
public SearchResult RunSearch(IUser user, AssetCollection collection, Aggregations aggregations = null, FieldSelectors selectors = null)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException halfway through building the request.
    if (selectors == null)
        throw new ArgumentNullException("selectors");

    var elasticSearchManager = new ElasticSearchManager();

    var query = ElasticSearchHelper.BuildWhereExpression(user, collection);

    var sorts = ElasticSearchHelper.BuildOrderExpression<SearchAsset>(user, collection);

    //We want to specify the SearchAsset type so that we can both specify an index on the request, and also rely on the type mapping in the settings
    var request = new SearchRequest<SearchAsset>(elasticSearchManager.IndexName);
    request.Sort = sorts;
    request.Query = query;
    request.From = collection.FirstIndex;
    request.DocvalueFields = selectors.ElasticSearchFields.ToArray();
    request.Size = collection.LastIndex - collection.FirstIndex;
    request.Aggregations = new AggregationDictionary();

    //INFO: This allows us to log the NEST request body
    // NOTE(review): disabling direct streaming makes the client buffer request/response bytes in memory;
    // per the NEST documentation this carries a performance cost - confirm it is still wanted outside debugging.
    request.RequestConfiguration = new RequestConfiguration();
    request.RequestConfiguration.DisableDirectStreaming = true;

    var searchResponse = elasticSearchManager.Client.Search<SearchAsset>(request);
    ESLogger.LogElasticSearchResponse(searchResponse);

    Logger.Instance.LogInfo(new LogMessage(LogMessageAction.Search,
        new SearchContextLogData<SearchAsset>(searchResponse, collection),
        new UserContextLogData(user)
    ));

    // aggregations is optional: a missing instance means "do not report a total count".
    var includeTotalCount = aggregations != null && aggregations.CalculateTotalCount;

    SearchResult result = new SearchResult()
    {
        TotalCount = includeTotalCount ? searchResponse.Total : 0
    };

    foreach (var searchAsset in searchResponse.Hits)
    {
        var asset = selectors.ApplyElasticSearchToAssetSelectors(searchAsset);
        result.Assets.Add(asset);
    }

    return result;
}

ElasticSearchManager

/// <summary>
/// Thin wrapper around the shared NEST client: exposes the client and the index name, and
/// offers a tokenizing helper whose results are cached for the duration of the current HTTP request.
/// </summary>
public class ElasticSearchManager
{
    /// <summary>The client obtained from <c>NestClient.GetClient</c>.</summary>
    public IElasticClient Client { get; private set; }

    /// <summary>Name of the index, as held by <c>NestClient.IndexName</c>.</summary>
    public virtual string IndexName
    {
        get { return NestClient.IndexName; }
    }

    public ElasticSearchManager()
    {
        Client = NestClient.GetClient(null);
    }

    /// <summary>
    /// Runs <paramref name="input"/> through the analyzer of <paramref name="field"/> on the index
    /// and returns the produced tokens.
    /// </summary>
    /// <param name="field">Name of the field whose analyzer should be used.</param>
    /// <param name="input">Text to analyze.</param>
    /// <returns>
    /// The tokens from the analyze response; an empty list when the response is not valid.
    /// The result (including the empty list from a failed call) is cached per HTTP request
    /// under the key <c>field + "_" + input</c>.
    /// </returns>
    public IEnumerable<string> Tokenize(string field, string input)
    {
        var key = field + "_" + input;
        var tokens = GetCachedTokens(key);

        if (tokens != null)
            return tokens;

        tokens = new List<string>();

        var response = Client.Analyze(x => x.Field(field).Index(IndexName).Text(input)).Log();

        if (response.IsValid)
        {
            foreach (var token in response.Tokens)
            {
                tokens.Add(token.Token);
            }
        }

        CacheTokens(key, tokens);

        return tokens;
    }

    /// <summary>
    /// Overload that takes the field as an expression and forwards to the string-based overload.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the field name is the textual form of the expression body, so
    /// <c>x =&gt; x.Filename</c> becomes "x.Filename" (parameter name included) rather than an
    /// inferred index field name - confirm this is what the analyze call expects.
    /// </remarks>
    public IEnumerable<string> Tokenize<T>(Expression<Func<T, object>> field, string input)
        where T : class
    {
        var fieldName = field.Body.ToString();
        return Tokenize(fieldName, input);
    }

    // Stores the tokens in the per-request item bag; a no-op outside of an HTTP request.
    private void CacheTokens(string input, IEnumerable<string> tokens)
    {
        if (HttpContext.Current != null)
        {
            // Indexer assignment instead of Add: Add throws ArgumentException when the key already
            // exists (e.g. an entry under the same key that GetCachedTokens could not read as a List<string>).
            HttpContext.Current.Items[input] = tokens;
        }
    }

    // Returns the tokens cached for this request, or null when there is no HTTP request,
    // no entry, or the entry is not a List<string>.
    private List<string> GetCachedTokens(string input)
    {
        if (HttpContext.Current != null)
        {
            return HttpContext.Current.Items[input] as List<string>;
        }

        return null;
    }

    // Pushes the SearchAsset mapping (auto-mapped, _all field disabled) and logs the response.
    private void LoadMappings()
    {
        Client.Map<SearchAsset>(m => m
            .AllField(x => x.Enabled(false))
            .AutoMap()
        ).Log(isIndexRebuild: true);
    }
}

NestClient

//INFO: This class is a singleton for a reason
//Under the hood, Nest instantiates the Newtonsoft deserializer fresh for each instantiation of the client
//Therefore, if you instantiate the client fresh every request, then your deserializer gets 3x-4x slower
//Preserving the client as a singleton mitigates this cost
/// <summary>
/// Holds the single, lazily-created NEST client and the index name it was created for.
/// </summary>
public static class NestClient
{
    /// <summary>Index name chosen when the client was first created; null until then.</summary>
    public static string IndexName { get; private set; }

    /// <summary>Elasticsearch configuration, fetched from <c>SettingsManager</c> on every access.</summary>
    public static ElasticSearchConfig Config
    {
        get
        {
            return SettingsManager.ElasticSearchConfig<ElasticSearchConfig>();
        }
    }

    private static readonly object syncRoot = new object();

    // volatile so the unlocked first check below never observes a stale reference.
    private static volatile IElasticClient instance = null;

    /// <summary>
    /// Returns the shared client, creating it on first use.
    /// </summary>
    /// <param name="indexName">
    /// Index to bind the client to. Only honored by the call that actually creates the client;
    /// when null or empty, the configured index name is used. Later calls return the existing
    /// client and ignore this argument.
    /// </param>
    /// <returns>The one client instance shared by all callers.</returns>
    public static IElasticClient GetClient(string indexName = null)
    {
        // Fast path: no locking once the client exists.
        if (instance == null)
        {
            lock (syncRoot)
            {
                // Re-check inside the lock: another thread may have created the client while we waited.
                // Without this, concurrent first calls could each build a client and race on IndexName.
                if (instance == null)
                {
                    IndexName = indexName;

                    //if name != null, it will override the current index name in the db
                    //use to create a new index, then update db when index is done building
                    if (string.IsNullOrEmpty(indexName))
                    {
                        IndexName = Config.IndexName;
                    }

                    var uri = new Uri(Config.Url);
                    var pool = new SingleNodeConnectionPool(uri);
                    var settings = new ConnectionSettings(pool);
                    settings.DefaultIndex(IndexName);

                    //INFO: We want the SearchAsset object to be hard-bound to the index
                    settings.InferMappingFor<SearchAsset>(m => m.IndexName(IndexName));

                    // Publish last, so any thread that sees a non-null instance also sees IndexName.
                    instance = new ElasticClient(settings);
                }
            }
        }

        return instance;
    }
}

Example query output

Here in this gist

Upvotes: 0

Views: 1544

Answers (2)

m.2
m.2

Reputation: 1

As cidthecoatrack mentioned, (de)serialization is the root cause. However, we found that even when sending raw JSON directly through NEST's low-level client and receiving raw JSON back, a request still took about twice as long (though still only milliseconds) as the same request made with a regular HTTP client. So we switched to a regular HTTP client instead of NEST for sending our search queries.

Avoiding the serialization step altogether, where applicable, will certainly help a lot as well.

Hope that it helps.

Upvotes: 0

cidthecoatrack
cidthecoatrack

Reputation: 1441

So it turned out that the worst offender wasn't directly NEST at all, but later code that translated the C# representation of the ElasticSearch documents into our business-level objects.

// Translate every returned hit into a business-level asset and collect it on the result.
foreach (var hit in searchResponse.Hits)
{
    result.Assets.Add(selectors.ApplyElasticSearchToAssetSelectors(hit));
}

Our selectors (really translators/adapters) were using reflection to look at the ES documents and turn them into our more permanent objects.

/// <summary>
/// Selector that fills an <c>Asset</c> from a search hit by looking each value up in the hit's
/// returned fields (<c>hit.Fields</c>) through expression-based accessors.
/// This is the earlier implementation; the surrounding text identifies its per-field lookups
/// as the main cost when converting hits.
/// </summary>
public class FullFieldSelectors : FilenameFieldSelectors
{
    private readonly AssetTypesManager assetTypesManager;
    // Id of the user the conversion is done for; used to decide the asset's Flag value.
    private readonly long userID;

    public FullFieldSelectors(long userID)
    {
        assetTypesManager = new AssetTypesManager();
        this.userID = userID;
    }

    /// <summary>
    /// Builds on the base conversion and then copies the remaining values from the hit's fields.
    /// </summary>
    /// <param name="hit">Search hit to convert.</param>
    /// <returns>The asset produced by the base conversion with the additional properties assigned.</returns>
    public override Asset ConvertToAsset(IHit<SearchAsset> hit)
    {
        var asset = base.ConvertToAsset(hit);
        // Not read anywhere below in this version; the lightbox section uses hit.Source directly.
        var searchAsset = hit.Source;

        // Each lookup takes the first returned value for the field; FirstOrDefault yields the
        // type's default when there is none.
        // ConvertNullEnumerable is defined elsewhere - presumably it substitutes an empty
        // sequence for a null result; confirm.
        asset.JobID = hit.Fields.Values<SearchAsset, long>(f => f.JobID).FirstOrDefault();
        asset.FolderID = hit.Fields.Values<SearchAsset, long>(f => f.JobFolderID).FirstOrDefault();
        asset.PlusRating = hit.Fields.Values<SearchAsset, long>(f => f.Rating).FirstOrDefault();
        asset.Select = hit.Fields.Values<SearchAsset, bool>(f => f.Selected).FirstOrDefault();
        asset.Alt = hit.Fields.Values<SearchAsset, bool>(f => f.Alted).FirstOrDefault();
        asset.Approve = hit.Fields.Values<SearchAsset, bool>(f => f.Approved).FirstOrDefault();
        asset.Kill = hit.Fields.Values<SearchAsset, bool>(f => f.Killed).FirstOrDefault();
        // Flagged for this user when the returned user-flag values contain userID.
        asset.Flag = ConvertNullEnumerable(hit.Fields.Values<SearchAsset, long>(f => f.UserFlags.Find(u => u == userID))).Contains(userID);
        asset.Color = (AssetColorCd)hit.Fields.Values<SearchAsset, long>(f => f.Color).FirstOrDefault();
        asset.FileExtension = ConvertNullEnumerable(hit.Fields.Values<SearchAsset, string>(f => f.Extensions.FirstOrDefault())).FirstOrDefault();
        asset.OriginalType = assetTypesManager.Restore(ConvertNullEnumerable(hit.Fields.Values<SearchAsset, string>(f => f.OriginalTypeCd)).FirstOrDefault());
        asset.NoteCount = hit.Fields.Values<SearchAsset, int>(f => f.NoteCount).FirstOrDefault();
        asset.Status = (AssetStatus)hit.Fields.Values<SearchAsset, long>(f => f.Status).FirstOrDefault();
        asset.ClosedGalleryCount = hit.Fields.Values<SearchAsset, int>(f => f.ClosedGalleryCount).FirstOrDefault();
        asset.Finalized = hit.Fields.Values<SearchAsset, bool>(f => f.Finalized).FirstOrDefault();
        asset.TotalGalleryCount = hit.Fields.Values<SearchAsset, int>(f => f.TotalGalleryCount).FirstOrDefault();
        asset.Width = hit.Fields.Values<SearchAsset, int>(f => f.Width).FirstOrDefault();
        asset.Height = hit.Fields.Values<SearchAsset, int>(f => f.Height).FirstOrDefault();
        asset.PageCount = hit.Fields.Values<SearchAsset, int>(f => f.PageCount).FirstOrDefault();
        asset.ByteCount = hit.Fields.Values<SearchAsset, long>(f => f.ByteCount).FirstOrDefault();
        asset.HasMarkup = hit.Fields.Values<SearchAsset, bool>(f => f.HasMarkup).FirstOrDefault();
        asset.StorageFolderPath = ConvertNullEnumerable(hit.Fields.Values<SearchAsset, string>(f => f.StorageFolderPath)).FirstOrDefault();
        asset.NewStorageLocation = ConvertNullEnumerable(hit.Fields.Values<SearchAsset, bool>(f => f.NewStorageLocation)).FirstOrDefault();
        asset.Archived = ConvertNullEnumerable(hit.Fields.Values<SearchAsset, bool>(f => f.Archived)).FirstOrDefault();

        // Lightbox data is read from the hit's source document (not from Fields);
        // only the first lightbox entry is used.
        if (hit.Source != null && hit.Source.Lightboxes != null && hit.Source.Lightboxes.Count > 0)
        {
            asset.LightboxAsset = new LightboxAsset()
            {
                AddedBy = hit.Source.Lightboxes.First().AddedBy,
                AssetID = asset.ID,
                LightboxID = hit.Source.Lightboxes.First().LightboxID,
                SeqOrder = hit.Source.Lightboxes.First().OrderID
            };
        }

        return asset;
    }
}

When we were on 1.X and had field explosion, this made sense, because we never knew which fields a document would or would not have. Once we got to 5.X, and we had fixed field explosion with sub documents, the translation became much more stable and reliable, so the overhead of reflection was no longer needed.

/// <summary>
/// Selector that fills an <c>Asset</c> directly from the hit's deserialized source document
/// (<c>hit.Source</c>) using plain property reads.
/// </summary>
public class FullFieldSelectors : FilenameFieldSelectors
{
    private readonly AssetTypesManager assetTypesManager;
    // Id of the user the conversion is done for; used to decide the asset's Flag value.
    private readonly long userID;

    public FullFieldSelectors(long userID)
    {
        assetTypesManager = new AssetTypesManager();
        this.userID = userID;
    }

    /// <summary>
    /// Builds on the base conversion and then copies the remaining values from the source document.
    /// </summary>
    /// <param name="hit">Search hit to convert.</param>
    /// <returns>
    /// The asset produced by the base conversion with the additional properties assigned.
    /// When the hit carries no source document, only the base conversion is applied and the
    /// two string properties this method normally guarantees non-null are set to empty strings.
    /// </returns>
    public override Asset ConvertToAsset(IHit<SearchAsset> hit)
    {
        var asset = base.ConvertToAsset(hit);
        var searchAsset = hit.Source;

        // A hit can come back without a source document (e.g. when _source is not returned);
        // return what the base conversion produced rather than failing with a NullReferenceException.
        if (searchAsset == null)
        {
            asset.FileExtension = string.Empty;
            asset.StorageFolderPath = string.Empty;
            return asset;
        }

        asset.JobID = searchAsset.JobID;
        asset.FolderID = searchAsset.JobFolderID;
        asset.PlusRating = searchAsset.Rating;
        asset.Select = searchAsset.Selected;
        asset.Alt = searchAsset.Alted;
        asset.Approve = searchAsset.Approved;
        asset.Kill = searchAsset.Killed;
        // Flagged for this user when the document's user flags contain userID.
        asset.Flag = searchAsset.UserFlags != null && searchAsset.UserFlags.Contains(userID);

        asset.Color = (AssetColorCd)searchAsset.Color;

        // Only the first extension is used; empty string when the document has none.
        asset.FileExtension = string.Empty;

        if (searchAsset.Extensions != null && searchAsset.Extensions.Any())
            asset.FileExtension = searchAsset.Extensions.First();

        asset.OriginalType = assetTypesManager.Restore(searchAsset.OriginalTypeCd);
        asset.NoteCount = searchAsset.NoteCount;
        asset.Status = (AssetStatus)searchAsset.Status;
        asset.ClosedGalleryCount = searchAsset.ClosedGalleryCount;
        asset.Finalized = searchAsset.Finalized;
        asset.TotalGalleryCount = searchAsset.TotalGalleryCount;
        asset.Width = searchAsset.Width;
        asset.Height = searchAsset.Height;
        asset.PageCount = searchAsset.PageCount;
        asset.ByteCount = searchAsset.ByteCount;
        asset.HasMarkup = searchAsset.HasMarkup;
        asset.StorageFolderPath = searchAsset.StorageFolderPath ?? string.Empty;
        asset.NewStorageLocation = searchAsset.NewStorageLocation;
        asset.Archived = searchAsset.Archived;

        // Only the first lightbox entry is used.
        if (searchAsset.Lightboxes != null && searchAsset.Lightboxes.Any())
        {
            var searchLightbox = searchAsset.Lightboxes.First();

            asset.LightboxAsset = new LightboxAsset()
            {
                AddedBy = searchLightbox.AddedBy,
                AssetID = asset.ID,
                LightboxID = searchLightbox.LightboxID,
                SeqOrder = searchLightbox.OrderID
            };
        }

        return asset;
    }
}

This brought our queries to well under 3 seconds total (around 2.3 seconds on average for 100 hits).

Upvotes: 4

Related Questions