pirodex
pirodex

Reputation: 31

Using NEST with Elasticsearch to create analyzer mappings for document fields

I am using NEST 0.11.0.0 and the sample code below. This code creates the index successfully, but my search query doesn't work. I want to be able to search by product code with dashes, like '14-KP-OM-00368'. What am I missing here?

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using ElasticSearchSample.Data;
using Nest;

namespace ElasticSearchSample
{
    /// <summary>
    /// Console sample that indexes a couple of <see cref="ProductEntity"/> documents
    /// into Elasticsearch through NEST and then runs a term query on the product code.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates the "products" index if it is missing, puts the attribute-based mapping,
        /// re-indexes the sample products and prints how many documents the search returned.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string keyName = "products";
            var typeName = typeof(ProductEntity).ToString();

            var setting = new ConnectionSettings(new Uri("http://localhost:3330/"));
            setting.SetDefaultIndex(keyName);
            var elasticClient = new ElasticClient(setting);

            // Index settings and the type mapping are identical for every document,
            // so they are prepared once here instead of once per product.
            var settings = new IndexSettings { NumberOfReplicas = 0, NumberOfShards = 5 };

            settings.Analysis.Tokenizers.Add("keyword", new KeywordTokenizer());
            settings.Analysis.Tokenizers.Add("standard", new StandardTokenizer());
            settings.Analysis.TokenFilters.Add("standard", new StandardTokenFilter());
            settings.Analysis.TokenFilters.Add("lowercase", new LowercaseTokenFilter());
            settings.Analysis.TokenFilters.Add("stop", new StopTokenFilter());
            settings.Analysis.TokenFilters.Add("asciifolding", new AsciiFoldingTokenFilter());
            settings.Analysis.TokenFilters.Add("word_delimiter", new WordDelimiterTokenFilter());

            if (!elasticClient.IndexExists(keyName).Exists)
            {
                elasticClient.CreateIndex(keyName, settings);
            }

            // Put the mapping only after the index is known to exist, and before
            // any document is indexed into it.
            elasticClient.MapFromAttributes<ProductEntity>();

            foreach (var product in GetProducts())
            {
                var id = product.Identifier.ToString();

                // Remove any previous copy, then index the current one; both calls refresh
                // so the document is visible to the search below.
                elasticClient.DeleteById(keyName, typeName, id, new DeleteParameters { Refresh = true });
                elasticClient.Index(product, keyName, typeName, id, new IndexParameters { Refresh = true });
            }

            var sp1 = Stopwatch.StartNew();

            // Search part
            var result = elasticClient.Search<ProductEntity>(body => body.Type(typeName)
                                                                            .Fields(f => f.ProductCode).Query(q => q
                                                                            .Term(f => f.ProductCode.ToLower(), "14 kp om 01567", 2.0)));

            sp1.Stop();

            // I searched with lower-case characters, both without and with dashes, but no luck.
            // What I actually want is to search with the dashes.

            // Enumerate the hits once and guard the throughput against a 0 ms measurement.
            var documentCount = result.Documents.Count();
            var elapsedMs = sp1.ElapsedMilliseconds;
            var docsPerMs = elapsedMs > 0 ? documentCount / (double)elapsedMs : 0d;

            Console.WriteLine("ES Results : {0:#,#} documents in {1:#,#}ms: {2:#,#.##}: docs/ms\n\n", documentCount, elapsedMs, docsPerMs);
            Console.ReadLine();
        }

        /// <summary>
        /// Builds the in-memory sample products that <see cref="Main"/> indexes.
        /// </summary>
        /// <returns>A list with two products; each gets a freshly generated identifier.</returns>
        public static List<ProductEntity> GetProducts()
        {
            return new List<ProductEntity>
            {
                new ProductEntity
                {
                    CreatedDate = new DateTime(2013, 6, 25),
                    Identifier = Guid.NewGuid(),
                    IsActive = true,
                    ProductCode = "14-KP-OM-00368",
                    ProductName = "Şemsiye",
                    SortOrder = 1
                },
                new ProductEntity
                {
                    CreatedDate = new DateTime(2013, 6, 25),
                    Identifier = Guid.NewGuid(),
                    IsActive = true,
                    ProductCode = "14-KP-OM-01567",
                    ProductName = "Çeşme",
                    SortOrder = 2
                }
            };
        }
    }
}

namespace ElasticSearchSample.Data
{
    /// <summary>
    /// Product document stored in Elasticsearch. The type-level attribute sets the
    /// Elasticsearch type name, names a search analyzer ("full_name") and an index
    /// analyzer ("partial_name"), and enables date and numeric detection.
    /// </summary>
    // NOTE(review): no analyzer called "full_name" or "partial_name" is registered in the
    // index settings built in Program.Main — confirm they are defined somewhere else.
    [ElasticType(Name = "ElasticSearchSample.Data.ProductEntity", SearchAnalyzer = "full_name", IndexAnalyzer = "partial_name", DateDetection = true, NumericDetection = true, DynamicDateFormats = new[] { "dateOptionalTime", "yyyy-MM-dd HH:mm:ss Z||yyyy-MM-dd Z" })]
    public class ProductEntity
    {
        /// <summary>Identifier of the product; mapped as an analyzed string field.</summary>
        [ElasticProperty(Type = FieldType.string_type, Index = FieldIndexOption.analyzed)]
        public Guid Identifier { get; set; }

        /// <summary>Product code (e.g. "14-KP-OM-00368"); mapped as an analyzed string field.</summary>
        [ElasticProperty(Type = FieldType.string_type, Index = FieldIndexOption.analyzed)]
        public string ProductCode { get; set; }

        /// <summary>Product name; mapped as an analyzed string field.</summary>
        [ElasticProperty(Type = FieldType.string_type, Index = FieldIndexOption.analyzed)]
        public string ProductName { get; set; }

        /// <summary>Active flag; mapped as a boolean field.</summary>
        [ElasticProperty(Type = FieldType.boolean_type, Index = FieldIndexOption.analyzed)]
        public bool IsActive { get; set; }

        /// <summary>Optional sort position; mapped as an integer field.</summary>
        [ElasticProperty(Type = FieldType.integer_type, Index = FieldIndexOption.analyzed)]
        public int? SortOrder { get; set; }

        /// <summary>Creation date; mapped as a date field.</summary>
        [ElasticProperty(Type = FieldType.date_type, Index = FieldIndexOption.analyzed)]
        public DateTime CreatedDate { get; set; }
    }

}

Upvotes: 3

Views: 13209

Answers (1)

Martijn Laarman
Martijn Laarman

Reputation: 13536

You are creating analyzers, but you never tell Elasticsearch which one to use for ProductCode, so it will use the default analyzer, which breaks 14-KP-OM-00368 up into:

  • 14
  • kp
  • om
  • 00368

If you then issue a term query for 14 kp om 00368 it won't be able to find it as it tries to take it as a single term and the previous list does not contain that.

You want to do the following:

// Create the index and register the ProductEntity mapping in a single call, so the
// analyzer chosen for ProductCode is in place before any document is indexed.
client.CreateIndex(keyName, c => c
    .NumberOfReplicas(0)
    .NumberOfShards(1)
    .Settings(s => s // just as an example
        .Add("merge.policy.merge_factor", "10")
        .Add("search.slowlog.threshold.fetch.warn", "1s")
    )
    .AddMapping<ProductEntity>(m => m
        // This explicitly maps all the properties it finds,
        // so ES knows in advance that dates are dates and ints are ints.
        // It also applies the ElasticType attributes.
        .MapFromAttributes()
        // Here we tell ES that the ProductCode field is to be analyzed
        // using the keyword analyzer.
        .Properties(props => props
            .String(s => s
                .Name(p => p.ProductCode)
                .IndexAnalyzer("keyword")
            )
        )
    )
    // just to show you can map more types at once
    .AddMapping<Person>(m => m.MapFromAttributes())
);

Now that the keyword analyzer is active, it won't break up the value of the property and will take it as is, so you will be able to use the term 14-KP-OM-00368.

Also note that you don't need to add the default analyzers to your index like you did in your example; the keyword analyzer is already available.

Upvotes: 4

Related Questions