Reputation: 6809
So far I have tried the following rules, written in the Solr synonym format:
zavesa => Gotove zavese,
zavesa, blago => Blago in dekorativno blago,
zavesa => Dodatki za zavese,
zavesa => Drogi in vodila za zavese
and
zavesa => Gotove zavese, Blago in dekorativno blago, Drogi in vodila za zavese, Dodatki za zavese
However, I always get only the results that match "Drogi in vodila za zavese". If I remove "Drogi in vodila za zavese" from the rule:
zavesa => Gotove zavese, Blago in dekorativno blago, Dodatki za zavese
then I get only the results for "Blago in dekorativno blago".
I have also tried to use all lowercase:
zavesa => gotove zavese, blago in dekorativno blago, drogi in vodila za zavese, dodatki za zavese
The result is the same.
I want to get results for "Gotove zavese", "Blago in dekorativno blago", "Drogi in vodila za zavese" and "Dodatki za zavese" when someone searches for "zavesa".
Is this possible with Elasticsearch?
My synonym configuration:
// Names under which the synonym filter and the custom analyzer are registered
// in the index settings; the mappings below refer to the analyzer by this name.
const string synonymFilterName = "customTokenFilterSynonym";
const string analyzerName = "customAnalyzerLowercaseSynonymAsciifolding";

// Synonym token filter fed with a single Solr-format rule.
var synonymFilter = new SynonymTokenFilter
{
    Format = SynonymFormat.Solr,
    Tokenizer = "standard",
    Synonyms = new[]
    {
        "zavesa => Gotove zavese, Blago in dekorativno blago, Drogi in vodila za zavese, Dodatki za zavese"
    }
};

// Custom analyzer: standard tokenizer, then lowercase -> asciifolding -> synonyms (in that order).
var synonymAnalyzer = new CustomAnalyzer
{
    Tokenizer = "standard",
    Filter = new List<string> { "lowercase", "asciifolding", synonymFilterName }
};

var settings = new IndexSettings
{
    // If this is set to 1 or more, then the index becomes yellow,
    // because it's running on a single node (development machine).
    NumberOfReplicas = 0,
    NumberOfShards = 5,
    Analysis = new Analysis
    {
        Analyzers = new Analyzers(),
        TokenFilters = new TokenFilters()
    }
};
settings.Analysis.TokenFilters.Add(synonymFilterName, synonymFilter);
settings.Analysis.Analyzers.Add(analyzerName, synonymAnalyzer);

var indexState = new IndexState { Settings = settings };

// Create the index only when it does not exist yet.
var existsResponse = ElasticClient.IndexExists(new IndexExistsRequest(indexName));
if (!existsResponse.Exists)
{
    var createResponse = ElasticClient.CreateIndex(indexName, create => create
        .InitializeUsing(indexState)
        .Mappings(mappings => mappings
            .Map<ChildGroupModel>(typeMap => typeMap
                .Properties(props => props
                    // Completion (suggest) fields, indexed and searched with the same analyzer.
                    .Completion(c => c
                        .Name(doc => doc.TitleAutSuggest)
                        .Analyzer(analyzerName)
                        .SearchAnalyzer(analyzerName))
                    .Completion(c => c
                        .Name(doc => doc.TitleSloSuggest)
                        .Analyzer(analyzerName)
                        .SearchAnalyzer(analyzerName))
                    .Completion(c => c
                        .Name(doc => doc.TitleItaSuggest)
                        .Analyzer(analyzerName)
                        .SearchAnalyzer(analyzerName))
                    // Plain text title fields, using the same analyzer for indexing and searching.
                    .Text(t => t
                        .Name(doc => doc.TitleAut)
                        .Analyzer(analyzerName)
                        .SearchAnalyzer(analyzerName))
                    .Text(t => t
                        .Name(doc => doc.TitleSlo)
                        .Analyzer(analyzerName)
                        .SearchAnalyzer(analyzerName))
                    .Text(t => t
                        .Name(doc => doc.TitleIta)
                        .Analyzer(analyzerName)
                        .SearchAnalyzer(analyzerName))))));
}
I'm testing this on the TitleSloSuggest field.
Model
/// <summary>
/// Document model for a child group. Each of the three title properties has a
/// read-only companion property that exposes the title as a completion field.
/// </summary>
public class ChildGroupModel
{
    [Column("id")]
    public int Id { get; set; }

    [Column("homepage_groups_id")]
    public int GroupId { get; set; }

    [Column("title_aut")]
    public string TitleAut { get; set; }

    /// <summary>Completion field whose only input is <see cref="TitleAut"/>.</summary>
    public CompletionField TitleAutSuggest => AsSuggestion(TitleAut);

    [Column("title_slo")]
    public string TitleSlo { get; set; }

    /// <summary>Completion field whose only input is <see cref="TitleSlo"/>.</summary>
    public CompletionField TitleSloSuggest => AsSuggestion(TitleSlo);

    [Column("title_ita")]
    public string TitleIta { get; set; }

    /// <summary>Completion field whose only input is <see cref="TitleIta"/>.</summary>
    public CompletionField TitleItaSuggest => AsSuggestion(TitleIta);

    // Builds a new completion field on every call, with the given title as its single input.
    private static CompletionField AsSuggestion(string title) =>
        new CompletionField { Input = new[] { title } };
}
These are the index settings:
// 20180601112924
// http://localhost:9200/child_groups_index/_settings
{
"child_groups_index_temp_1": {
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "child_groups_index_temp_1",
"creation_date": "1527844777425",
"analysis": {
"filter": {
"customTokenFilterSynonym": {
"format": "solr",
"type": "synonym",
"synonyms": [
"zavesa => gotove zavese, blago in dekorativno blago, drogi in vodila za zavese, dodatki za zavese"
],
"tokenizer": "standard"
}
},
"analyzer": {
"customAnalyzerLowercaseSynonymAsciifolding": {
"filter": [
"lowercase",
"asciifolding",
"customTokenFilterSynonym"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "0",
"uuid": "WsHzMHm-QSKA4Xzxp02ipQ",
"version": {
"created": "6020399"
}
}
}
}
}
Upvotes: 1
Views: 703
Reputation: 6809
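I figured out what I was doing wrong: the rule has to be a contraction that maps each multi-word phrase to the single term "zavesa", not an expansion of "zavesa" into the phrases. This link helped: https://www.elastic.co/guide/en/elasticsearch/guide/current/multi-word-synonyms.html#_use_simple_contraction_for_phrase_queries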
Instead of:
"zavesa => gotove zavese, blago in dekorativno blago, dodatki za zavese, drogi in vodila za zavese"
I needed:
"gotove zavese, blago in dekorativno blago, dodatki za zavese, drogi in vodila za zavese => zavesa"
or:
"gotove zavese => zavesa",
"blago in dekorativno blago => zavesa",
"dodatki za zavese => zavesa",
"drogi in vodila za zavese => zavesa"
Upvotes: 1