Reputation: 907
I have Logs stored in ElasticSearch and a Windows Application using C# and Nest which is executing searches against the ElasticSearch. The mapping in ElasticSearch is shown below:
"mappings": {
"qns": {
"properties": {
"@timestamp": {
"format": "strict_date_optional_time||epoch_millis",
"type": "date"
},
"Error_Description": {
"index": "not_analyzed",
"type": "string"
},
"Thread_Id": {
"index": "not_analyzed",
"type": "string"
},
"Error_Description_Analyzed": {
"type": "string"
},
"Error_Source": {
"index": "not_analyzed",
"type": "string"
},
"record": {
"type": "string"
},
"@version": {
"type": "string"
},
"Log_Level": {
"type": "string"
},
"Record": {
"type": "string"
},
"id": {
"type": "long"
},
"Error_Source_Analyzed": {
"type": "string"
},
"Timestamp": {
"format": "strict_date_optional_time||epoch_millis",
"type": "date"
}
}
}
}
The corresponding C# class is as follows:
[ElasticsearchType(IdProperty = "Id", Name = "qns")]
public class QNS
{
[Number(NumberType.Long, Name = "id")]
public long Id { get; set; }
[Date(Name = "Timestamp")]
public DateTime Timestamp { get; set; }
[String(Name = "Error_Description", Index = FieldIndexOption.NotAnalyzed)]
public string ErrorDescriptionKeyword { get; set; }
[String(Name = "Error_Description_Analyzed")]
public string ErrorDescriptionAnalyzed { get; set; }
[String(Name = "Error_Source", Index = FieldIndexOption.NotAnalyzed)]
public string ErrorSourceKeyword { get; set; }
[String(Name = "Error_Source_Analyzed")]
public string ErrorSourceAnalyzed { get; set; }
[String(Name = "Thread_Id", Index = FieldIndexOption.NotAnalyzed)]
public string ThreadId { get; set; }
[String(Name = "Log_Level")]
public string LogLevel { get; set; }
[String(Index = FieldIndexOption.NotAnalyzed)]
public string Record { get; set; }
}
I need a way to search for distinct error records that falls within a datetime range and matches a certain range of patterns. While I am able to get the result, but I am also getting all the documents that satisfy the search while I only need the distinct error strings. For the Distinct query I am using FluentNest(https://github.com/hoonzis/fluentnest). The code for retrieving the results is as follows:
private List<string> FindDistinctErrorsByPatternAndTimeRangeInternal(DateTime fromDateTime, DateTime toDateTime, List<pattern> patterns, string indexName, string type)
{
var documents = new List<QNS>();
var fromTime = fromDateTime.ToString(Constants.IndexSearch.ES_DATETIME_FORMAT);
var toTime = toDateTime.ToString(Constants.IndexSearch.ES_DATETIME_FORMAT);
var patternQueries = new List<QueryContainer>();
foreach (var p in patterns)
{
var pType = PatternType.unknown;
if (Enum.TryParse<PatternType>(p.Pattern_Type.ToLowerInvariant(), out pType))
{
switch (pType)
{
case PatternType.word:
patternQueries.Add(Query<QNS>.Regexp(r =>
r.Field(f =>
f.ErrorDescriptionAnalyzed)
.Value(p.Pattern_Description)
)
);
break;
case PatternType.phrase:
patternQueries.Add(Query<QNS>.MatchPhrase(m =>
m.Field(f =>
f.ErrorDescriptionAnalyzed)
.Query(p.Pattern_Description)
)
);
break;
case PatternType.unknown:
default:
break;
}
}
}
var datetimeQuery = Query<QNS>.QueryString(q =>
q.DefaultField(f =>
f.Timestamp).Query($"[{fromTime} TO {toTime}]")
);
var searchResults = client.Search<QNS>(s => s.Index(indexName)
.Type(type)
.Query(q =>
q.Filtered(f =>
f.Filter(fq =>
fq.Bool(b =>
b.MinimumShouldMatch(1).Should(patternQueries.ToArray())
)
)
.Query(qd =>
qd.Bool(b =>
b.Must(datetimeQuery)
)
)
)
)
.Sort(sort => sort.Ascending(SortSpecialField.DocumentIndexOrder))
.Aggregations(agg => agg.DistinctBy(q => q.ErrorDescriptionKeyword)));
var results = searchResults.Aggs.AsContainer<QNS>().GetDistinct(d => d.ErrorDescriptionKeyword);
return results.ToList();
}
I need to modify this code to only return the distinct error strings and not the entire result set. The number of hits from the query is around 3500 and only 2 distinct error strings are present. So it does not make sense to get all those records back as I am not going to use it. Can someone help me get to the right aggregation query using the date range and pattern regex/phrase match to only return the distinct error records using Nest or Nest/FluentNest.
Upvotes: 0
Views: 1042
Reputation: 2561
I think you are looking for the terms aggregation.
But you whole query is a bit strange. Do you have some legacy requirements.
First thing you have two fields ErrorDescriptionAnalyzed and ErrorDescriptionKeyword are you making a different field just to have one Analyzed and one not? Why don't you use multi-fields
Second Filtered method has obsolete for some time.
Here is a quick sample that I hope will help
ElasticClient db = new ElasticClient(uri);
db.DeleteIndex(indexName);
var mappings = new CreateIndexDescriptor(indexName).Mappings(ms => ms.Map<A>(map => map.
AutoMap().
Properties(props =>
props.String(p =>
p.Name(a => a.Text).
Fields(fields =>
fields.String(pr => pr.Name("raw").NotAnalyzed()))))));
db.CreateIndex(mappings);
foreach (var item in Enumerable.Range(0, 10).Select(i => new A
{
Price1 = random.NextDouble() * 1000,
Date = i % 3 == 0 ? new DateTime(1900, 1, 1) : DateTime.Now,
Text = i % 2 == 0 ? "ABC" : "EFG"
}))
{
db.Index(item, inx => inx.Index(indexName));
}
var toDate = DateTime.Now + TimeSpan.FromDays(1);
var fromDate = DateTime.Now - TimeSpan.FromDays(30);
var data = db.Search<A>(s =>
s.Index(indexName)
.Query(q=>
q.DateRange(r => r.Field(f => f.Date).GreaterThan(fromDate).LessThanOrEquals(toDate))
&&
(
//term query is for finding words by default all words are lowercase but you can set a different analyzer
q.Term(t => t.Field(f => f.Text).Value("ABC".ToLower()))
||
//Raw field is not analysed so no need to lower case you can use you query here if you want
q.Term(t => t.Field("text.raw").Value("EFG"))
)
).Aggregations(aggr => aggr.Terms("distinct", aterm => aterm.Field("text.raw"))));
Upvotes: 0