Reputation: 529
I did a lot of googling and checked the documentation for NEST and Elasticsearch, but I was not able to find a working example or solve my problem.
I made an example. In this example I want to query the number of distinct Last_Names and the SUM of the Salaries per family.
/// <summary>
/// Document type indexed into Elasticsearch: one employee with a name and a salary.
/// </summary>
class Employee
{
    /// <summary>The employee's given name.</summary>
    public string First_Name { get; set; }

    /// <summary>The employee's family name; the example groups employees by this value.</summary>
    public string Last_Name { get; set; }

    /// <summary>The employee's salary; the example sums this value per family.</summary>
    public int Salary { get; set; }

    /// <summary>
    /// Creates an empty employee; all properties keep their default values.
    /// </summary>
    public Employee()
    {
    }

    /// <summary>
    /// Creates an employee with all three properties populated.
    /// </summary>
    /// <param name="first_name">Value stored in <see cref="First_Name"/>.</param>
    /// <param name="last_name">Value stored in <see cref="Last_Name"/>.</param>
    /// <param name="salary">Value stored in <see cref="Salary"/>.</param>
    public Employee(string first_name, string last_name, int salary)
    {
        First_Name = first_name;
        Last_Name = last_name;
        Salary = salary;
    }
}
/// <summary>
/// Indexes four sample employees, runs a terms aggregation on the last name with a nested
/// sum of salaries, and writes the number of families and each family's total salary to
/// the debug output.
/// </summary>
/// <param name="sender">The control that raised the click event (not used).</param>
/// <param name="e">Event data for the click (not used).</param>
private void button4_Click(object sender, EventArgs e)
{
    // Create 4 employees
    var al = new Employee("Al", "Bundy", 1500);
    var bud = new Employee("Bud", "Bundy", 975);
    var marcy = new Employee("Marcy", "Darcy", 4500);
    var jefferson = new Employee("Jefferson", "Darcy", 0);

    // add the 4 employees to the index
    client.Index<Employee>(al);
    client.Index<Employee>(bud);
    client.Index<Employee>(marcy);
    client.Index<Employee>(jefferson);

    // Newly indexed documents are not searchable until the index is refreshed.
    // Refreshing explicitly is acceptable for an example; in production rely on
    // the refresh interval instead.
    client.Refresh(Indices.Index<Employee>());

    // query the index
    var result = client.Search<Employee>(s => s
        .Aggregations(a => a
            .Terms("Families", ts => ts
                // Aggregate on the un-analyzed "keyword" sub-field so the bucket keys are
                // the verbatim last names. NOTE(review): assumes the index mapping has a
                // Last_Name.keyword sub-field (automapping / ES 5+ dynamic mapping) - confirm.
                .Field(o => o.Last_Name.Suffix("keyword"))
                .Size(10)
                .Aggregations(aa => aa
                    .Sum("FamilySalary", sa => sa
                        .Field(o => o.Salary)
                    )
                )
            )
        )
    );

    // Without a valid response there are no aggregations to read.
    if (!result.IsValid)
    {
        System.Diagnostics.Debug.WriteLine(result.DebugInformation);
        return;
    }

    // Each bucket of the terms aggregation is one distinct family
    // (expected: 2 buckets, Bundy and Darcy).
    var families = result.Aggs.Terms("Families");
    System.Diagnostics.Debug.WriteLine($"there are {families.Buckets.Count} different families in the index");

    // The sum is a sub-aggregation, so it is read from each bucket,
    // not from the terms aggregation itself.
    foreach (var family in families.Buckets)
    {
        var familySalary = family.Sum("FamilySalary");
        System.Diagnostics.Debug.WriteLine($"family {family.Key} earns {familySalary.Value}");
    }
}
I need the following information from Elasticsearch:
* there are two different families in the index
* family Bundy earns 2475
* family Darcy earns 4500
please help
Upvotes: 0
Views: 425
Reputation: 125488
Here's a complete example
/// <summary>
/// Complete example: recreates the "employees" index, indexes four employees, runs a terms
/// aggregation on the last name with a nested salary sum, and prints each family's total.
/// </summary>
private static void Main()
{
    var defaultIndex = "employees";

    var settings = new ConnectionSettings(new Uri("http://localhost:9200"))
        .InferMappingFor<Employee>(i => i
            .IndexName(defaultIndex)
        )
        .DefaultIndex(defaultIndex)
        // following settings are useful while developing
        // but probably don't want to use them in production
        .DisableDirectStreaming()
        .PrettyJson()
        .OnRequestCompleted(callDetails =>
        {
            // Echo every request (with its body, when one was sent) ...
            if (callDetails.RequestBodyInBytes != null)
            {
                Console.WriteLine(
                    $"{callDetails.HttpMethod} {callDetails.Uri} \n" +
                    $"{Encoding.UTF8.GetString(callDetails.RequestBodyInBytes)}");
            }
            else
            {
                Console.WriteLine($"{callDetails.HttpMethod} {callDetails.Uri}");
            }

            Console.WriteLine();

            // ... and every response (with its body, when one was received).
            if (callDetails.ResponseBodyInBytes != null)
            {
                Console.WriteLine($"Status: {callDetails.HttpStatusCode}\n" +
                    $"{Encoding.UTF8.GetString(callDetails.ResponseBodyInBytes)}\n" +
                    $"{new string('-', 30)}\n");
            }
            else
            {
                Console.WriteLine($"Status: {callDetails.HttpStatusCode}\n" +
                    $"{new string('-', 30)}\n");
            }
        });

    var client = new ElasticClient(settings);

    // Start from a clean index so repeated runs give the same result.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    client.CreateIndex(defaultIndex, c => c
        .Settings(s => s
            .NumberOfShards(1)
        )
        .Mappings(m => m
            .Map<Employee>(mm => mm
                .AutoMap()
            )
        )
    );

    // Create 4 employees
    var al = new Employee("Al", "Bundy", 1500);
    var bud = new Employee("Bud", "Bundy", 975);
    var marcy = new Employee("Marcy", "Darcy", 4500);
    var jefferson = new Employee("Jefferson", "Darcy", 0);

    var indexResponse = client.IndexMany(new [] { al, bud, marcy, jefferson });
    if (!indexResponse.IsValid)
    {
        // Aggregating over a partially indexed data set would give misleading totals.
        Console.Error.WriteLine(indexResponse.DebugInformation);
        return;
    }

    // refresh the index after indexing. We do this here for example purposes,
    // but in a production system, it's preferable to use the refresh interval
    // see https://www.elastic.co/blog/refreshing_news
    client.Refresh(defaultIndex);

    // query the index
    var result = client.Search<Employee>(s => s
        .Aggregations(a => a
            .Terms("Families", ts => ts
                .Field(o => o.Last_Name.Suffix("keyword")) // use the keyword sub-field for terms aggregation
                .Size(10)
                .Aggregations(aa => aa
                    .Sum("FamilySalary", sa => sa
                        .Field(o => o.Salary)
                    )
                )
            )
        )
    );

    // Without a valid response there are no aggregations to read.
    if (!result.IsValid)
    {
        Console.Error.WriteLine(result.DebugInformation);
        return;
    }

    // Get the number of different families (Result should be 2: Bundy and Darcy)
    // and get the family-salary of family Bundy and the family-salary for the Darcys
    var names = result.Aggs.Terms("Families");

    foreach (var name in names.Buckets)
    {
        // The sum is a sub-aggregation, so it is read from the bucket.
        var sum = name.Sum("FamilySalary");
        Console.WriteLine($"* family {name.Key} earns {sum.Value}");
    }
}
/// <summary>
/// Employee document stored in the index; carries a first name, a last name and a salary.
/// </summary>
public class Employee
{
    /// <summary>Given name of the employee.</summary>
    public string First_Name { get; set; }

    /// <summary>Family name of the employee.</summary>
    public string Last_Name { get; set; }

    /// <summary>Salary of the employee.</summary>
    public int Salary { get; set; }

    /// <summary>
    /// Parameterless constructor; leaves every property at its default value.
    /// </summary>
    public Employee()
    {
    }

    /// <summary>
    /// Initializes an employee from the supplied values.
    /// </summary>
    /// <param name="first_name">Assigned to <see cref="First_Name"/>.</param>
    /// <param name="last_name">Assigned to <see cref="Last_Name"/>.</param>
    /// <param name="salary">Assigned to <see cref="Salary"/>.</param>
    public Employee(string first_name, string last_name, int salary)
    {
        First_Name = first_name;
        Last_Name = last_name;
        Salary = salary;
    }
}
This outputs
* family Bundy earns 2475
* family Darcy earns 4500
A few points:
* You generally don't want to call refresh after indexing operations because it causes a Lucene segment to be written in the underlying inverted index. Whilst there is a background process to merge segments, having many small segments can be a problem. It's best to let the refresh interval do its thing. Refresh is called here only to make the documents available to search following indexing.
* The terms aggregation runs on the keyword sub field that is created with automapping, when mapping a string property. A keyword field datatype indexes the value verbatim and leverages doc values, a columnar data structure that works well for aggregations and sorting.
Upvotes: 2