Reputation: 11885
I have an ES 7.8 cluster to store log data, one index for one tenant.
As you can see the default index.number_of_shards is one. (Please ignore the fact I don't have any replicas, because the data are just imported)
This looks problematic as all primary shards are located on the same node. How can I assign them evenly on different nodes when creating the index?
Update1:
$ curl -sk 'myhost:19081/_cluster/settings?pretty'
{
"persistent" : { },
"transient" : { }
}
$ curl -sk 'myhost:19081/_cluster/allocation/explain?pretty&include_disk_info=true&include_yes_decisions=true'
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "unable to find any unassigned shards to explain [ClusterAllocationExplainRequest[useAnyUnassignedShard=true,includeYesDecisions?=true]"
}
],
"type" : "illegal_argument_exception",
"reason" : "unable to find any unassigned shards to explain [ClusterAllocationExplainRequest[useAnyUnassignedShard=true,includeYesDecisions?=true]"
},
"status" : 400
}
$ curl -sk 'myhost:19081/_cat/nodeattrs?v'
node host ip attr value
node-001 10.96.110.92 10.96.110.92 ml.machine_memory 99750834176
node-001 10.96.110.92 10.96.110.92 ml.max_open_jobs 20
node-001 10.96.110.92 10.96.110.92 xpack.installed true
node-001 10.96.110.92 10.96.110.92 transform.node true
node-004 10.96.108.179 10.96.108.179 ml.machine_memory 99531649024
node-004 10.96.108.179 10.96.108.179 ml.max_open_jobs 20
node-004 10.96.108.179 10.96.108.179 xpack.installed true
node-004 10.96.108.179 10.96.108.179 transform.node true
node-003 10.96.113.19 10.96.113.19 ml.machine_memory 99531649024
node-003 10.96.113.19 10.96.113.19 ml.max_open_jobs 20
node-003 10.96.113.19 10.96.113.19 xpack.installed true
node-003 10.96.113.19 10.96.113.19 transform.node true
node-002 10.96.112.213 10.96.112.213 ml.machine_memory 99531649024
node-002 10.96.112.213 10.96.112.213 ml.max_open_jobs 20
node-002 10.96.112.213 10.96.112.213 xpack.installed true
node-002 10.96.112.213 10.96.112.213 transform.node true
node-005 10.96.101.214 10.96.101.214 ml.machine_memory 99516563456
node-005 10.96.101.214 10.96.101.214 ml.max_open_jobs 20
node-005 10.96.101.214 10.96.101.214 xpack.installed true
node-005 10.96.101.214 10.96.101.214 transform.node true
$ curl -sk 'myhost:19081/_all/_settings?include_defaults&filter_path=**.allocation&pretty'
{
// several hundred other identical results for the remaining indices omitted
"my_index_1" : {
"defaults" : {
"index" : {
"routing" : {
"allocation" : {
"enable" : "all",
"total_shards_per_node" : "-1"
}
},
"allocation" : {
"max_retries" : "5",
"existing_shards_allocator" : "gateway_allocator"
}
}
}
}
}
Update2:
curl -sk -HContent-Type:application/json -d ' {"index": "my_index_1", "shard": 0, "primary": true }' 'myhost:19081/_cluster/allocation/explain?pretty&include_disk_info=true&include_yes_decisions=true'
{
"index" : "my_index_1",
"shard" : 0,
"primary" : true,
"current_state" : "started",
"current_node" : {
"id" : "CNyCF4_eTmCQYXh_Bhb0KQ",
"name" : "node004",
"transport_address" : "10.96.108.179:9300",
"attributes" : {
"ml.machine_memory" : "99531649024",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"transform.node" : "true"
},
"weight_ranking" : 1
},
"cluster_info" : {
"nodes" : {
"CNyCF4_eTmCQYXh_Bhb0KQ" : {
"node_name" : "node004",
"least_available" : {
"path" : "/data3/nodes/0",
"total_bytes" : 15999772393472,
"used_bytes" : 23527976960,
"free_bytes" : 15976244416512,
"free_disk_percent" : 99.9,
"used_disk_percent" : 0.1
},
"most_available" : {
"path" : "/data2/nodes/0",
"total_bytes" : 15999772393472,
"used_bytes" : 19824119808,
"free_bytes" : 15979948273664,
"free_disk_percent" : 99.9,
"used_disk_percent" : 0.1
}
},
"xiR8clLRSVirvkmlyDpgXg" : {
"node_name" : "node001",
"least_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999896125440,
"used_bytes" : 2815332352,
"free_bytes" : 15997080793088,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
},
"most_available" : {
"path" : "/data3/nodes/0",
"total_bytes" : 15999896125440,
"used_bytes" : 278740992,
"free_bytes" : 15999617384448,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
}
},
"afbAZaznQwaRtryF7yI4dA" : {
"node_name" : "node003",
"least_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999836385280,
"used_bytes" : 34533376,
"free_bytes" : 15999801851904,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
},
"most_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999836385280,
"used_bytes" : 34533376,
"free_bytes" : 15999801851904,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
}
},
"vhFAg67YSgquqP8tR-s98w" : {
"node_name" : "node002",
"least_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999836385280,
"used_bytes" : 34537472,
"free_bytes" : 15999801847808,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
},
"most_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999836385280,
"used_bytes" : 34537472,
"free_bytes" : 15999801847808,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
}
},
"KL8hcVTJTBmN9MTa3fX8eQ" : {
"node_name" : "node005",
"least_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999772393472,
"used_bytes" : 34983936,
"free_bytes" : 15999737409536,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
},
"most_available" : {
"path" : "/data1/nodes/0",
"total_bytes" : 15999772393472,
"used_bytes" : 34983936,
"free_bytes" : 15999737409536,
"free_disk_percent" : 100.0,
"used_disk_percent" : 0.0
}
}
},
"shard_sizes" : {
"[my_index_1][0][p]_bytes" : 2120083,
// several hundred others redacted
},
"shard_paths" : {
"[my_index_1][0], node[CNyCF4_eTmCQYXh_Bhb0KQ], [P], s[STARTED], a[id=dqceFOaFT0ugDALnFEJWvg]" : "/data2/nodes/0",
// several hundred others redacted
}
},
"can_remain_on_current_node" : "yes",
"can_rebalance_cluster" : "yes",
"can_rebalance_to_other_node" : "no",
"rebalance_explanation" : "cannot rebalance as no target node exists that can both allocate this shard and improve the cluster balance",
"node_allocation_decisions" : [
{
"node_id" : "KL8hcVTJTBmN9MTa3fX8eQ",
"node_name" : "node005",
"transport_address" : "10.96.101.214:9300",
"node_attributes" : {
"ml.machine_memory" : "99516563456",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"transform.node" : "true"
},
"node_decision" : "worse_balance",
"weight_ranking" : 1,
"deciders" : [
{
"decider" : "max_retry",
"decision" : "YES",
"explanation" : "shard has no previous failures"
},
{
"decider" : "replica_after_primary_active",
"decision" : "YES",
"explanation" : "shard is primary and can be allocated"
},
{
"decider" : "enable",
"decision" : "YES",
"explanation" : "all allocations are allowed"
},
{
"decider" : "node_version",
"decision" : "YES",
"explanation" : "can relocate primary shard from a node with version [7.8.0] to a node with equal-or-newer version [7.8.0]"
},
{
"decider" : "snapshot_in_progress",
"decision" : "YES",
"explanation" : "no snapshots are currently running"
},
{
"decider" : "restore_in_progress",
"decision" : "YES",
"explanation" : "ignored as shard is not being recovered from a snapshot"
},
{
"decider" : "filter",
"decision" : "YES",
"explanation" : "node passes include/exclude/require filters"
},
{
"decider" : "same_shard",
"decision" : "YES",
"explanation" : "this node does not hold a copy of this shard"
},
{
"decider" : "disk_threshold",
"decision" : "YES",
"explanation" : "enough disk for shard on node, free: [14.5tb], shard size: [2mb], free after allocating shard: [14.5tb]"
},
{
"decider" : "throttling",
"decision" : "YES",
"explanation" : "below shard recovery limit of outgoing: [0 < 2] incoming: [0 < 2]"
},
{
"decider" : "shards_limit",
"decision" : "YES",
"explanation" : "total shard limits are disabled: [index: -1, cluster: -1] <= 0"
},
{
"decider" : "awareness",
"decision" : "YES",
"explanation" : "allocation awareness is not enabled, set cluster setting [cluster.routing.allocation.awareness.attributes] to enable it"
}
]
},
{
"node_id" : "afbAZaznQwaRtryF7yI4dA",
"node_name" : "node003",
"transport_address" : "10.96.113.19:9300",
"node_attributes" : {
"ml.machine_memory" : "99531649024",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"transform.node" : "true"
},
"node_decision" : "worse_balance",
"weight_ranking" : 1,
"deciders" : [
{
"decider" : "max_retry",
"decision" : "YES",
"explanation" : "shard has no previous failures"
},
{
"decider" : "replica_after_primary_active",
"decision" : "YES",
"explanation" : "shard is primary and can be allocated"
},
{
"decider" : "enable",
"decision" : "YES",
"explanation" : "all allocations are allowed"
},
{
"decider" : "node_version",
"decision" : "YES",
"explanation" : "can relocate primary shard from a node with version [7.8.0] to a node with equal-or-newer version [7.8.0]"
},
{
"decider" : "snapshot_in_progress",
"decision" : "YES",
"explanation" : "no snapshots are currently running"
},
{
"decider" : "restore_in_progress",
"decision" : "YES",
"explanation" : "ignored as shard is not being recovered from a snapshot"
},
{
"decider" : "filter",
"decision" : "YES",
"explanation" : "node passes include/exclude/require filters"
},
{
"decider" : "same_shard",
"decision" : "YES",
"explanation" : "this node does not hold a copy of this shard"
},
{
"decider" : "disk_threshold",
"decision" : "YES",
"explanation" : "enough disk for shard on node, free: [14.5tb], shard size: [2mb], free after allocating shard: [14.5tb]"
},
{
"decider" : "throttling",
"decision" : "YES",
"explanation" : "below shard recovery limit of outgoing: [0 < 2] incoming: [0 < 2]"
},
{
"decider" : "shards_limit",
"decision" : "YES",
"explanation" : "total shard limits are disabled: [index: -1, cluster: -1] <= 0"
},
{
"decider" : "awareness",
"decision" : "YES",
"explanation" : "allocation awareness is not enabled, set cluster setting [cluster.routing.allocation.awareness.attributes] to enable it"
}
]
},
{
"node_id" : "vhFAg67YSgquqP8tR-s98w",
"node_name" : "node002",
"transport_address" : "10.96.112.213:9300",
"node_attributes" : {
"ml.machine_memory" : "99531649024",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"transform.node" : "true"
},
"node_decision" : "worse_balance",
"weight_ranking" : 1,
"deciders" : [
{
"decider" : "max_retry",
"decision" : "YES",
"explanation" : "shard has no previous failures"
},
{
"decider" : "replica_after_primary_active",
"decision" : "YES",
"explanation" : "shard is primary and can be allocated"
},
{
"decider" : "enable",
"decision" : "YES",
"explanation" : "all allocations are allowed"
},
{
"decider" : "node_version",
"decision" : "YES",
"explanation" : "can relocate primary shard from a node with version [7.8.0] to a node with equal-or-newer version [7.8.0]"
},
{
"decider" : "snapshot_in_progress",
"decision" : "YES",
"explanation" : "no snapshots are currently running"
},
{
"decider" : "restore_in_progress",
"decision" : "YES",
"explanation" : "ignored as shard is not being recovered from a snapshot"
},
{
"decider" : "filter",
"decision" : "YES",
"explanation" : "node passes include/exclude/require filters"
},
{
"decider" : "same_shard",
"decision" : "YES",
"explanation" : "this node does not hold a copy of this shard"
},
{
"decider" : "disk_threshold",
"decision" : "YES",
"explanation" : "enough disk for shard on node, free: [14.5tb], shard size: [2mb], free after allocating shard: [14.5tb]"
},
{
"decider" : "throttling",
"decision" : "YES",
"explanation" : "below shard recovery limit of outgoing: [0 < 2] incoming: [0 < 2]"
},
{
"decider" : "shards_limit",
"decision" : "YES",
"explanation" : "total shard limits are disabled: [index: -1, cluster: -1] <= 0"
},
{
"decider" : "awareness",
"decision" : "YES",
"explanation" : "allocation awareness is not enabled, set cluster setting [cluster.routing.allocation.awareness.attributes] to enable it"
}
]
},
{
"node_id" : "xiR8clLRSVirvkmlyDpgXg",
"node_name" : "node001",
"transport_address" : "10.96.110.92:9300",
"node_attributes" : {
"ml.machine_memory" : "99750834176",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"transform.node" : "true"
},
"node_decision" : "worse_balance",
"weight_ranking" : 1,
"deciders" : [
{
"decider" : "max_retry",
"decision" : "YES",
"explanation" : "shard has no previous failures"
},
{
"decider" : "replica_after_primary_active",
"decision" : "YES",
"explanation" : "shard is primary and can be allocated"
},
{
"decider" : "enable",
"decision" : "YES",
"explanation" : "all allocations are allowed"
},
{
"decider" : "node_version",
"decision" : "YES",
"explanation" : "can relocate primary shard from a node with version [7.8.0] to a node with equal-or-newer version [7.8.0]"
},
{
"decider" : "snapshot_in_progress",
"decision" : "YES",
"explanation" : "no snapshots are currently running"
},
{
"decider" : "restore_in_progress",
"decision" : "YES",
"explanation" : "ignored as shard is not being recovered from a snapshot"
},
{
"decider" : "filter",
"decision" : "YES",
"explanation" : "node passes include/exclude/require filters"
},
{
"decider" : "same_shard",
"decision" : "YES",
"explanation" : "this node does not hold a copy of this shard"
},
{
"decider" : "disk_threshold",
"decision" : "YES",
"explanation" : "enough disk for shard on node, free: [14.5tb], shard size: [2mb], free after allocating shard: [14.5tb]"
},
{
"decider" : "throttling",
"decision" : "YES",
"explanation" : "below shard recovery limit of outgoing: [0 < 2] incoming: [0 < 2]"
},
{
"decider" : "shards_limit",
"decision" : "YES",
"explanation" : "total shard limits are disabled: [index: -1, cluster: -1] <= 0"
},
{
"decider" : "awareness",
"decision" : "YES",
"explanation" : "allocation awareness is not enabled, set cluster setting [cluster.routing.allocation.awareness.attributes] to enable it"
}
]
}
]
}
Upvotes: 1
Views: 692
Reputation: 11885
As it turns out my cluster has a cluster.routing.allocation.balance.shard
of zero.
Solved this by
PUT /_cluster/settings
{
"persistent" : {
"cluster.routing.allocation.balance.shard" : "0.45"
}
}
Upvotes: 0
Reputation: 347
Elasticsearch automatically takes care of allocating shards to different nodes. Try to rebalance the cluster; that may fix the problem: https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-cluster.html#:~:text=Elasticsearch%20runs%20an%20automatic%20process,from%20completely%20balancing%20the%20cluster.
Upvotes: 0
Reputation: 32386
Elasticsearch by default tries to spread the shards evenly across all the data nodes, so in your case it's really strange that all the shards are on the same data node.
You should debug the cause of it, hopefully you don't have a single data-node in your cluster, please provide your cluster settings in order to get more information on your cluster settings and setup.
Also provide the output of Shard allocation explain API.
For the time being, you can manually move these shards onto other data nodes by using the cluster reroute API.
Upvotes: 1