Reputation: 1594
I am trying to move data from S3 (.csv file's data) to elastic search cluster using logstash using custom templete. But it only shows docs.count=1 and rest of the records as docs.deleted when i check using following query in Kibana:-
GET /_cat/indices?v
My first question is :-
Now when I query this index using below query in Kibana :-
GET /my_file_index/_search
{
"query": {
"match_all": {}
}
}
I get only one record with comma separated data in "message" :
field, So the second question is :-
I tried giving columns field in logstash csv filter also but no luck.
columns => ["col1", "col2",...]
Any help would be appreciated.
EDIT-1: below is my logstash.conf file:-
input {
s3{
access_key_id => "xxx"
secret_access_key => "xxxx"
region => "eu-xxx-1"
bucket => "xxxx"
prefix => "abc/stocks_03-jul-2018.csv"
}
}
filter {
csv {
separator => ","
columns => ["AAA","BBB","CCC"]
}
}
output {
amazon_es {
index => "my_r_index"
document_type => "my_r_index"
hosts => "vpc-totemdev-xxxx.eu-xxx-1.es.amazonaws.com"
region => "eu-xxxx-1"
aws_access_key_id => 'xxxxx'
aws_secret_access_key => 'xxxxxx+xxxxx'
document_id => "%{id}"
template => "templates/template_2.json"
template_name => "my_r_index"
}
}
Note: Version of logstash : 6.3.1 Version of elasticsearch : 6.2
EDIT:-2 Adding template_2.json file along with sample csv header :-
{
"template" : "my_r_index",
"settings" : {
"index" : {
"number_of_shards" : 50,
"number_of_replicas" : 1
},
"index.codec" : "best_compression",
"index.refresh_interval" : "60s"
},
"mappings" : {
"_default_" : {
"_all" : { "enabled" : false },
"properties" : {
"SECURITY" : {
"type" : "keyword"
},
"SERVICEID" : {
"type" : "integer"
},
"MEMBERID" : {
"type" : "integer"
},
"VALUEDATE" : {
"type" : "date"
},
"COUNTRY" : {
"type" : "keyword"
},
"CURRENCY" : {
"type" : "keyword"
},
"ABC" : {
"type" : "integer"
},
"PQR" : {
"type" : "keyword"
},
"KKK" : {
"type" : "keyword"
},
"EXPIRYDATE" : {
"type" : "text",
"index" : "false"
},
"SOMEID" : {
"type" : "double",
"index" : "false"
},
"DDD" : {
"type" : "double",
"index" : "false"
},
"EEE" : {
"type" : "double",
"index" : "false"
},
"FFF" : {
"type" : "double",
"index" : "false"
},
"GGG" : {
"type" : "text",
"index" : "false"
},
"LLL" : {
"type" : "double",
"index" : "false"
},
"MMM" : {
"type" : "double",
"index" : "false"
},
"NNN" : {
"type" : "double",
"index" : "false"
},
"OOO" : {
"type" : "double",
"index" : "false"
},
"PPP" : {
"type" : "text",
"index" : "false"
},
"QQQ" : {
"type" : "integer",
"index" : "false"
},
"RRR" : {
"type" : "double",
"index" : "false"
},
"SSS" : {
"type" : "double",
"index" : "false"
},
"TTT" : {
"type" : "double",
"index" : "false"
},
"UUU" : {
"type" : "double",
"index" : "false"
},
"VVV" : {
"type" : "text",
"index" : "false"
},
"WWW" : {
"type" : "double",
"index" : "false"
},
"XXX" : {
"type" : "double",
"index" : "false"
},
"YYY" : {
"type" : "double",
"index" : "false"
},
"ZZZ" : {
"type" : "double",
"index" : "false"
},
"KNOCKORWARD" : {
"type" : "text",
"index" : "false"
},
"RANGEATSSPUT" : {
"type" : "double",
"index" : "false"
},
"STDATMESSPUT" : {
"type" : "double",
"index" : "false"
},
"CONSENSUPUT" : {
"type" : "double",
"index" : "false"
},
"CLIENTLESSPUT" : {
"type" : "double",
"index" : "false"
},
"KNOCKOUESSPUT" : {
"type" : "text",
"index" : "false"
},
"RANGACTOR" : {
"type" : "double",
"index" : "false"
},
"STDDACTOR" : {
"type" : "double",
"index" : "false"
},
"CONSCTOR" : {
"type" : "double",
"index" : "false"
},
"CLIENTOR" : {
"type" : "double",
"index" : "false"
},
"KNOCKOACTOR" : {
"type" : "text",
"index" : "false"
},
"RANGEPRICE" : {
"type" : "double",
"index" : "false"
},
"STANDARCE" : {
"type" : "double",
"index" : "false"
},
"NUMBERICE" : {
"type" : "integer",
"index" : "false"
},
"CONSECE" : {
"type" : "double",
"index" : "false"
},
"CLIECE" : {
"type" : "double",
"index" : "false"
},
"KNOCICE" : {
"type" : "text",
"index" : "false"
},
"SKEWICE" : {
"type" : "text",
"index" : "false"
},
"WILDISED" : {
"type" : "text",
"index" : "false"
},
"WILDATUS" : {
"type" : "text",
"index" : "false"
},
"RRF" : {
"type" : "double",
"index" : "false"
},
"SRF" : {
"type" : "double",
"index" : "false"
},
"CNRF" : {
"type" : "double",
"index" : "false"
},
"CTRF" : {
"type" : "double",
"index" : "false"
},
"RANADDLE" : {
"type" : "double",
"index" : "false"
},
"STANDANSTRADDLE" : {
"type" : "double",
"index" : "false"
},
"CONSLE" : {
"type" : "double",
"index" : "false"
},
"CLIDLE" : {
"type" : "double",
"index" : "false"
},
"KNOCKOADDLE" : {
"type" : "text",
"index" : "false"
},
"RANGEFM" : {
"type" : "double",
"index" : "false"
},
"SMIUM" : {
"type" : "double",
"index" : "false"
},
"CONIUM" : {
"type" : "double",
"index" : "false"
},
"CLIEEMIUM" : {
"type" : "double",
"index" : "false"
},
"KNOREMIUM" : {
"type" : "text",
"index" : "false"
},
"COT" : {
"type" : "double",
"index" : "false"
},
"CLIEEDSPOT" : {
"type" : "double",
"index" : "false"
},
"IME" : {
"type" : "keyword"
},
"KKE" : {
"type" : "keyword"
}
}
}
}
}
My excel content as:-
Header : Actual header is quite lengthy as have lot many columns, please consider other column names similar to below in continuation.
SECURITY | SERVICEID | MEMBERID | VALUEDATE...
First row : Again column values as below some columns has blank values , I have mentioned above real template file (in mapping file above) which has all column values.
KKK-LMN 2 1815 6/25/2018
PPL-ORL 2 1815 6/25/2018
SLB-ORD 2 1815 6/25/2018
3. Kibana query output
Query :
GET /my_r_index/_search
{
"query": {
"match_all": {}
}
}
Outout:
{
"_index": "my_r_index",
"_type": "my_r_index",
"_id": "IjjIZWUBduulDsi0vYot",
"_score": 1,
"_source": {
"@version": "1",
"message": "XXX-XXX-XXX-USD,2,3190,2018-07-03,UNITED STATES,USD,300,60,Put,2042-12-19,,,,.009108041,q,,,,.269171754,q,,,,,.024127966,q,,,,68.414017367,q,,,,.298398645,q,,,,.502677959,q,,,,,0.040880692400344164,q,,,,,,,159.361792143,,,,.631296636,q,,,,.154877384,q,,42.93,N,Y,\n",
"@timestamp": "2018-08-23T07:56:06.515Z"
}
},
...Other similar records as above.
EDIT-3:
Sample output after using autodetect_column_names => true :-
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "indr",
"_type": "logs",
"_id": "hAF1aWUBS_wbCH7ZG4tW",
"_score": 1,
"_source": {
"2": "2",
"1815": "1815",
"message": """
PPL-ORD-XNYS-USD,2,1815,6/25/2018,UNITED STATES
""",
"SLB-ORD-XNYS-USD": "PPL-ORD-XNYS-USD",
"6/25/2018": "6/25/2018",
"@timestamp": "2018-08-24T01:03:26.436Z",
"UNITED STATES": "UNITED STATES",
"@version": "1"
}
},
{
"_index": "indr",
"_type": "logs",
"_id": "kP11aWUBctDorPcGHICS",
"_score": 1,
"_source": {
"2": "2",
"1815": "1815",
"message": """
SLBUSD,2,1815,4/22/2018,UNITEDSTATES
""",
"SLB-ORD-XNYS-USD": "SLBUSD",
"6/25/2018": "4/22/2018",
"@timestamp": "2018-08-24T01:03:26.436Z",
"UNITED STATES": "UNITEDSTATES",
"@version": "1"
}
},
{
"_index": "indr",
"_type": "logs",
"_id": "j_11aWUBctDorPcGHICS",
"_score": 1,
"_source": {
"2": "SERVICE",
"1815": "CLIENT",
"message": """
UNDERLYING,SERVICE,CLIENT,VALUATIONDATE,COUNTRY
""",
"SLB-ORD-XNYS-USD": "UNDERLYING",
"6/25/2018": "VALUATIONDATE",
"@timestamp": "2018-08-24T01:03:26.411Z",
"UNITED STATES": "COUNTRY",
"@version": "1"
}
}
]
}
}
Upvotes: 0
Views: 887
Reputation: 217584
I'm pretty certain your single document has an id of %{id}
. The first problem comes from the fact that in your CSV file, you are not extracting a column whose name is id
and that's what you're using in document_id => "%{id}"
hence all rows are getting indexed with the id %{id}
and each indexation deletes the previous. At the end, you have a single document which has been indexed as many times as the rows in your CSV.
Regarding the second issue, you need to fix the filter section like below:
filter {
csv {
separator => ","
autodetect_column_names => true
}
date {
match => [ "VALUATIONDATE", "M/dd/yyyy" ]
}
}
Also you need to fix your index template like this (I've only added the format
setting in the VALUATIONDATE
field:
{
"order": 0,
"template": "helloindex",
"settings": {
"index": {
"codec": "best_compression",
"refresh_interval": "60s",
"number_of_shards": "10",
"number_of_replicas": "1"
}
},
"mappings": {
"_default_": {
"_all": {
"enabled": false
},
"properties": {
"UNDERLYING": {
"type": "keyword"
},
"SERVICE": {
"type": "integer"
},
"CLIENT": {
"type": "integer"
},
"VALUATIONDATE": {
"type": "date",
"format": "MM/dd/yyyy"
},
"COUNTRY": {
"type": "keyword"
}
}
}
},
"aliases": {}
}
Upvotes: 1