Reputation: 143
I am new to ELK and I am trying to load a locally stored .csv file through Logstash so that I can use it with Elasticsearch.
The Logstash config file looks like this:
input {
  file {
    path => "C:\ELK-Stack\Cars Data Set\cars.csv"
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}
filter {
  csv {
    separator => ","
    columns => ["maker","model","mileage","manufacture-year","engine_displacement","engine_power","body_type","color_slug","stk_year","transmission","door_count","seat_count","fuel_type","date_created","date_last_seen","price_eur"]
  }
  mutate { convert => ["mileage", "integer"] }
  mutate { convert => ["price_eur", "float"] }
  mutate { convert => ["door_count", "integer"] }
  mutate { convert => ["engine_power", "integer"] }
  mutate { convert => ["seat_count", "integer"] }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]}
    index => "cars"
    document_type => "sold_cars"
  }
  stdout {}
}
And the path of the file is: C:\ELK-Stack\Cars Data Set\cars.csv
When I run it, the output shows a LogStash::ConfigurationError.
The .csv file has well over a million rows. Any help would be appreciated.
EDIT:
Now I am working on another dataset and am unable to load it through Logstash. The config file looks like this:
input {
  file {
    path => "C:\ELK-Stack\311.csv"
    start_position => "beginning"
    sincedb_path => "NUL"
  }
}
filter {
  csv {
    separator => ","
    columns => ["Unique Key","Created Date","Closed Date","Agency","Agency Name","Complaint Type","Descriptor","Location Type","Incident Zip","Incident Address","Street Name","Cross Street 1","Cross Street 2","Intersection Street 1","Intersection Street 2","Address Type","City","Landmark","Facility Type","Status","Due Date","Resolution Description","Resolution Action Updated Date","Community Board","BBL","Borough","X Coordinate (State Plane)","Y Coordinate (State Plane)","Open Data Channel Type","Park Facility Name","Park Borough","Vehicle Type","Taxi Company Borough","Taxi Pick Up Location","Bridge Highway Name","Bridge Highway Segment","Latitude","Longitude","Location"]
  }
  mutate { convert => ["Unique Key", "integer"] }
  mutate { convert => ["Created Date", "timestamp"] }
  mutate { convert => ["Closed Date", "timestamp"] }
  mutate { convert => ["Due Date", "timestamp"] }
  mutate { convert => ["Resolution Action Updated Date", "timestamp"] }
  mutate { convert => ["X Coordinate (State Plane)", "integer"] }
  mutate { convert => ["X Coordinate (State Plane)", "integer"] }
  mutate { convert => ["Latitude", "integer"] }
  mutate { convert => ["Longitude", "integer"] }
  mutate { convert => ["Location", "integer"] }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "311"
  }
  stdout {}
}
Any ideas what could be wrong?
Upvotes: 0
Views: 153
Reputation: 7473
You have two errors in your configuration. The first one is a typo in your output block: a stray closing curly bracket on the hosts line, which is what the error log describes:
exception => "LogStash::ConfigurationError"
The offending line is this one: hosts => ["localhost:9200"]}
This is the fixed configuration:
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "cars"
  }
  stdout {}
}
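As a side note, you can catch this kind of syntax error before starting the pipeline by asking Logstash to validate the config file and exit (the file name cars.conf is just an example; adjust the path to wherever you saved yours):
bin\logstash.bat -f "C:/ELK-Stack/cars.conf" --config.test_and_exit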
And since you are running Logstash 7.5: the document_type option was removed in version 7.0, so you need to remove it from your output block as well.
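If you still want a type-like label on each document, one option (just a sketch; the field name doc_type is arbitrary, and "sold_cars" is simply the value from your original config) is to store it as an ordinary field in the filter block:
filter {
  # document_type is gone in 7.x; keep the label as a normal field instead
  mutate {
    add_field => { "doc_type" => "sold_cars" }
  }
}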
The second error is in your input block. You should use forward slashes in the path even when running Windows, and a sincedb_path pointing to /dev/null is a Linux/macOS convention; on Windows you should use NUL instead.
This is the correct configuration:
input {
  file {
    path => "C:/ELK-Stack/Cars Data Set/cars.csv"
    start_position => "beginning"
    sincedb_path => "NUL"
  }
}
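Note that pointing sincedb_path at NUL (or /dev/null on Linux/macOS) means Logstash never persists its read position, so the whole CSV is re-read from the beginning on every run. That is usually what you want for a one-off import like this, but not for a log file that is being tailed continuously.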
Upvotes: 2