Reputation: 1149
Logstash version 1.5.0.1
I am trying to use the logstash s3 input plugin to download cloudfront logs and the cloudfront codec plugin to filter the stream.
I installed the cloudfront codec with bin/plugin install logstash-codec-cloudfront
.
I am getting the following: Error: Object: #Version: 1.0 is not a legal argument to this wrapper, cause it doesn't respond to "read".
Here is the full error message from /var/logs/logstash/logstash.log
{:timestamp=>"2015-08-05T13:35:20.809000-0400", :message=>"A plugin had an unrecoverable error. Will restart this plugin.\n Plugin: <LogStash::Inputs::S3 bucket=>\"[BUCKETNAME]\", prefix=>\"cloudfront/\", region=>\"us-east-1\", type=>\"cloudfront\", secret_access_key=>\"[SECRETKEY]/1\", access_key_id=>\"[KEYID]\", sincedb_path=>\"/opt/logstash_input/s3/cloudfront/sincedb\", backup_to_dir=>\"/opt/logstash_input/s3/cloudfront/backup\", temporary_directory=>\"/var/lib/logstash/logstash\">\n Error: Object: #Version: 1.0\n is not a legal argument to this wrapper, cause it doesn't respond to \"read\".", :level=>:error}
My logstash config file: /etc/logstash/conf.d/cloudfront.conf
input {
s3 {
bucket => "[BUCKETNAME]"
delete => false
interval => 60 # seconds
prefix => "cloudfront/"
region => "us-east-1"
type => "cloudfront"
codec => "cloudfront"
secret_access_key => "[SECRETKEY]"
access_key_id => "[KEYID]"
sincedb_path => "/opt/logstash_input/s3/cloudfront/sincedb"
backup_to_dir => "/opt/logstash_input/s3/cloudfront/backup"
use_ssl => true
}
}
I'm using a similar s3 input stream successfully to get my cloudtrail logs into logstash that is based on the Answer from a stackoverflow post.
CloudFront logfile from s3 (I only included the header from the file):
#Version: 1.0
#Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type
The header looks like it is basically the correct format based on lines 26-29 from the cloudfront plugin github repo cloudfront_spec.rb and the official AWS CloudFront Access Logs docs.
Any ideas? Thanks!
[UPDATE 9/23/2015]
Based on this post I tried using the gzip_lines codec plugin, installed with bin/plugin install logstash-codec-gzip_lines
and parse the file with a filter, unfortunately I am getting the exact same error. It looks like it is an issue with the first character of the log file having #
.
For the record, here is the new attempt, including an updated pattern for parsing the cloudfront logfile due to four new fields:
/etc/logstash/conf.d/cloudfront.conf
input {
s3 {
bucket => "[BUCKETNAME]"
delete => false
interval => 60 # seconds
prefix => "cloudfront/"
region => "us-east-1"
type => "cloudfront"
codec => "gzip_lines"
secret_access_key => "[SECRETKEY]"
access_key_id => "[KEYID]"
sincedb_path => "/opt/logstash_input/s3/cloudfront/sincedb"
backup_to_dir => "/opt/logstash_input/s3/cloudfront/backup"
use_ssl => true
}
}
filter {
grok {
type => "cloudfront"
pattern => "%{DATE_EU:date}\t%{TIME:time}\t%{WORD:x_edge_location}\t(?:%{NUMBER:sc_bytes}|-)\t%{IPORHOST:c_ip}\t%{WORD:cs_method}\t%{HOSTNAME:cs_host}\t%{NOTSPACE:cs_uri_stem}\t%{NUMBER:sc_status}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:User_Agent}\t%{GREEDYDATA:cs_uri_stem}\t%{GREEDYDATA:cookies}\t%{WORD:x_edge_result_type}\t%{NOTSPACE:x_edge_request_id}\t%{HOSTNAME:x_host_header}\t%{URIPROTO:cs_protocol}\t%{INT:cs_bytes}\t%{GREEDYDATA:time_taken}\t%{GREEDYDATA:x_forwarded_for}\t%{GREEDYDATA:ssl_protocol}\t%{GREEDYDATA:ssl_cipher}\t%{GREEDYDATA:x_edge_response_result_type}"
}
mutate {
type => "cloudfront"
add_field => [ "listener_timestamp", "%{date} %{time}" ]
}
date {
type => "cloudfront"
match => [ "listener_timestamp", "yy-MM-dd HH:mm:ss" ]
}
}
Upvotes: 1
Views: 762
Reputation: 1149
FTR here is the full config that is working for me:
input {
s3 {
bucket => "[BUCKET NAME]"
delete => false
interval => 60 # seconds
prefix => "CloudFront/"
region => "us-east-1"
type => "cloudfront"
codec => "plain"
secret_access_key => "[SECRETKEY]"
access_key_id => "[KEYID]"
sincedb_path => "/opt/logstash_input/s3/cloudfront/sincedb"
backup_to_dir => "/opt/logstash_input/s3/cloudfront/backup"
use_ssl => true
}
}
filter {
if [type] == "cloudfront" {
if ( ("#Version: 1.0" in [message]) or ("#Fields: date" in [message])) {
drop {}
}
grok {
match => { "message" => "%{DATE_EU:date}\t%{TIME:time}\t%{WORD:x_edge_location}\t(?:%{NUMBER:sc_bytes}|-)\t%{IPORHOST:c_ip}\t%{WORD:cs_method}\t%{HOSTNAME:cs_host}\t%{NOTSPACE:cs_uri_stem}\t%{NUMBER:sc_status}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:User_Agent}\t%{GREEDYDATA:cs_uri_stem}\t%{GREEDYDATA:cookies}\t%{WORD:x_edge_result_type}\t%{NOTSPACE:x_edge_request_id}\t%{HOSTNAME:x_host_header}\t%{URIPROTO:cs_protocol}\t%{INT:cs_bytes}\t%{GREEDYDATA:time_taken}\t%{GREEDYDATA:x_forwarded_for}\t%{GREEDYDATA:ssl_protocol}\t%{GREEDYDATA:ssl_cipher}\t%{GREEDYDATA:x_edge_response_result_type}" }
}
mutate {
add_field => [ "received_at", "%{@timestamp}" ]
add_field => [ "listener_timestamp", "%{date} %{time}" ]
}
date {
match => [ "listener_timestamp", "yy-MM-dd HH:mm:ss" ]
}
date {
locale => "en"
timezone => "UCT"
match => [ "listener_timestamp", "yy-MM-dd HH:mm:ss" ]
target => "@timestamp"
add_field => { "debug" => "timestampMatched"}
}
}
}
Upvotes: 0
Reputation: 16
(this question should probably be marked as duplicate, but until then I copy my answer to the same question on ServerFault)
I had the same issue, changing from
codec > "gzip_lines"
to
codec => "plain"
in the input fixed it for me. Looks like S3 input automatically uncompress gzip files. https://github.com/logstash-plugins/logstash-input-s3/blob/master/lib/logstash/inputs/s3.rb#L13
Upvotes: 0