diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 3ae930c..a8baca3 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -2,6 +2,7 @@ require "logstash/filters/base" require "logstash/namespace" require "logstash/environment" + require "logstash/event" require "logstash/patterns/core" require "grok-pure" # rubygem 'jls-grok' require "set" @@ -237,6 +238,11 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # will be parsed and `hello world` will overwrite the original message. config :overwrite, :validate => :array, :default => [] + # If this attribute is set, the output of this filter will be an array + # of objects written to the key supplied in this config value. This is + # useful if your input is an array of messages to match. + config :output_objects, :validate => :string, :default => nil + attr_reader :timeout_enforcer # Register default pattern paths @@ -331,9 +337,11 @@ def match(groks, field, event) @logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name) return false end - + private def match_against_groks(groks, field, input, event) + target_event = @output_objects ? LogStash::Event.new : event + input = input.to_s matched = false groks.each do |grok| @@ -341,11 +349,18 @@ def match_against_groks(groks, field, input, event) matched = @timeout_enforcer.grok_till_timeout(grok, field, input) if matched - grok.capture(matched) {|field, value| handle(field, value, event)} + grok.capture(matched) {|field, value| handle(field, value, target_event)} break if @break_on_match end end - + + if @output_objects + output_array = event.get(@output_objects) + output_array = [] unless output_array.is_a? 
Array + output_array << target_event.to_hash + event.set(@output_objects, output_array) + end + matched end diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index 9f79bad..622ac23 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -44,6 +44,56 @@ def pattern_path(path) end end + describe "build object from message" do + config <<-CONFIG + filter { + grok { + match => { "message" => "%{SYSLOGLINE}" } + output_objects => "syslogs" + } + } + CONFIG + + sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do + insist { subject.get("syslogs")[0]["tags"].nil? } + insist { subject.get("syslogs")[0]["logsource"] } == "evita" + insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25" + insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]" + insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd" + insist { subject.get("syslogs")[0]["pid"] } == "1713" + end + end + + describe "build objects from array of messages" do + config <<-CONFIG + filter { + grok { + match => { "message" => "%{SYSLOGLINE}" } + output_objects => "syslogs" + } + } + CONFIG + + sample("message" => [ + "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]", + "Mar 29 04:20:32 evita postfix/smtpd[1737]: connect from steve.cloud9.net[168.100.1.4]" + ]) do + insist { subject.get("syslogs")[0]["tags"].nil? } + insist { subject.get("syslogs")[0]["logsource"] } == "evita" + insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25" + insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]" + insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd" + insist { subject.get("syslogs")[0]["pid"] } == "1713" + + insist { subject.get("syslogs")[1]["tags"].nil? 
} + insist { subject.get("syslogs")[1]["logsource"] } == "evita" + insist { subject.get("syslogs")[1]["timestamp"] } == "Mar 29 04:20:32" + insist { subject.get("syslogs")[1]["message"] } == "connect from steve.cloud9.net[168.100.1.4]" + insist { subject.get("syslogs")[1]["program"] } == "postfix/smtpd" + insist { subject.get("syslogs")[1]["pid"] } == "1737" + end + end + describe "ietf 5424 syslog line" do # The logstash config goes here. # At this time, only filters are supported.