Skip to content

Commit

Permalink
Add an 'output_objects' config that builds an object for each grok pe…
Browse files Browse the repository at this point in the history
…rformed

Previous functionality grouped each field by name, meaning you lost all
context about whatever you're parsing.
  • Loading branch information
w4 committed Sep 27, 2018
1 parent 95d4599 commit 289fcb9
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 3 deletions.
21 changes: 18 additions & 3 deletions lib/logstash/filters/grok.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
require "logstash/filters/base"
require "logstash/namespace"
require "logstash/environment"
require "logstash/event"
require "logstash/patterns/core"
require "grok-pure" # rubygem 'jls-grok'
require "set"
Expand Down Expand Up @@ -237,6 +238,11 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
# will be parsed and `hello world` will overwrite the original message.
config :overwrite, :validate => :array, :default => []

# If this attribute is set, the output of this filter will be an array of
# objects written to the field named by this config value; one object is
# built for each grok performed. This is useful if your input is an array
# of messages to match.
config :output_objects, :validate => :string, :default => nil

attr_reader :timeout_enforcer

# Register default pattern paths
Expand Down Expand Up @@ -331,21 +337,30 @@ def match(groks, field, event)
@logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name)
return false
end

private
# Runs +input+ through each grok in +groks+ and passes every capture to +handle+.
#
# When the `output_objects` setting is configured, captures are collected on a
# scratch LogStash::Event instead of on +event+. That scratch event's hash is
# then appended to the array stored on +event+ under the `output_objects` key
# (a non-Array value already stored there is replaced by a fresh array). The
# object is appended on every call, whether or not any grok matched. Without
# `output_objects`, captures are handled directly against +event+.
#
# groks - collection of grok objects, tried in order
# field - name of the field being matched; forwarded to the timeout enforcer
# input - value to match; converted with to_s before matching
# event - the event being filtered
#
# Returns the result of the last grok attempted (truthy when it matched), or
# false when +groks+ is empty. With break_on_match enabled, iteration stops at
# the first match.
def match_against_groks(groks, field, input, event)
  # Captures go to a scratch event when output_objects is configured so they
  # can be stored as one self-contained object rather than merged into +event+.
  target_event = @output_objects ? LogStash::Event.new : event

  # Convert anything else to string (number, hash, etc)
  input = input.to_s
  matched = false
  groks.each do |grok|
    matched = @timeout_enforcer.grok_till_timeout(grok, field, input)
    next unless matched

    # Handle each capture exactly once, and only against the selected target.
    grok.capture(matched) { |name, value| handle(name, value, target_event) }
    break if @break_on_match
  end

  if @output_objects
    output_array = event.get(@output_objects)
    output_array = [] unless output_array.is_a?(Array)
    output_array << target_event.to_hash
    event.set(@output_objects, output_array)
  end

  matched
end

Expand Down
50 changes: 50 additions & 0 deletions spec/filters/grok_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,56 @@ def pattern_path(path)
end
end

# With output_objects set, a single message should produce a one-element
# array under "syslogs" whose entry holds the SYSLOGLINE captures.
describe "build object from message" do
  config <<-CONFIG
    filter {
      grok {
        match => { "message" => "%{SYSLOGLINE}" }
        output_objects => "syslogs"
      }
    }
  CONFIG

  sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do
    # nil? must be called on the Insist object itself; a predicate placed
    # inside the block is never asserted.
    insist { subject.get("syslogs")[0]["tags"] }.nil?
    insist { subject.get("syslogs")[0]["logsource"] } == "evita"
    insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25"
    insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
    insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd"
    insist { subject.get("syslogs")[0]["pid"] } == "1713"
  end
end

# With output_objects set, an array-valued "message" field should produce one
# object per message, in input order, under "syslogs".
describe "build objects from array of messages" do
  config <<-CONFIG
    filter {
      grok {
        match => { "message" => "%{SYSLOGLINE}" }
        output_objects => "syslogs"
      }
    }
  CONFIG

  sample("message" => [
    "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]",
    "Mar 29 04:20:32 evita postfix/smtpd[1737]: connect from steve.cloud9.net[168.100.1.4]"
  ]) do
    # nil? must be called on the Insist object itself; a predicate placed
    # inside the block is never asserted.
    insist { subject.get("syslogs")[0]["tags"] }.nil?
    insist { subject.get("syslogs")[0]["logsource"] } == "evita"
    insist { subject.get("syslogs")[0]["timestamp"] } == "Mar 16 00:01:25"
    insist { subject.get("syslogs")[0]["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
    insist { subject.get("syslogs")[0]["program"] } == "postfix/smtpd"
    insist { subject.get("syslogs")[0]["pid"] } == "1713"

    insist { subject.get("syslogs")[1]["tags"] }.nil?
    insist { subject.get("syslogs")[1]["logsource"] } == "evita"
    insist { subject.get("syslogs")[1]["timestamp"] } == "Mar 29 04:20:32"
    insist { subject.get("syslogs")[1]["message"] } == "connect from steve.cloud9.net[168.100.1.4]"
    insist { subject.get("syslogs")[1]["program"] } == "postfix/smtpd"
    insist { subject.get("syslogs")[1]["pid"] } == "1737"
  end
end

describe "ietf 5424 syslog line" do
# The logstash config goes here.
# At this time, only filters are supported.
Expand Down

0 comments on commit 289fcb9

Please sign in to comment.