From b58f7d88ecb3eba1a6a25f1b04276ba952e085f4 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 1 Mar 2016 17:37:50 +0200 Subject: [PATCH 001/126] removing use of git and changed log stash version to below 3 --- logstash-output-cassandra.gemspec | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index 0bdf483..ce4384d 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -11,7 +11,7 @@ Gem::Specification.new do |s| s.require_paths = ["lib"] # Files - s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*') + s.files = `find . -type f | grep -v ^./.git/ | sed "s/^\.\\///"`.split($\)+::Dir.glob('vendor/*') # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/}) @@ -19,8 +19,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" } # Gem dependencies - s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0' + s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 3.0.0' s.add_runtime_dependency 'cassandra-driver' s.add_development_dependency 'logstash-devutils' end - From 51d768cae2292aef5dea84fa04c32bd6fd8a3eb2 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 15:20:14 +0200 Subject: [PATCH 002/126] bumping plugin version and setting the required plugin versions --- logstash-output-cassandra.gemspec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index ce4384d..9281d6c 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-output-cassandra' - s.version = '0.1.1' + s.version = '0.2.0' s.licenses = ['Apache License (2.0)'] s.summary = "Store events into Cassandra" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline 
using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" @@ -19,7 +19,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" } # Gem dependencies - s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 3.0.0' - s.add_runtime_dependency 'cassandra-driver' + s.add_runtime_dependency "logstash-core", '>= 2.0.0', '< 3.0.0' + s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0' s.add_development_dependency 'logstash-devutils' end From b127fbe5970f9b9ab8750943334aa655d7c34660 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 15:20:51 +0200 Subject: [PATCH 003/126] adding a thread safe buffer similar to what is used in the ES output plugin --- lib/logstash/outputs/buffer.rb | 124 +++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 lib/logstash/outputs/buffer.rb diff --git a/lib/logstash/outputs/buffer.rb b/lib/logstash/outputs/buffer.rb new file mode 100644 index 0000000..03edeb3 --- /dev/null +++ b/lib/logstash/outputs/buffer.rb @@ -0,0 +1,124 @@ +require 'concurrent' +java_import java.util.concurrent.locks.ReentrantLock + +module LogStash; module Outputs; class Cassandra + class Buffer + def initialize(logger, max_size, flush_interval, &block) + @logger = logger + # You need to aquire this for anything modifying state generally + @operations_mutex = Mutex.new + @operations_lock = java.util.concurrent.locks.ReentrantLock.new + + @stopping = Concurrent::AtomicBoolean.new(false) + @max_size = max_size + @submit_proc = block + + @buffer = [] + + @last_flush = Time.now + @flush_interval = flush_interval + @flush_thread = spawn_interval_flusher + end + + def push(item) + synchronize do |buffer| + push_unsafe(item) + end + end + alias_method :<<, :push + + # Push multiple items onto the buffer in a single operation + def push_multi(items) + raise ArgumentError, "push multi takes an array!, not an #{items.class}!" 
unless items.is_a?(Array) + synchronize do |buffer| + items.each {|item| push_unsafe(item) } + end + end + + def flush + synchronize { flush_unsafe } + end + + def stop(do_flush=true,wait_complete=true) + return if stopping? + @stopping.make_true + + # No need to acquire a lock in this case + return if !do_flush && !wait_complete + + synchronize do + flush_unsafe if do_flush + @flush_thread.join if wait_complete + end + end + + def contents + synchronize {|buffer| buffer} + end + + # For externally operating on the buffer contents + # this takes a block and will yield the internal buffer and executes + # the block in a synchronized block from the internal mutex + def synchronize + @operations_mutex.synchronize { yield(@buffer) } + end + + # These methods are private for various reasons, chief among them threadsafety! + # Many require the @operations_mutex to be locked to be safe + private + + def push_unsafe(item) + @buffer << item + if @buffer.size >= @max_size + flush_unsafe + end + end + + def spawn_interval_flusher + Thread.new do + loop do + sleep 0.2 + break if stopping? + synchronize { interval_flush } + end + end + end + + def interval_flush + if last_flush_seconds_ago >= @flush_interval + begin + @logger.debug? && @logger.debug("Flushing buffer at interval", + :instance => self.inspect, + :interval => @flush_interval) + flush_unsafe + rescue StandardError => e + @logger.warn("Error flushing buffer at interval!", + :instance => self.inspect, + :message => e.message, + :class => e.class.name, + :backtrace => e.backtrace + ) + rescue Exception => e + @logger.warn("Exception flushing buffer at interval!", :error => e.message, :class => e.class.name) + end + end + end + + def flush_unsafe + if @buffer.size > 0 + @submit_proc.call(@buffer) + @buffer.clear + end + + @last_flush = Time.now # This must always be set to ensure correct timer behavior + end + + def last_flush_seconds_ago + Time.now - @last_flush + end + + def stopping? + @stopping.true? 
+ end + end +end end end From f53337168110e6c200b69168a1237dfa5f07bce2 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 16:40:57 +0200 Subject: [PATCH 004/126] moved retry policy to the cassandra driver with control over the specific policy in use changed batch processing to match the new log stash 2.2 thread / batch recommendations (as done in the ES output plugin) added support for typed set hints added support for filtering the event data instead of hints and single filed support added support for event sprintf parsing for the table name, and the new settings --- lib/logstash/outputs/cassandra.rb | 442 ++++++++++++++++-------------- 1 file changed, 238 insertions(+), 204 deletions(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index fdd7370..35e642c 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -2,10 +2,12 @@ require "logstash/outputs/base" require "logstash/namespace" require "time" +require "logstash/outputs/cassandra/buffer" +require "cassandra" class LogStash::Outputs::Cassandra < LogStash::Outputs::Base - + milestone 1 config_name "cassandra" @@ -16,152 +18,200 @@ class LogStash::Outputs::Cassandra < LogStash::Outputs::Base # Cassandra consistency level. # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" # Default: "one" - config :consistency, :validate => ["any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one"], :default => "one" - + config :consistency, :validate => [ "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" ], :default => "one" + # The keyspace to use config :keyspace, :validate => :string, :required => true - # The table to use + # The table to use (event level processing (e.g. 
%{[key]}) is supported) config :table, :validate => :string, :required => true - + # Username config :username, :validate => :string, :required => true # Password config :password, :validate => :string, :required => true - # Source - config :source, :validate => :string, :default => nil - - # Hints - config :hints, :validate => :hash, :default => {} - - # Number of seconds to wait after failure before retrying - config :retry_delay, :validate => :number, :default => 3, :required => false - - # Set max retry for each batch - config :max_retries, :validate => :number, :default => 3 - - # Ignore bad message - config :ignore_bad_messages, :validate => :boolean, :default => false - + # An optional hash describing how / what to transform / filter from the original event + # Each key is expected to be of the form { event_data => "..." column_name => "..." cassandra_type => "..." } + # Event level processing (e.g. %{[key]}) is supported for all three + config :filter_transform, :validate => :array, :default => nil + + # An optional string which points to the event specific location from which to pull the filter_transform definition + # The contents need to conform with those defined for the filter_transform config setting + # Event level processing (e.g. %{[key]}) is supported + config :filter_transform_event_key, :validate => :string, :default => nil + + # The retry policy to use + # The available options are: + # * default => retry once if needed / possible + # * downgrading_consistency => retry once with a best guess lowered consistency + # * failthrough => fail immediately (i.e. 
no retries) + config :retry_policy, :validate => [ "default", "downgrading_consistency", "failthrough" ], :default => "default", :required => true + + # The command execution timeout + config :request_timeout, :validate => :number, :default => 5 + # Ignore bad values config :ignore_bad_values, :validate => :boolean, :default => false - - # Batch size - config :batch_size, :validate => :number, :default => 1 - # Batch processor tic (sec) - config :batch_processor_thread_period, :validate => :number, :default => 1 + # In Logstashes >= 2.2 this setting defines the maximum sized bulk request Logstash will make + # You you may want to increase this to be in line with your pipeline's batch size. + # If you specify a number larger than the batch size of your pipeline it will have no effect, + # save for the case where a filter increases the size of an inflight batch by outputting + # events. + # + # In Logstashes <= 2.1 this plugin uses its own internal buffer of events. + # This config option sets that size. In these older logstashes this size may + # have a significant impact on heap usage, whereas in 2.2+ it will never increase it. + # To make efficient bulk API calls, we will buffer a certain number of + # events before flushing that out to Cassandra. This setting + # controls how many events will be buffered before sending a batch + # of events. Increasing the `flush_size` has an effect on Logstash's heap size. + # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big commands + # or have increased the `flush_size` to a higher value. + config :flush_size, :validate => :number, :default => 500 - public - def register - require "thread" - require "cassandra" - @@r = 0 + # The amount of time since last flush before a flush is forced. + # + # This setting helps ensure slow event rates don't get stuck in Logstash. 
+ # For example, if your `flush_size` is 100, and you have received 10 events, + # and it has been more than `idle_flush_time` seconds since the last flush, + # Logstash will flush those 10 events automatically. + # + # This helps keep both fast and slow log streams moving along in + # near-real-time. + config :idle_flush_time, :validate => :number, :default => 1 - # Messages collector. When @batch_msg_queue.length > batch_size - # batch_size of messages are sent to Cassandra - @batch_msg_queue = Queue.new + def register() + @statement_cache = {} + assert_filter_transform_structure(@filter_transform) if @filter_transform + setup_buffer_and_handler() + assert_filter_transform_structure() + end - # Failed batches collector. Every retry_delay secs batches from the queue - # are pushed to Cassandra. If a try is failed a batch.try_count is incremented. - # If batch.try_count > max_retries, the batch is rejected - # with error message in error log - @failed_batch_queue = Queue.new + def receive(event) + @buffer << build_cassandra_action(event) + end - @statement_cache = {} - @batch = [] - + # Receive an array of events and immediately attempt to index them (no buffering) + def multi_receive(events) + events.each_slice(@flush_size) do |slice| + safe_submit(slice.map {|e| build_cassandra_action(e) }) + end + end + + def teardown() + close() + end + + def close() + @buffer.stop() + end + + private + def assert_filter_transform_structure(filter_transform) + for item in filter_transform + if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") + raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" + end + end + end + + def setup_buffer_and_handler + @buffer = ::LogStash::Outputs::Cassandra::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + safe_submit(actions) + end + end + + def setup_cassandra_session() cluster = Cassandra.cluster( username: 
@username, password: @password, hosts: @hosts, - consistency: @consistency.to_sym + consistency: @consistency.to_sym, + timeout: @request_timeout, + retry_policy: get_retry_policy(@retry_policy), + logger: @logger ) - - @session = cluster.connect(@keyspace) - - @logger.info("New Cassandra output", :username => @username, - :hosts => @hosts, :keyspace => @keyspace, :table => @table) - - @batch_processor_thread = Thread.new do - loop do - stop_it = Thread.current["stop_it"] - sleep(@batch_processor_thread_period) - send_batch2cassandra stop_it - break if stop_it - end + @session = cluster.connect(@keyspace) + @logger.info("New cassandra session created", + :username => @username, :hosts => @hosts, :keyspace => @keyspace) + end + + def get_retry_policy(policy_name) + case policy_name + when "default" + return ::Cassandra::Retry::Policies::Default.new + when "downgrading_consistency" + return ::Cassandra::Retry::Policies::DowngradingConsistency.new + when "failthrough" + return ::Cassandra::Retry::Policies::Fallthrough.new end + end - @failed_batch_processor_thread = Thread.new do - loop do - stop_it = Thread.current["stop_it"] - sleep(@retry_delay) - resend_batch2cassandra - break if stop_it + def build_cassandra_action(event) + action = {} + action["table"] = event.sprintf(@table) + filter_transform = get_field_transform(event) + if filter_transform + action["data"] = {} + for filter in filter_transform + add_event_value_from_filter_to_action(event, filter, action) end + else + action["data"] = event.to_hash() + # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly + action["data"].reject!{|key| %r{^@} =~ key} + # TODO: add the hint thing here?!... 
+ #convert_values_to_cassandra_types!(action) end - end # def register - public - def receive(event) - return unless output?(event) + return action + end - if @source - msg = event[@source] - else - msg = event.to_hash - # Filter out @timestamp, @version, etc - # to be able to use elasticsearch input plugin directly - msg.reject!{|key| %r{^@} =~ key} + def get_field_transform(event) + filter_transform = nil + if @filter_transform_event_key + filter_transform = event.sprintf(@filter_transform_event_key) + assert_filter_transform_structure(filter_transform) + elsif @filter_transform + filter_transform = @filter_transform end + return filter_transform + end - if !msg.is_a?(Hash) - if @ignore_bad_messages - @logger.warn("Failed to get message from source. Skip it.", - :event => event) - return - end - @logger.fatal("Failed to get message from source. Source is empty or it is not a hash.", - :event => event) - raise "Failed to get message from source. Source is empty or it is not a hash." + def add_event_value_from_filter_to_action(event, filter, action) + event_data = event.sprintf(filter["event_data"]) + if filter.has_key?("cassandra_type") + cassandra_type = event.sprintf(filter["cassandra_type"]) + event_data = convert_value_to_cassandra_type(event_data, cassandra_type) end - - convert2cassandra_format! msg - - @batch_msg_queue.push(msg) - @logger.info("Queue message to be sent") - end # def receive + column_name = event.sprintf(filter["column_name"]) + action["data"][column_name] = event_data + end - private - def send_batch2cassandra stop_it = false - loop do - break if @batch_msg_queue.length < @batch_size and !stop_it - begin - batch = prepare_batch - break if batch.nil? - @session.execute(batch) - @logger.info "Batch sent successfully" - rescue Exception => e - @logger.warn "Failed to send batch (error: #{e.to_s}). Schedule it to send later." 
- @failed_batch_queue.push({:batch => batch, :try_count => 0}) - end + def safe_submit(actions) + begin + batch = prepare_batch(actions) + @session.execute(batch) + @logger.info("Batch sent successfully") + rescue Exception => e + @logger.warn("Failed to send batch (error: #{e.to_s}).") end end - private - def prepare_batch() + def prepare_batch(actions) statement_and_values = [] - while statement_and_values.length < @batch_size and !@batch_msg_queue.empty? - msg = @batch_msg_queue.pop - query = "INSERT INTO #{@keyspace}.#{@table} (#{msg.keys.join(', ')}) - VALUES (#{("?"*msg.keys.count).split(//)*", "})" + for action in actions + query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) + VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" - @statement_cache[query] = @session.prepare(query) unless @statement_cache.key?(query) - statement_and_values << [@statement_cache[query], msg.values] + if !@statement_cache.key?(query) + @statement_cache[query] = @session.prepare(query) + end + statement_and_values << [@statement_cache[query], action["data"].values] end - return nil if statement_and_values.empty? batch = @session.batch do |b| statement_and_values.each do |v| @@ -171,106 +221,90 @@ def prepare_batch() return batch end - private - def resend_batch2cassandra - while !@failed_batch_queue.empty? - batch_container = @failed_batch_queue.pop - batch = batch_container[:batch] - count = batch_container[:try_count] - begin - @session.execute(batch) - @logger.info "Batch sent" - rescue Exception => e - if count > @max_retries - @logger.fatal("Failed to send batch to Cassandra (error: #{e.to_s}) in #{@max_retries} tries") - else - @failed_batch_queue.push({:batch => batch, :try_count => count + 1}) - @logger.warn("Failed to send batch again (error: #{e.to_s}). 
Reschedule it.") - end - end - sleep(@retry_delay) - end - end - - public - def teardown - @batch_processor_thread["stop_it"] = true - @batch_processor_thread.join - - @failed_batch_processor_thread["stop_it"] = true - @failed_batch_processor_thread.join - end - - private - def convert2cassandra_format! msg - @hints.each do |key, value| - if msg.key?(key) - begin - msg[key] = case value - when 'uuid' - Cassandra::Types::Uuid.new(msg[key]) + def convert_value_to_cassandra_type(event_data, cassandra_type) + generator = get_cassandra_type_generator(cassandra_type) + typed_event_data = nil + begin + typed_event_data = generator.new(event_data) + rescue Exception => e + error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" + if @ignore_bad_values + case event_data + when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' + typed_event_data = 0 + when 'uuid', 'timeuuid' + typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") when 'timestamp' - Cassandra::Types::Timestamp.new(Time::parse(msg[key])) + typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) when 'inet' - Cassandra::Types::Inet.new(msg[key]) + typed_event_data = generator.new("0.0.0.0") when 'float' - Cassandra::Types::Float.new(msg[key]) - when 'varchar' - Cassandra::Types::Varchar.new(msg[key]) - when 'text' - Cassandra::Types::Text.new(msg[key]) - when 'blob' - Cassandra::Types::Blog.new(msg[key]) - when 'ascii' - Cassandra::Types::Ascii.new(msg[key]) - when 'bigint' - Cassandra::Types::Bigint.new(msg[key]) - when 'counter' - Cassandra::Types::Counter.new(msg[key]) - when 'int' - Cassandra::Types::Int.new(msg[key]) - when 'varint' - Cassandra::Types::Varint.new(msg[key]) + typed_event_data = generator.new(0) when 'boolean' - Cassandra::Types::Boolean.new(msg[key]) - when 'decimal' - Cassandra::Types::Decimal.new(msg[key]) - when 'double' - Cassandra::Types::Double.new(msg[key]) - when 'timeuuid' - 
Cassandra::Types::Timeuuid.new(msg[key]) - end - rescue Exception => e - # Ok, cannot convert the value, let's assign it in default one - if @ignore_bad_values - bad_value = msg[key] - msg[key] = case value - when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' - 0 - when 'uuid', 'timeuuid' - Cassandra::Uuid.new("00000000-0000-0000-0000-000000000000") - when 'timestamp' - Cassandra::Types::Timestamp.new(Time::parse("1970-01-01 00:00:00")) - when 'inet' - Cassandra::Types::Inet.new("0.0.0.0") - when 'float' - Cassandra::Types::Float.new(0) - when 'boolean' - Cassandra::Types::Boolean.new(false) - when 'text', 'varchar', 'ascii' - Cassandra::Types::Float.new(0) - when 'blob' - Cassandra::Types::Blob.new(nil) - end - @logger.warn("Cannot convert `#{key}` value (`#{bad_value}`) to `#{value}` type, set to `#{msg[key]}`", - :exception => e, :backtrace => e.backtrace) - else - @logger.fatal("Cannot convert `#{key}` value (`#{msg[key]}`) to `#{value}` type", - :exception => e, :backtrace => e.backtrace) - raise "Cannot convert `#{key}` value (`#{msg[key]}`) to `#{value}` type" - end + typed_event_data = generator.new(false) + when 'text', 'varchar', 'ascii' + typed_event_data = generator.new(0) + when 'blob' + typed_event_data = generator.new(nil) + when /^set\((.*)\)$/ + typed_event_data = generator.new([]) end + @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) + else + @logger.error(error_message, :exception => e, :backtrace => e.backtrace) + raise error_message end end + return typed_event_data end -end # class LogStash::Outputs::Cassandra + + def get_cassandra_type_generator(name) + case name + when 'uuid' + return Cassandra::Types::Uuid + when 'timestamp' + return Cassandra::Types::Timestamp + when 'inet' + return Cassandra::Types::Inet + when 'float' + return Cassandra::Types::Float + when 'varchar' + return Cassandra::Types::Varchar + when 'text' + return Cassandra::Types::Text + when 'blob' + return Cassandra::Types::Blog + when 
'ascii' + return Cassandra::Types::Ascii + when 'bigint' + return Cassandra::Types::Bigint + when 'counter' + return Cassandra::Types::Counter + when 'int' + return Cassandra::Types::Int + when 'varint' + return Cassandra::Types::Varint + when 'boolean' + return Cassandra::Types::Boolean + when 'decimal' + return Cassandra::Types::Decimal + when 'double' + return Cassandra::Types::Double + when 'timeuuid' + return Cassandra::Types::Timeuuid + when /^set\((.*)\)$/ + set_type = get_cassandra_type_generator($1) + return Cassandra::Types::Set(set_type) + else + raise "Unknown cassandra_type #{cassandra_type}" + end + end + + # def convert_values_to_cassandra_types!(msg) + # @hints.each do |key, value| + # if msg.key?(key) + # + # end + # end + # end +end From 326c08f392bedb2ca405a880c8b2497d1b7d2552 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 22:48:20 +0200 Subject: [PATCH 005/126] setup to use jruby and have rspec succeed (even though there are no tests) --- Gemfile | 4 +++- lib/logstash/outputs/cassandra.rb | 8 ++++---- logstash-output-cassandra.gemspec | 3 ++- spec/outputs/cassandra_spec.rb | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Gemfile b/Gemfile index d926697..f4169c4 100644 --- a/Gemfile +++ b/Gemfile @@ -1,2 +1,4 @@ +#ruby=jruby +#ruby-gemset=logstash-output-cassandra source 'https://rubygems.org' -gemspec \ No newline at end of file +gemspec diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 35e642c..bbbe837 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -2,11 +2,11 @@ require "logstash/outputs/base" require "logstash/namespace" require "time" -require "logstash/outputs/cassandra/buffer" +require "logstash/outputs/buffer" require "cassandra" -class LogStash::Outputs::Cassandra < LogStash::Outputs::Base +class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base milestone 1 @@ -119,13 +119,13 @@ def 
assert_filter_transform_structure(filter_transform) end def setup_buffer_and_handler - @buffer = ::LogStash::Outputs::Cassandra::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + @buffer = ::LogStash::Outputs::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| safe_submit(actions) end end def setup_cassandra_session() - cluster = Cassandra.cluster( + cluster = ::Cassandra.cluster( username: @username, password: @password, hosts: @hosts, diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index 9281d6c..18cc83b 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -11,7 +11,7 @@ Gem::Specification.new do |s| s.require_paths = ["lib"] # Files - s.files = `find . -type f | grep -v ^./.git/ | sed "s/^\.\\///"`.split($\)+::Dir.glob('vendor/*') + s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT'] # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/}) @@ -19,6 +19,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" } # Gem dependencies + s.add_runtime_dependency 'concurrent-ruby' s.add_runtime_dependency "logstash-core", '>= 2.0.0', '< 3.0.0' s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0' s.add_development_dependency 'logstash-devutils' diff --git a/spec/outputs/cassandra_spec.rb b/spec/outputs/cassandra_spec.rb index 6bf5945..3f91fd4 100644 --- a/spec/outputs/cassandra_spec.rb +++ b/spec/outputs/cassandra_spec.rb @@ -1 +1,2 @@ require "logstash/devutils/rspec/spec_helper" +require "logstash/outputs/cassandra" From da2e8b4021833fe82a62cf85b380d0d6b352aa12 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 23:24:18 +0200 Subject: [PATCH 006/126] added hint support when no transform filter is supplied --- lib/logstash/outputs/cassandra.rb | 41 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 
deletions(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index bbbe837..098be4f 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -42,6 +42,18 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # Event level processing (e.g. %{[key]}) is supported config :filter_transform_event_key, :validate => :string, :default => nil + # An optional hints hash which will be used in case filter_transform or filter_transform_event_key are not in use + # It is used to trigger a forced type casting to the cassandra driver types in + # the form of a hash from column name to type name in the following manner: + # hints => { + # id => "int" + # at => "timestamp" + # resellerId => "int" + # errno => "int" + # duration => "float" + # ip => "inet" } + config :hints, :validate => :hash, :default => {} + # The retry policy to use # The available options are: # * default => retry once if needed / possible @@ -153,24 +165,20 @@ def get_retry_policy(policy_name) def build_cassandra_action(event) action = {} action["table"] = event.sprintf(@table) - filter_transform = get_field_transform(event) + filter_transform = get_filter_transform(event) if filter_transform action["data"] = {} for filter in filter_transform add_event_value_from_filter_to_action(event, filter, action) end else - action["data"] = event.to_hash() - # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly - action["data"].reject!{|key| %r{^@} =~ key} - # TODO: add the hint thing here?!... 
- #convert_values_to_cassandra_types!(action) + add_event_data_using_configured_hints(event, action) end return action end - def get_field_transform(event) + def get_filter_transform(event) filter_transform = nil if @filter_transform_event_key filter_transform = event.sprintf(@filter_transform_event_key) @@ -207,7 +215,7 @@ def prepare_batch(actions) query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" - if !@statement_cache.key?(query) + if !@statement_cache.has_key?(query) @statement_cache[query] = @session.prepare(query) end statement_and_values << [@statement_cache[query], action["data"].values] @@ -300,11 +308,14 @@ def get_cassandra_type_generator(name) end end - # def convert_values_to_cassandra_types!(msg) - # @hints.each do |key, value| - # if msg.key?(key) - # - # end - # end - # end + def add_event_data_using_configured_hints(event, action) + action["data"] = event.to_hash() + # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly + action["data"].reject!{|key| %r{^@} =~ key} + @hints.each do |event_key, cassandra_type| + if action["data"].has_key?(event_key) + event_data = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) + end + end + end end From 9d71df4ccf43ba348c6a1382d21c694b2f83282d Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 23:27:45 +0200 Subject: [PATCH 007/126] moved files to a folder --- lib/logstash/outputs/{ => cassandra}/buffer.rb | 0 lib/logstash/outputs/{ => cassandra}/cassandra.rb | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename lib/logstash/outputs/{ => cassandra}/buffer.rb (100%) rename lib/logstash/outputs/{ => cassandra}/cassandra.rb (100%) diff --git a/lib/logstash/outputs/buffer.rb b/lib/logstash/outputs/cassandra/buffer.rb similarity index 100% rename from lib/logstash/outputs/buffer.rb rename to lib/logstash/outputs/cassandra/buffer.rb 
diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra/cassandra.rb similarity index 100% rename from lib/logstash/outputs/cassandra.rb rename to lib/logstash/outputs/cassandra/cassandra.rb From 68d2393398599462225e95f28bef294a00e81133 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 2 Mar 2016 23:36:42 +0200 Subject: [PATCH 008/126] renames of cassandra files and folders to cassandra output --- lib/logstash/outputs/cassandra_output.rb | 321 ++++++++++++++++++ .../{cassandra => cassandra_output}/buffer.rb | 0 .../cassandra.rb | 0 3 files changed, 321 insertions(+) create mode 100644 lib/logstash/outputs/cassandra_output.rb rename lib/logstash/outputs/{cassandra => cassandra_output}/buffer.rb (100%) rename lib/logstash/outputs/{cassandra => cassandra_output}/cassandra.rb (100%) diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb new file mode 100644 index 0000000..b7ae96c --- /dev/null +++ b/lib/logstash/outputs/cassandra_output.rb @@ -0,0 +1,321 @@ +# encoding: utf-8 +require "logstash/outputs/base" +require "logstash/namespace" +require "time" +require "logstash/outputs/cassandra/buffer" +require "cassandra" + +class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base + + milestone 1 + + config_name "cassandra" + + # List of Cassandra hostname(s) or IP-address(es) + config :hosts, :validate => :array, :required => true + + # Cassandra consistency level. + # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" + # Default: "one" + config :consistency, :validate => [ "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" ], :default => "one" + + # The keyspace to use + config :keyspace, :validate => :string, :required => true + + # The table to use (event level processing (e.g. 
%{[key]}) is supported) + config :table, :validate => :string, :required => true + + # Username + config :username, :validate => :string, :required => true + + # Password + config :password, :validate => :string, :required => true + + # An optional hash describing how / what to transform / filter from the original event + # Each key is expected to be of the form { event_data => "..." column_name => "..." cassandra_type => "..." } + # Event level processing (e.g. %{[key]}) is supported for all three + config :filter_transform, :validate => :array, :default => nil + + # An optional string which points to the event specific location from which to pull the filter_transform definition + # The contents need to conform with those defined for the filter_transform config setting + # Event level processing (e.g. %{[key]}) is supported + config :filter_transform_event_key, :validate => :string, :default => nil + + # An optional hints hash which will be used in case filter_transform or filter_transform_event_key are not in use + # It is used to trigger a forced type casting to the cassandra driver types in + # the form of a hash from column name to type name in the following manner: + # hints => { + # id => "int" + # at => "timestamp" + # resellerId => "int" + # errno => "int" + # duration => "float" + # ip => "inet" } + config :hints, :validate => :hash, :default => {} + + # The retry policy to use + # The available options are: + # * default => retry once if needed / possible + # * downgrading_consistency => retry once with a best guess lowered consistency + # * failthrough => fail immediately (i.e. 
no retries) + config :retry_policy, :validate => [ "default", "downgrading_consistency", "failthrough" ], :default => "default", :required => true + + # The command execution timeout + config :request_timeout, :validate => :number, :default => 5 + + # Ignore bad values + config :ignore_bad_values, :validate => :boolean, :default => false + + # In Logstashes >= 2.2 this setting defines the maximum sized bulk request Logstash will make + # You you may want to increase this to be in line with your pipeline's batch size. + # If you specify a number larger than the batch size of your pipeline it will have no effect, + # save for the case where a filter increases the size of an inflight batch by outputting + # events. + # + # In Logstashes <= 2.1 this plugin uses its own internal buffer of events. + # This config option sets that size. In these older logstashes this size may + # have a significant impact on heap usage, whereas in 2.2+ it will never increase it. + # To make efficient bulk API calls, we will buffer a certain number of + # events before flushing that out to Cassandra. This setting + # controls how many events will be buffered before sending a batch + # of events. Increasing the `flush_size` has an effect on Logstash's heap size. + # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big commands + # or have increased the `flush_size` to a higher value. + config :flush_size, :validate => :number, :default => 500 + + # The amount of time since last flush before a flush is forced. + # + # This setting helps ensure slow event rates don't get stuck in Logstash. + # For example, if your `flush_size` is 100, and you have received 10 events, + # and it has been more than `idle_flush_time` seconds since the last flush, + # Logstash will flush those 10 events automatically. + # + # This helps keep both fast and slow log streams moving along in + # near-real-time. 
+ config :idle_flush_time, :validate => :number, :default => 1 + + def register() + @statement_cache = {} + assert_filter_transform_structure(@filter_transform) if @filter_transform + setup_buffer_and_handler() + assert_filter_transform_structure() + end + + def receive(event) + @buffer << build_cassandra_action(event) + end + + # Receive an array of events and immediately attempt to index them (no buffering) + def multi_receive(events) + events.each_slice(@flush_size) do |slice| + safe_submit(slice.map {|e| build_cassandra_action(e) }) + end + end + + def teardown() + close() + end + + def close() + @buffer.stop() + end + + private + def assert_filter_transform_structure(filter_transform) + for item in filter_transform + if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") + raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" + end + end + end + + def setup_buffer_and_handler + @buffer = ::LogStash::Outputs::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + safe_submit(actions) + end + end + + def setup_cassandra_session() + cluster = ::Cassandra.cluster( + username: @username, + password: @password, + hosts: @hosts, + consistency: @consistency.to_sym, + timeout: @request_timeout, + retry_policy: get_retry_policy(@retry_policy), + logger: @logger + ) + @session = cluster.connect(@keyspace) + @logger.info("New cassandra session created", + :username => @username, :hosts => @hosts, :keyspace => @keyspace) + end + + def get_retry_policy(policy_name) + case policy_name + when "default" + return ::Cassandra::Retry::Policies::Default.new + when "downgrading_consistency" + return ::Cassandra::Retry::Policies::DowngradingConsistency.new + when "failthrough" + return ::Cassandra::Retry::Policies::Fallthrough.new + end + end + + def build_cassandra_action(event) + action = {} + action["table"] = event.sprintf(@table) + filter_transform = 
get_filter_transform(event) + if filter_transform + action["data"] = {} + for filter in filter_transform + add_event_value_from_filter_to_action(event, filter, action) + end + else + add_event_data_using_configured_hints(event, action) + end + + return action + end + + def get_filter_transform(event) + filter_transform = nil + if @filter_transform_event_key + filter_transform = event.sprintf(@filter_transform_event_key) + assert_filter_transform_structure(filter_transform) + elsif @filter_transform + filter_transform = @filter_transform + end + return filter_transform + end + + def add_event_value_from_filter_to_action(event, filter, action) + event_data = event.sprintf(filter["event_data"]) + if filter.has_key?("cassandra_type") + cassandra_type = event.sprintf(filter["cassandra_type"]) + event_data = convert_value_to_cassandra_type(event_data, cassandra_type) + end + column_name = event.sprintf(filter["column_name"]) + action["data"][column_name] = event_data + end + + def safe_submit(actions) + begin + batch = prepare_batch(actions) + @session.execute(batch) + @logger.info("Batch sent successfully") + rescue Exception => e + @logger.warn("Failed to send batch (error: #{e.to_s}).") + end + end + + def prepare_batch(actions) + statement_and_values = [] + for action in actions + query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) + VALUES (#{("?" 
* action["data"].keys.count).split(//) * ", "})" + + if !@statement_cache.has_key?(query) + @statement_cache[query] = @session.prepare(query) + end + statement_and_values << [@statement_cache[query], action["data"].values] + end + + batch = @session.batch do |b| + statement_and_values.each do |v| + b.add(v[0], v[1]) + end + end + return batch + end + + def convert_value_to_cassandra_type(event_data, cassandra_type) + generator = get_cassandra_type_generator(cassandra_type) + typed_event_data = nil + begin + typed_event_data = generator.new(event_data) + rescue Exception => e + error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" + if @ignore_bad_values + case event_data + when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' + typed_event_data = 0 + when 'uuid', 'timeuuid' + typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") + when 'timestamp' + typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) + when 'inet' + typed_event_data = generator.new("0.0.0.0") + when 'float' + typed_event_data = generator.new(0) + when 'boolean' + typed_event_data = generator.new(false) + when 'text', 'varchar', 'ascii' + typed_event_data = generator.new(0) + when 'blob' + typed_event_data = generator.new(nil) + when /^set\((.*)\)$/ + typed_event_data = generator.new([]) + end + @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) + else + @logger.error(error_message, :exception => e, :backtrace => e.backtrace) + raise error_message + end + end + return typed_event_data + end + + def get_cassandra_type_generator(name) + case name + when 'uuid' + return Cassandra::Types::Uuid + when 'timestamp' + return Cassandra::Types::Timestamp + when 'inet' + return Cassandra::Types::Inet + when 'float' + return Cassandra::Types::Float + when 'varchar' + return Cassandra::Types::Varchar + when 'text' + return Cassandra::Types::Text + when 'blob' + return Cassandra::Types::Blog + when 'ascii' + return 
Cassandra::Types::Ascii + when 'bigint' + return Cassandra::Types::Bigint + when 'counter' + return Cassandra::Types::Counter + when 'int' + return Cassandra::Types::Int + when 'varint' + return Cassandra::Types::Varint + when 'boolean' + return Cassandra::Types::Boolean + when 'decimal' + return Cassandra::Types::Decimal + when 'double' + return Cassandra::Types::Double + when 'timeuuid' + return Cassandra::Types::Timeuuid + when /^set\((.*)\)$/ + set_type = get_cassandra_type_generator($1) + return Cassandra::Types::Set(set_type) + else + raise "Unknown cassandra_type #{cassandra_type}" + end + end + + def add_event_data_using_configured_hints(event, action) + action["data"] = event.to_hash() + # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly + action["data"].reject!{|key| %r{^@} =~ key} + @hints.each do |event_key, cassandra_type| + if action["data"].has_key?(event_key) + event_data = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) + end + end + end + end +end end end diff --git a/lib/logstash/outputs/cassandra/buffer.rb b/lib/logstash/outputs/cassandra_output/buffer.rb similarity index 100% rename from lib/logstash/outputs/cassandra/buffer.rb rename to lib/logstash/outputs/cassandra_output/buffer.rb diff --git a/lib/logstash/outputs/cassandra/cassandra.rb b/lib/logstash/outputs/cassandra_output/cassandra.rb similarity index 100% rename from lib/logstash/outputs/cassandra/cassandra.rb rename to lib/logstash/outputs/cassandra_output/cassandra.rb From 849ba7e9e94c95fca3388ec819670de2c3900975 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 00:09:10 +0200 Subject: [PATCH 009/126] extracted safe submitter and event parser from cassandra output --- .../{cassandra_output => cassandra}/buffer.rb | 2 +- .../outputs/cassandra/event_parser.rb | 145 ++++++++ .../outputs/cassandra/safe_submitter.rb | 71 ++++ lib/logstash/outputs/cassandra_output.rb | 203 +---------- 
.../outputs/cassandra_output/cassandra.rb | 321 ------------------ 5 files changed, 233 insertions(+), 509 deletions(-) rename lib/logstash/outputs/{cassandra_output => cassandra}/buffer.rb (98%) create mode 100644 lib/logstash/outputs/cassandra/event_parser.rb create mode 100644 lib/logstash/outputs/cassandra/safe_submitter.rb delete mode 100644 lib/logstash/outputs/cassandra_output/cassandra.rb diff --git a/lib/logstash/outputs/cassandra_output/buffer.rb b/lib/logstash/outputs/cassandra/buffer.rb similarity index 98% rename from lib/logstash/outputs/cassandra_output/buffer.rb rename to lib/logstash/outputs/cassandra/buffer.rb index 03edeb3..ca247b7 100644 --- a/lib/logstash/outputs/cassandra_output/buffer.rb +++ b/lib/logstash/outputs/cassandra/buffer.rb @@ -1,7 +1,7 @@ require 'concurrent' java_import java.util.concurrent.locks.ReentrantLock -module LogStash; module Outputs; class Cassandra +module LogStash; module Outputs; module Cassandra class Buffer def initialize(logger, max_size, flush_interval, &block) @logger = logger diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb new file mode 100644 index 0000000..52ac6c3 --- /dev/null +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -0,0 +1,145 @@ +# encoding: utf-8 +require "time" +require "cassandra" + + +module LogStash; module Outputs; module Cassandra + class EventParser + def initialize(table, filter_transform_event_key, filter_transform, hints, ignore_bad_values, logger) + @table = table + @filter_transform_event_key = filter_transform_event_key + @filter_transform = filter_transform + @hints = hints + @ignore_bad_values = ignore_bad_values + @logger = logger + end + + def parse(event) + action = {} + action["table"] = event.sprintf(@table) + filter_transform = get_filter_transform(event) + if filter_transform + action["data"] = {} + for filter in filter_transform + add_event_value_from_filter_to_action(event, filter, action) + end + else + 
add_event_data_using_configured_hints(event, action) + end + + return action + end + + private + def get_filter_transform(event) + filter_transform = nil + if @filter_transform_event_key + filter_transform = event.sprintf(@filter_transform_event_key) + assert_filter_transform_structure(filter_transform) + elsif @filter_transform + filter_transform = @filter_transform + end + return filter_transform + end + + def add_event_value_from_filter_to_action(event, filter, action) + event_data = event.sprintf(filter["event_data"]) + if filter.has_key?("cassandra_type") + cassandra_type = event.sprintf(filter["cassandra_type"]) + event_data = convert_value_to_cassandra_type(event_data, cassandra_type) + end + column_name = event.sprintf(filter["column_name"]) + action["data"][column_name] = event_data + end + + def add_event_data_using_configured_hints(event, action) + action["data"] = event.to_hash() + # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly + action["data"].reject!{|key| %r{^@} =~ key} + @hints.each do |event_key, cassandra_type| + if action["data"].has_key?(event_key) + event_data = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) + end + end + end + + def convert_value_to_cassandra_type(event_data, cassandra_type) + generator = get_cassandra_type_generator(cassandra_type) + typed_event_data = nil + begin + typed_event_data = generator.new(event_data) + rescue Exception => e + error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" + if @ignore_bad_values + case event_data + when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' + typed_event_data = 0 + when 'uuid', 'timeuuid' + typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") + when 'timestamp' + typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) + when 'inet' + typed_event_data = generator.new("0.0.0.0") + when 'float' + typed_event_data = generator.new(0) + when 
'boolean' + typed_event_data = generator.new(false) + when 'text', 'varchar', 'ascii' + typed_event_data = generator.new(0) + when 'blob' + typed_event_data = generator.new(nil) + when /^set\((.*)\)$/ + typed_event_data = generator.new([]) + end + @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) + else + @logger.error(error_message, :exception => e, :backtrace => e.backtrace) + raise error_message + end + end + return typed_event_data + end + + def get_cassandra_type_generator(name) + case name + when 'uuid' + return Cassandra::Types::Uuid + when 'timestamp' + return Cassandra::Types::Timestamp + when 'inet' + return Cassandra::Types::Inet + when 'float' + return Cassandra::Types::Float + when 'varchar' + return Cassandra::Types::Varchar + when 'text' + return Cassandra::Types::Text + when 'blob' + return Cassandra::Types::Blog + when 'ascii' + return Cassandra::Types::Ascii + when 'bigint' + return Cassandra::Types::Bigint + when 'counter' + return Cassandra::Types::Counter + when 'int' + return Cassandra::Types::Int + when 'varint' + return Cassandra::Types::Varint + when 'boolean' + return Cassandra::Types::Boolean + when 'decimal' + return Cassandra::Types::Decimal + when 'double' + return Cassandra::Types::Double + when 'timeuuid' + return Cassandra::Types::Timeuuid + when /^set\((.*)\)$/ + set_type = get_cassandra_type_generator($1) + return Cassandra::Types::Set(set_type) + else + raise "Unknown cassandra_type #{cassandra_type}" + end + end + end +end end end diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb new file mode 100644 index 0000000..ca28107 --- /dev/null +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -0,0 +1,71 @@ +# encoding: utf-8 +require "cassandra" + + +module LogStash; module Outputs; module Cassandra + class SafeSubmitter + def initialize(logger, username, password, hosts, consistency, request_timeout, retry_policy, keyspace) + @statement_cache 
= {} + @logger = logger + @keyspace = keyspace + setup_cassandra_session(logger, username, password, hosts, consistency, request_timeout, retry_policy, keyspace) + end + + def submit(actions) + begin + batch = prepare_batch(actions) + @session.execute(batch) + @logger.info("Batch sent successfully") + rescue Exception => e + @logger.warn("Failed to send batch (error: #{e.to_s}).") + end + end + + private + def setup_cassandra_session(logger, username, password, hosts, consistency, request_timeout, retry_policy) + cluster = ::Cassandra.cluster( + username: username, + password: password, + hosts: hosts, + consistency: consistency.to_sym, + timeout: request_timeout, + retry_policy: get_retry_policy(retry_policy), + logger: logger + ) + @session = cluster.connect(@keyspace) + @logger.info("New cassandra session created", + :username => username, :hosts => hosts, :keyspace => @keyspace) + end + + def get_retry_policy(policy_name) + case policy_name + when "default" + return ::Cassandra::Retry::Policies::Default.new + when "downgrading_consistency" + return ::Cassandra::Retry::Policies::DowngradingConsistency.new + when "failthrough" + return ::Cassandra::Retry::Policies::Fallthrough.new + end + end + + def prepare_batch(actions) + statement_and_values = [] + for action in actions + query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) + VALUES (#{("?" 
* action["data"].keys.count).split(//) * ", "})" + + if !@statement_cache.has_key?(query) + @statement_cache[query] = @session.prepare(query) + end + statement_and_values << [@statement_cache[query], action["data"].values] + end + + batch = @session.batch do |b| + statement_and_values.each do |v| + b.add(v[0], v[1]) + end + end + return batch + end + end +end end end diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index b7ae96c..fcc152f 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -1,11 +1,11 @@ # encoding: utf-8 require "logstash/outputs/base" require "logstash/namespace" -require "time" require "logstash/outputs/cassandra/buffer" -require "cassandra" +require "logstash/outputs/cassandra/event_parser" +require "logstash/outputs/cassandra/safe_submitter" -class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base +class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base milestone 1 @@ -95,20 +95,20 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base config :idle_flush_time, :validate => :number, :default => 1 def register() - @statement_cache = {} assert_filter_transform_structure(@filter_transform) if @filter_transform + setup_event_parser() + setup_safe_submitter() setup_buffer_and_handler() - assert_filter_transform_structure() end def receive(event) - @buffer << build_cassandra_action(event) + @buffer << @event_parser.parse(event) end # Receive an array of events and immediately attempt to index them (no buffering) def multi_receive(events) events.each_slice(@flush_size) do |slice| - safe_submit(slice.map {|e| build_cassandra_action(e) }) + @safe_submitter.submit(slice.map {|e| @event_parser.parse(e) }) end end @@ -130,192 +130,21 @@ def assert_filter_transform_structure(filter_transform) end def setup_buffer_and_handler - @buffer = ::LogStash::Outputs::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| - 
safe_submit(actions) + @buffer = ::LogStash::Outputs::CassandraOutput::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + @safe_submitter.submit(actions) end end - def setup_cassandra_session() - cluster = ::Cassandra.cluster( - username: @username, - password: @password, - hosts: @hosts, - consistency: @consistency.to_sym, - timeout: @request_timeout, - retry_policy: get_retry_policy(@retry_policy), - logger: @logger + def setup_safe_submitter() + @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( + @ ) - @session = cluster.connect(@keyspace) - @logger.info("New cassandra session created", - :username => @username, :hosts => @hosts, :keyspace => @keyspace) end - def get_retry_policy(policy_name) - case policy_name - when "default" - return ::Cassandra::Retry::Policies::Default.new - when "downgrading_consistency" - return ::Cassandra::Retry::Policies::DowngradingConsistency.new - when "failthrough" - return ::Cassandra::Retry::Policies::Fallthrough.new - end - end - - def build_cassandra_action(event) - action = {} - action["table"] = event.sprintf(@table) - filter_transform = get_filter_transform(event) - if filter_transform - action["data"] = {} - for filter in filter_transform - add_event_value_from_filter_to_action(event, filter, action) - end - else - add_event_data_using_configured_hints(event, action) - end - - return action - end - - def get_filter_transform(event) - filter_transform = nil - if @filter_transform_event_key - filter_transform = event.sprintf(@filter_transform_event_key) - assert_filter_transform_structure(filter_transform) - elsif @filter_transform - filter_transform = @filter_transform - end - return filter_transform - end - - def add_event_value_from_filter_to_action(event, filter, action) - event_data = event.sprintf(filter["event_data"]) - if filter.has_key?("cassandra_type") - cassandra_type = event.sprintf(filter["cassandra_type"]) - event_data = convert_value_to_cassandra_type(event_data, 
cassandra_type) - end - column_name = event.sprintf(filter["column_name"]) - action["data"][column_name] = event_data - end - - def safe_submit(actions) - begin - batch = prepare_batch(actions) - @session.execute(batch) - @logger.info("Batch sent successfully") - rescue Exception => e - @logger.warn("Failed to send batch (error: #{e.to_s}).") - end - end - - def prepare_batch(actions) - statement_and_values = [] - for action in actions - query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) - VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" - - if !@statement_cache.has_key?(query) - @statement_cache[query] = @session.prepare(query) - end - statement_and_values << [@statement_cache[query], action["data"].values] - end - - batch = @session.batch do |b| - statement_and_values.each do |v| - b.add(v[0], v[1]) - end - end - return batch - end - - def convert_value_to_cassandra_type(event_data, cassandra_type) - generator = get_cassandra_type_generator(cassandra_type) - typed_event_data = nil - begin - typed_event_data = generator.new(event_data) - rescue Exception => e - error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" - if @ignore_bad_values - case event_data - when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' - typed_event_data = 0 - when 'uuid', 'timeuuid' - typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") - when 'timestamp' - typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) - when 'inet' - typed_event_data = generator.new("0.0.0.0") - when 'float' - typed_event_data = generator.new(0) - when 'boolean' - typed_event_data = generator.new(false) - when 'text', 'varchar', 'ascii' - typed_event_data = generator.new(0) - when 'blob' - typed_event_data = generator.new(nil) - when /^set\((.*)\)$/ - typed_event_data = generator.new([]) - end - @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) - else - 
@logger.error(error_message, :exception => e, :backtrace => e.backtrace) - raise error_message - end - end - return typed_event_data - end - - def get_cassandra_type_generator(name) - case name - when 'uuid' - return Cassandra::Types::Uuid - when 'timestamp' - return Cassandra::Types::Timestamp - when 'inet' - return Cassandra::Types::Inet - when 'float' - return Cassandra::Types::Float - when 'varchar' - return Cassandra::Types::Varchar - when 'text' - return Cassandra::Types::Text - when 'blob' - return Cassandra::Types::Blog - when 'ascii' - return Cassandra::Types::Ascii - when 'bigint' - return Cassandra::Types::Bigint - when 'counter' - return Cassandra::Types::Counter - when 'int' - return Cassandra::Types::Int - when 'varint' - return Cassandra::Types::Varint - when 'boolean' - return Cassandra::Types::Boolean - when 'decimal' - return Cassandra::Types::Decimal - when 'double' - return Cassandra::Types::Double - when 'timeuuid' - return Cassandra::Types::Timeuuid - when /^set\((.*)\)$/ - set_type = get_cassandra_type_generator($1) - return Cassandra::Types::Set(set_type) - else - raise "Unknown cassandra_type #{cassandra_type}" - end - end - - def add_event_data_using_configured_hints(event, action) - action["data"] = event.to_hash() - # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly - action["data"].reject!{|key| %r{^@} =~ key} - @hints.each do |event_key, cassandra_type| - if action["data"].has_key?(event_key) - event_data = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) - end - end + def setup_event_parser() + @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( + @table, @filter_transform_event_key, @filter_transform, @hints, @ignore_bad_values, @logger + ) end end end end end diff --git a/lib/logstash/outputs/cassandra_output/cassandra.rb b/lib/logstash/outputs/cassandra_output/cassandra.rb deleted file mode 100644 index 098be4f..0000000 --- 
a/lib/logstash/outputs/cassandra_output/cassandra.rb +++ /dev/null @@ -1,321 +0,0 @@ -# encoding: utf-8 -require "logstash/outputs/base" -require "logstash/namespace" -require "time" -require "logstash/outputs/buffer" -require "cassandra" - - -class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base - - milestone 1 - - config_name "cassandra" - - # List of Cassandra hostname(s) or IP-address(es) - config :hosts, :validate => :array, :required => true - - # Cassandra consistency level. - # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" - # Default: "one" - config :consistency, :validate => [ "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" ], :default => "one" - - # The keyspace to use - config :keyspace, :validate => :string, :required => true - - # The table to use (event level processing (e.g. %{[key]}) is supported) - config :table, :validate => :string, :required => true - - # Username - config :username, :validate => :string, :required => true - - # Password - config :password, :validate => :string, :required => true - - # An optional hash describing how / what to transform / filter from the original event - # Each key is expected to be of the form { event_data => "..." column_name => "..." cassandra_type => "..." } - # Event level processing (e.g. %{[key]}) is supported for all three - config :filter_transform, :validate => :array, :default => nil - - # An optional string which points to the event specific location from which to pull the filter_transform definition - # The contents need to conform with those defined for the filter_transform config setting - # Event level processing (e.g. 
%{[key]}) is supported - config :filter_transform_event_key, :validate => :string, :default => nil - - # An optional hints hash which will be used in case filter_transform or filter_transform_event_key are not in use - # It is used to trigger a forced type casting to the cassandra driver types in - # the form of a hash from column name to type name in the following manner: - # hints => { - # id => "int" - # at => "timestamp" - # resellerId => "int" - # errno => "int" - # duration => "float" - # ip => "inet" } - config :hints, :validate => :hash, :default => {} - - # The retry policy to use - # The available options are: - # * default => retry once if needed / possible - # * downgrading_consistency => retry once with a best guess lowered consistency - # * failthrough => fail immediately (i.e. no retries) - config :retry_policy, :validate => [ "default", "downgrading_consistency", "failthrough" ], :default => "default", :required => true - - # The command execution timeout - config :request_timeout, :validate => :number, :default => 5 - - # Ignore bad values - config :ignore_bad_values, :validate => :boolean, :default => false - - # In Logstashes >= 2.2 this setting defines the maximum sized bulk request Logstash will make - # You you may want to increase this to be in line with your pipeline's batch size. - # If you specify a number larger than the batch size of your pipeline it will have no effect, - # save for the case where a filter increases the size of an inflight batch by outputting - # events. - # - # In Logstashes <= 2.1 this plugin uses its own internal buffer of events. - # This config option sets that size. In these older logstashes this size may - # have a significant impact on heap usage, whereas in 2.2+ it will never increase it. - # To make efficient bulk API calls, we will buffer a certain number of - # events before flushing that out to Cassandra. This setting - # controls how many events will be buffered before sending a batch - # of events. 
Increasing the `flush_size` has an effect on Logstash's heap size. - # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big commands - # or have increased the `flush_size` to a higher value. - config :flush_size, :validate => :number, :default => 500 - - # The amount of time since last flush before a flush is forced. - # - # This setting helps ensure slow event rates don't get stuck in Logstash. - # For example, if your `flush_size` is 100, and you have received 10 events, - # and it has been more than `idle_flush_time` seconds since the last flush, - # Logstash will flush those 10 events automatically. - # - # This helps keep both fast and slow log streams moving along in - # near-real-time. - config :idle_flush_time, :validate => :number, :default => 1 - - def register() - @statement_cache = {} - assert_filter_transform_structure(@filter_transform) if @filter_transform - setup_buffer_and_handler() - assert_filter_transform_structure() - end - - def receive(event) - @buffer << build_cassandra_action(event) - end - - # Receive an array of events and immediately attempt to index them (no buffering) - def multi_receive(events) - events.each_slice(@flush_size) do |slice| - safe_submit(slice.map {|e| build_cassandra_action(e) }) - end - end - - def teardown() - close() - end - - def close() - @buffer.stop() - end - - private - def assert_filter_transform_structure(filter_transform) - for item in filter_transform - if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") - raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" - end - end - end - - def setup_buffer_and_handler - @buffer = ::LogStash::Outputs::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| - safe_submit(actions) - end - end - - def setup_cassandra_session() - cluster = ::Cassandra.cluster( - username: @username, - password: @password, - hosts: @hosts, - 
consistency: @consistency.to_sym, - timeout: @request_timeout, - retry_policy: get_retry_policy(@retry_policy), - logger: @logger - ) - @session = cluster.connect(@keyspace) - @logger.info("New cassandra session created", - :username => @username, :hosts => @hosts, :keyspace => @keyspace) - end - - def get_retry_policy(policy_name) - case policy_name - when "default" - return ::Cassandra::Retry::Policies::Default.new - when "downgrading_consistency" - return ::Cassandra::Retry::Policies::DowngradingConsistency.new - when "failthrough" - return ::Cassandra::Retry::Policies::Fallthrough.new - end - end - - def build_cassandra_action(event) - action = {} - action["table"] = event.sprintf(@table) - filter_transform = get_filter_transform(event) - if filter_transform - action["data"] = {} - for filter in filter_transform - add_event_value_from_filter_to_action(event, filter, action) - end - else - add_event_data_using_configured_hints(event, action) - end - - return action - end - - def get_filter_transform(event) - filter_transform = nil - if @filter_transform_event_key - filter_transform = event.sprintf(@filter_transform_event_key) - assert_filter_transform_structure(filter_transform) - elsif @filter_transform - filter_transform = @filter_transform - end - return filter_transform - end - - def add_event_value_from_filter_to_action(event, filter, action) - event_data = event.sprintf(filter["event_data"]) - if filter.has_key?("cassandra_type") - cassandra_type = event.sprintf(filter["cassandra_type"]) - event_data = convert_value_to_cassandra_type(event_data, cassandra_type) - end - column_name = event.sprintf(filter["column_name"]) - action["data"][column_name] = event_data - end - - def safe_submit(actions) - begin - batch = prepare_batch(actions) - @session.execute(batch) - @logger.info("Batch sent successfully") - rescue Exception => e - @logger.warn("Failed to send batch (error: #{e.to_s}).") - end - end - - def prepare_batch(actions) - statement_and_values = [] - 
for action in actions - query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) - VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" - - if !@statement_cache.has_key?(query) - @statement_cache[query] = @session.prepare(query) - end - statement_and_values << [@statement_cache[query], action["data"].values] - end - - batch = @session.batch do |b| - statement_and_values.each do |v| - b.add(v[0], v[1]) - end - end - return batch - end - - def convert_value_to_cassandra_type(event_data, cassandra_type) - generator = get_cassandra_type_generator(cassandra_type) - typed_event_data = nil - begin - typed_event_data = generator.new(event_data) - rescue Exception => e - error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" - if @ignore_bad_values - case event_data - when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' - typed_event_data = 0 - when 'uuid', 'timeuuid' - typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") - when 'timestamp' - typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) - when 'inet' - typed_event_data = generator.new("0.0.0.0") - when 'float' - typed_event_data = generator.new(0) - when 'boolean' - typed_event_data = generator.new(false) - when 'text', 'varchar', 'ascii' - typed_event_data = generator.new(0) - when 'blob' - typed_event_data = generator.new(nil) - when /^set\((.*)\)$/ - typed_event_data = generator.new([]) - end - @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) - else - @logger.error(error_message, :exception => e, :backtrace => e.backtrace) - raise error_message - end - end - return typed_event_data - end - - def get_cassandra_type_generator(name) - case name - when 'uuid' - return Cassandra::Types::Uuid - when 'timestamp' - return Cassandra::Types::Timestamp - when 'inet' - return Cassandra::Types::Inet - when 'float' - return Cassandra::Types::Float - when 'varchar' - return 
Cassandra::Types::Varchar - when 'text' - return Cassandra::Types::Text - when 'blob' - return Cassandra::Types::Blog - when 'ascii' - return Cassandra::Types::Ascii - when 'bigint' - return Cassandra::Types::Bigint - when 'counter' - return Cassandra::Types::Counter - when 'int' - return Cassandra::Types::Int - when 'varint' - return Cassandra::Types::Varint - when 'boolean' - return Cassandra::Types::Boolean - when 'decimal' - return Cassandra::Types::Decimal - when 'double' - return Cassandra::Types::Double - when 'timeuuid' - return Cassandra::Types::Timeuuid - when /^set\((.*)\)$/ - set_type = get_cassandra_type_generator($1) - return Cassandra::Types::Set(set_type) - else - raise "Unknown cassandra_type #{cassandra_type}" - end - end - - def add_event_data_using_configured_hints(event, action) - action["data"] = event.to_hash() - # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly - action["data"].reject!{|key| %r{^@} =~ key} - @hints.each do |event_key, cassandra_type| - if action["data"].has_key?(event_key) - event_data = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) - end - end - end -end From 594d2c8c710fd2ee4c7dc452dc719b07415e2305 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 00:12:53 +0200 Subject: [PATCH 010/126] some small fixes to get the empty specs to run --- .../outputs/cassandra/event_parser.rb | 4 +- lib/logstash/outputs/cassandra_output.rb | 255 +++++++++--------- spec/outputs/cassandra_spec.rb | 2 +- 3 files changed, 130 insertions(+), 131 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 52ac6c3..8ff7b2c 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -5,13 +5,13 @@ module LogStash; module Outputs; module Cassandra class EventParser - def initialize(table, filter_transform_event_key, filter_transform, hints, 
ignore_bad_values, logger) + def initialize(logger, table, filter_transform_event_key, filter_transform, hints, ignore_bad_values) + @logger = logger @table = table @filter_transform_event_key = filter_transform_event_key @filter_transform = filter_transform @hints = hints @ignore_bad_values = ignore_bad_values - @logger = logger end def parse(event) diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index fcc152f..9f46498 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -7,144 +7,143 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base - milestone 1 - - config_name "cassandra" - - # List of Cassandra hostname(s) or IP-address(es) - config :hosts, :validate => :array, :required => true - - # Cassandra consistency level. - # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" - # Default: "one" - config :consistency, :validate => [ "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" ], :default => "one" - - # The keyspace to use - config :keyspace, :validate => :string, :required => true - - # The table to use (event level processing (e.g. %{[key]}) is supported) - config :table, :validate => :string, :required => true - - # Username - config :username, :validate => :string, :required => true - - # Password - config :password, :validate => :string, :required => true - - # An optional hash describing how / what to transform / filter from the original event - # Each key is expected to be of the form { event_data => "..." column_name => "..." cassandra_type => "..." } - # Event level processing (e.g. 
%{[key]}) is supported for all three - config :filter_transform, :validate => :array, :default => nil - - # An optional string which points to the event specific location from which to pull the filter_transform definition - # The contents need to conform with those defined for the filter_transform config setting - # Event level processing (e.g. %{[key]}) is supported - config :filter_transform_event_key, :validate => :string, :default => nil - - # An optional hints hash which will be used in case filter_transform or filter_transform_event_key are not in use - # It is used to trigger a forced type casting to the cassandra driver types in - # the form of a hash from column name to type name in the following manner: - # hints => { - # id => "int" - # at => "timestamp" - # resellerId => "int" - # errno => "int" - # duration => "float" - # ip => "inet" } - config :hints, :validate => :hash, :default => {} - - # The retry policy to use - # The available options are: - # * default => retry once if needed / possible - # * downgrading_consistency => retry once with a best guess lowered consistency - # * failthrough => fail immediately (i.e. no retries) - config :retry_policy, :validate => [ "default", "downgrading_consistency", "failthrough" ], :default => "default", :required => true - - # The command execution timeout - config :request_timeout, :validate => :number, :default => 5 - - # Ignore bad values - config :ignore_bad_values, :validate => :boolean, :default => false - - # In Logstashes >= 2.2 this setting defines the maximum sized bulk request Logstash will make - # You you may want to increase this to be in line with your pipeline's batch size. - # If you specify a number larger than the batch size of your pipeline it will have no effect, - # save for the case where a filter increases the size of an inflight batch by outputting - # events. - # - # In Logstashes <= 2.1 this plugin uses its own internal buffer of events. - # This config option sets that size. 
In these older logstashes this size may - # have a significant impact on heap usage, whereas in 2.2+ it will never increase it. - # To make efficient bulk API calls, we will buffer a certain number of - # events before flushing that out to Cassandra. This setting - # controls how many events will be buffered before sending a batch - # of events. Increasing the `flush_size` has an effect on Logstash's heap size. - # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big commands - # or have increased the `flush_size` to a higher value. - config :flush_size, :validate => :number, :default => 500 - - # The amount of time since last flush before a flush is forced. - # - # This setting helps ensure slow event rates don't get stuck in Logstash. - # For example, if your `flush_size` is 100, and you have received 10 events, - # and it has been more than `idle_flush_time` seconds since the last flush, - # Logstash will flush those 10 events automatically. - # - # This helps keep both fast and slow log streams moving along in - # near-real-time. - config :idle_flush_time, :validate => :number, :default => 1 - - def register() - assert_filter_transform_structure(@filter_transform) if @filter_transform - setup_event_parser() - setup_safe_submitter() - setup_buffer_and_handler() - end + milestone 1 + + config_name "cassandra" + + # List of Cassandra hostname(s) or IP-address(es) + config :hosts, :validate => :array, :required => true + + # Cassandra consistency level. + # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" + # Default: "one" + config :consistency, :validate => [ "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" ], :default => "one" + + # The keyspace to use + config :keyspace, :validate => :string, :required => true + + # The table to use (event level processing (e.g. 
%{[key]}) is supported) + config :table, :validate => :string, :required => true + + # Username + config :username, :validate => :string, :required => true + + # Password + config :password, :validate => :string, :required => true + + # An optional hash describing how / what to transform / filter from the original event + # Each key is expected to be of the form { event_data => "..." column_name => "..." cassandra_type => "..." } + # Event level processing (e.g. %{[key]}) is supported for all three + config :filter_transform, :validate => :array, :default => nil + + # An optional string which points to the event specific location from which to pull the filter_transform definition + # The contents need to conform with those defined for the filter_transform config setting + # Event level processing (e.g. %{[key]}) is supported + config :filter_transform_event_key, :validate => :string, :default => nil + + # An optional hints hash which will be used in case filter_transform or filter_transform_event_key are not in use + # It is used to trigger a forced type casting to the cassandra driver types in + # the form of a hash from column name to type name in the following manner: + # hints => { + # id => "int" + # at => "timestamp" + # resellerId => "int" + # errno => "int" + # duration => "float" + # ip => "inet" } + config :hints, :validate => :hash, :default => {} + + # The retry policy to use + # The available options are: + # * default => retry once if needed / possible + # * downgrading_consistency => retry once with a best guess lowered consistency + # * failthrough => fail immediately (i.e. 
no retries) + config :retry_policy, :validate => [ "default", "downgrading_consistency", "failthrough" ], :default => "default", :required => true + + # The command execution timeout + config :request_timeout, :validate => :number, :default => 5 + + # Ignore bad values + config :ignore_bad_values, :validate => :boolean, :default => false + + # In Logstashes >= 2.2 this setting defines the maximum sized bulk request Logstash will make + # You you may want to increase this to be in line with your pipeline's batch size. + # If you specify a number larger than the batch size of your pipeline it will have no effect, + # save for the case where a filter increases the size of an inflight batch by outputting + # events. + # + # In Logstashes <= 2.1 this plugin uses its own internal buffer of events. + # This config option sets that size. In these older logstashes this size may + # have a significant impact on heap usage, whereas in 2.2+ it will never increase it. + # To make efficient bulk API calls, we will buffer a certain number of + # events before flushing that out to Cassandra. This setting + # controls how many events will be buffered before sending a batch + # of events. Increasing the `flush_size` has an effect on Logstash's heap size. + # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big commands + # or have increased the `flush_size` to a higher value. + config :flush_size, :validate => :number, :default => 500 + + # The amount of time since last flush before a flush is forced. + # + # This setting helps ensure slow event rates don't get stuck in Logstash. + # For example, if your `flush_size` is 100, and you have received 10 events, + # and it has been more than `idle_flush_time` seconds since the last flush, + # Logstash will flush those 10 events automatically. + # + # This helps keep both fast and slow log streams moving along in + # near-real-time. 
+ config :idle_flush_time, :validate => :number, :default => 1 + + def register() + assert_filter_transform_structure(@filter_transform) if @filter_transform + setup_event_parser() + setup_safe_submitter() + setup_buffer_and_handler() + end - def receive(event) - @buffer << @event_parser.parse(event) - end + def receive(event) + @buffer << @event_parser.parse(event) + end - # Receive an array of events and immediately attempt to index them (no buffering) - def multi_receive(events) - events.each_slice(@flush_size) do |slice| - @safe_submitter.submit(slice.map {|e| @event_parser.parse(e) }) - end + # Receive an array of events and immediately attempt to index them (no buffering) + def multi_receive(events) + events.each_slice(@flush_size) do |slice| + @safe_submitter.submit(slice.map {|e| @event_parser.parse(e) }) end + end - def teardown() - close() - end + def teardown() + close() + end - def close() - @buffer.stop() - end + def close() + @buffer.stop() + end - private - def assert_filter_transform_structure(filter_transform) - for item in filter_transform - if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") - raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" - end + private + def assert_filter_transform_structure(filter_transform) + for item in filter_transform + if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") + raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" end end + end - def setup_buffer_and_handler - @buffer = ::LogStash::Outputs::CassandraOutput::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| - @safe_submitter.submit(actions) - end + def setup_buffer_and_handler + @buffer = ::LogStash::Outputs::CassandraOutput::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + 
@safe_submitter.submit(actions) end + end - def setup_safe_submitter() - @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( - @ - ) - end + def setup_safe_submitter() + @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( + @logger, @username, @password, @hosts, @consistency, @request_timeout, @retry_policy, @keyspace + ) + end - def setup_event_parser() - @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( - @table, @filter_transform_event_key, @filter_transform, @hints, @ignore_bad_values, @logger - ) - end + def setup_event_parser() + @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( + @logger, @table, @filter_transform_event_key, @filter_transform, @hints, @ignore_bad_values + ) end -end end end +end diff --git a/spec/outputs/cassandra_spec.rb b/spec/outputs/cassandra_spec.rb index 3f91fd4..ce904a9 100644 --- a/spec/outputs/cassandra_spec.rb +++ b/spec/outputs/cassandra_spec.rb @@ -1,2 +1,2 @@ require "logstash/devutils/rspec/spec_helper" -require "logstash/outputs/cassandra" +require "logstash/outputs/cassandra_output" From efb6883757117505cfe407a61ac46f428d1ca928 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 10:05:47 +0200 Subject: [PATCH 011/126] added buffer spec from the es output plugin --- logstash-output-cassandra.gemspec | 1 + spec/unit/outputs/buffer_spec.rb | 118 ++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 spec/unit/outputs/buffer_spec.rb diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index 18cc83b..57dcad9 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -22,5 +22,6 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'concurrent-ruby' s.add_runtime_dependency "logstash-core", '>= 2.0.0', '< 3.0.0' s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0' + s.add_development_dependency 'cabin', ['~> 0.6'] s.add_development_dependency 
'logstash-devutils' end diff --git a/spec/unit/outputs/buffer_spec.rb b/spec/unit/outputs/buffer_spec.rb new file mode 100644 index 0000000..3fd230d --- /dev/null +++ b/spec/unit/outputs/buffer_spec.rb @@ -0,0 +1,118 @@ +require "logstash/outputs/cassandra/buffer" +require 'cabin' + +describe ::LogStash::Outputs::Cassandra::Buffer do + class OperationTarget # Used to track buffer flushesn + attr_reader :buffer, :buffer_history, :receive_count + def initialize + @buffer = nil + @buffer_history = [] + @receive_count = 0 + end + + def receive(buffer) + @receive_count += 1 + @buffer_history << buffer.clone + @buffer = buffer + end + end + + let(:logger) { Cabin::Channel.get } + let(:max_size) { 10 } + let(:flush_interval) { 2 } + # Used to track flush count + let(:operation_target) { OperationTarget.new() } + let(:operation) { proc {|buffer| operation_target.receive(buffer) } } + subject(:buffer){ LogStash::Outputs::Cassandra::Buffer.new(logger, max_size, flush_interval, &operation) } + + after(:each) do + buffer.stop(do_flush=false) + end + + it "should initialize cleanly" do + expect(buffer).to be_a(LogStash::Outputs::Cassandra::Buffer) + end + + shared_examples("a buffer with two items inside") do + it "should add a pushed item to the buffer" do + buffer.synchronize do |data| + expect(data).to include(item1) + expect(data).to include(item2) + end + end + + describe "interval flushing" do + before do + sleep flush_interval + 1 + end + + it "should flush the buffer after the interval has passed" do + expect(operation_target.receive_count).to eql(1) + end + + it "should clear the buffer after a successful flush" do + expect(operation_target.buffer).to eql([]) + end + end + + describe "interval flushing a stopped buffer" do + before do + buffer.stop(do_flush=false) + sleep flush_interval + 1 + end + + it "should not flush if the buffer is stopped" do + expect(operation_target.receive_count).to eql(0) + end + end + end + + describe "with a buffer push" do + let(:item1) { 
"foo" } + let(:item2) { "bar" } + + describe "a buffer with two items pushed to it separately" do + before do + buffer << item1 + buffer << item2 + end + + include_examples("a buffer with two items inside") + end + + describe "a buffer with two items pushed to it in one operation" do + before do + buffer.push_multi([item1, item2]) + end + + include_examples("a buffer with two items inside") + end + end + + describe "with an empty buffer" do + it "should not perform an operation if the buffer is empty" do + buffer.flush + expect(operation_target.receive_count).to eql(0) + end + end + + describe "flushing with an operation that raises an error" do + class TestError < StandardError; end + let(:operation) { proc {|buffer| raise TestError, "A test" } } + let(:item) { double("item") } + + before do + buffer << item + end + + it "should raise an exception" do + expect { buffer.flush }.to raise_error(TestError) + end + + it "should not clear the buffer" do + expect do + buffer.flush rescue TestError + end.not_to change(buffer, :contents) + end + end +end From 9a60468d2b5cd10635349175e43de37d7b36bc9e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 10:49:31 +0200 Subject: [PATCH 012/126] created spec helper --- spec/cassandra_spec_helper.rb | 1 + spec/outputs/cassandra_spec.rb | 2 +- spec/unit/outputs/buffer_spec.rb | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 spec/cassandra_spec_helper.rb diff --git a/spec/cassandra_spec_helper.rb b/spec/cassandra_spec_helper.rb new file mode 100644 index 0000000..6bf5945 --- /dev/null +++ b/spec/cassandra_spec_helper.rb @@ -0,0 +1 @@ +require "logstash/devutils/rspec/spec_helper" diff --git a/spec/outputs/cassandra_spec.rb b/spec/outputs/cassandra_spec.rb index ce904a9..1999ebb 100644 --- a/spec/outputs/cassandra_spec.rb +++ b/spec/outputs/cassandra_spec.rb @@ -1,2 +1,2 @@ -require "logstash/devutils/rspec/spec_helper" +require_relative "../cassandra_spec_helper" require 
"logstash/outputs/cassandra_output" diff --git a/spec/unit/outputs/buffer_spec.rb b/spec/unit/outputs/buffer_spec.rb index 3fd230d..9fe311f 100644 --- a/spec/unit/outputs/buffer_spec.rb +++ b/spec/unit/outputs/buffer_spec.rb @@ -1,7 +1,7 @@ require "logstash/outputs/cassandra/buffer" -require 'cabin' +require "cabin" -describe ::LogStash::Outputs::Cassandra::Buffer do +describe LogStash::Outputs::Cassandra::Buffer do class OperationTarget # Used to track buffer flushesn attr_reader :buffer, :buffer_history, :receive_count def initialize From c9a1690423ad7f8ee0382efdbf3e053f494784fe Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 13:09:20 +0200 Subject: [PATCH 013/126] moved assert_filter_transform_structure from output to event parser --- .../outputs/cassandra/event_parser.rb | 9 +++++++ lib/logstash/outputs/cassandra_output.rb | 25 ++++++------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 8ff7b2c..aef8cdd 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -9,6 +9,7 @@ def initialize(logger, table, filter_transform_event_key, filter_transform, hint @logger = logger @table = table @filter_transform_event_key = filter_transform_event_key + assert_filter_transform_structure(filter_transform) if filter_transform @filter_transform = filter_transform @hints = hints @ignore_bad_values = ignore_bad_values @@ -42,6 +43,14 @@ def get_filter_transform(event) return filter_transform end + def assert_filter_transform_structure(filter_transform) + for item in filter_transform + if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") + raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" + end + end + end + def add_event_value_from_filter_to_action(event, filter, 
action) event_data = event.sprintf(filter["event_data"]) if filter.has_key?("cassandra_type") diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index 9f46498..3caf784 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -95,7 +95,6 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base config :idle_flush_time, :validate => :number, :default => 1 def register() - assert_filter_transform_structure(@filter_transform) if @filter_transform setup_event_parser() setup_safe_submitter() setup_buffer_and_handler() @@ -121,18 +120,10 @@ def close() end private - def assert_filter_transform_structure(filter_transform) - for item in filter_transform - if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") - raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" - end - end - end - - def setup_buffer_and_handler - @buffer = ::LogStash::Outputs::CassandraOutput::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| - @safe_submitter.submit(actions) - end + def setup_event_parser() + @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( + @logger, @table, @filter_transform_event_key, @filter_transform, @hints, @ignore_bad_values + ) end def setup_safe_submitter() @@ -141,9 +132,9 @@ def setup_safe_submitter() ) end - def setup_event_parser() - @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( - @logger, @table, @filter_transform_event_key, @filter_transform, @hints, @ignore_bad_values - ) + def setup_buffer_and_handler + @buffer = ::LogStash::Outputs::CassandraOutput::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + @safe_submitter.submit(actions) + end end end From b30e5bbb8d3fe6ab4522102b7e8cc4e6772378d2 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 13:13:03 +0200 Subject: [PATCH 014/126] 
event parser spec layout --- spec/outputs/event_parser_spec.rb | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 spec/outputs/event_parser_spec.rb diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb new file mode 100644 index 0000000..2e6473e --- /dev/null +++ b/spec/outputs/event_parser_spec.rb @@ -0,0 +1,29 @@ +require_relative "../cassandra_spec_helper" +require "logstash/outputs/cassandra/event_parser" + +describe LogStash::Outputs::Cassandra::EventParser do + # @table + # => regular table name + # => event table name + + # @filter_transform_event_key + # => get from event + + # @filter_transform + # => malformed + # => single + # => multiple + # => without type + # => with type + + # @hints + # => does nothing for none + # => hints what it knows + # => fails for unknown types + + # @ignore_bad_values + # => fails on bad values if false + # => if true + # => defaults what it can + # => skips what it cant +end From fd9d0713d5fbc1d7bb4369148bffa137c08bb7e5 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 15:06:59 +0200 Subject: [PATCH 015/126] event parser - table name parsing - leaves regular table names unchanged --- .../outputs/cassandra/event_parser.rb | 16 ++++++------ spec/cassandra_spec_helper.rb | 1 + spec/outputs/event_parser_spec.rb | 26 ++++++++++++++++--- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index aef8cdd..c467436 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -5,14 +5,14 @@ module LogStash; module Outputs; module Cassandra class EventParser - def initialize(logger, table, filter_transform_event_key, filter_transform, hints, ignore_bad_values) - @logger = logger - @table = table - @filter_transform_event_key = filter_transform_event_key - 
assert_filter_transform_structure(filter_transform) if filter_transform - @filter_transform = filter_transform - @hints = hints - @ignore_bad_values = ignore_bad_values + def initialize(opts) + @logger = opts['logger'] + @table = opts['table'] + @filter_transform_event_key = opts['filter_transform_event_key'] + assert_filter_transform_structure(opts['filter_transform']) if opts['filter_transform'] + @filter_transform = opts['filter_transform'] + @hints = opts['hints'] + @ignore_bad_values = opts['ignore_bad_values'] end def parse(event) diff --git a/spec/cassandra_spec_helper.rb b/spec/cassandra_spec_helper.rb index 6bf5945..2e9de89 100644 --- a/spec/cassandra_spec_helper.rb +++ b/spec/cassandra_spec_helper.rb @@ -1 +1,2 @@ require "logstash/devutils/rspec/spec_helper" +require "logstash/event" diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 2e6473e..1cc3913 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -1,10 +1,28 @@ require_relative "../cassandra_spec_helper" require "logstash/outputs/cassandra/event_parser" -describe LogStash::Outputs::Cassandra::EventParser do - # @table - # => regular table name - # => event table name +RSpec.describe LogStash::Outputs::Cassandra::EventParser do + let(:sut) { LogStash::Outputs::Cassandra::EventParser } + let(:default_opts) {{ + 'logger' => double(), + 'table' => 'dummy', + 'filter_transform_event_key' => nil, + 'filter_transform' => nil, + 'hints' => {}, + 'ignore_bad_values' => false + }} + let(:sample_event) { LogStash::Event.new("message" => "sample message here") } + + describe "table name parsing" do + it "leaves regular table names unchanged" do + sut_instance = sut().new(default_opts.update({ "table" => "simple" })) + action = sut_instance.parse(sample_event) + expect(action["table"]).to(eq("simple")) + end + # @table + # => regular table name + # => event table name + end # @filter_transform_event_key # => get from event From 
6f5f3ba77967af885d50920753dcdd889908d2cc Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 3 Mar 2016 15:15:08 +0200 Subject: [PATCH 016/126] event parser - table name parser - parses table names with data from the event --- spec/cassandra_spec_helper.rb | 1 + spec/outputs/cassandra_spec.rb | 1 + spec/outputs/event_parser_spec.rb | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/spec/cassandra_spec_helper.rb b/spec/cassandra_spec_helper.rb index 2e9de89..c00762e 100644 --- a/spec/cassandra_spec_helper.rb +++ b/spec/cassandra_spec_helper.rb @@ -1,2 +1,3 @@ +# encoding: utf-8 require "logstash/devutils/rspec/spec_helper" require "logstash/event" diff --git a/spec/outputs/cassandra_spec.rb b/spec/outputs/cassandra_spec.rb index 1999ebb..40c2492 100644 --- a/spec/outputs/cassandra_spec.rb +++ b/spec/outputs/cassandra_spec.rb @@ -1,2 +1,3 @@ +# encoding: utf-8 require_relative "../cassandra_spec_helper" require "logstash/outputs/cassandra_output" diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 1cc3913..c1fd35c 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require_relative "../cassandra_spec_helper" require "logstash/outputs/cassandra/event_parser" @@ -19,6 +20,13 @@ action = sut_instance.parse(sample_event) expect(action["table"]).to(eq("simple")) end + + it "parses table names with data from the event" do + sut_instance = sut().new(default_opts.update({ "table" => "%{[a_field]}" })) + sample_event["a_field"] = "a_value" + action = sut_instance.parse(sample_event) + expect(action["table"]).to(eq("a_value")) + end # @table # => regular table name # => event table name From 273695a9d6afbe179038cf4793d63c567d5a3886 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 11:36:25 +0200 Subject: [PATCH 017/126] added malformed transform tests --- spec/outputs/event_parser_spec.rb | 33 ++++++++++++++++++++----------- 1 file changed, 22 
insertions(+), 11 deletions(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index c1fd35c..11e66df 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -27,20 +27,31 @@ action = sut_instance.parse(sample_event) expect(action["table"]).to(eq("a_value")) end - # @table - # => regular table name - # => event table name end - # @filter_transform_event_key - # => get from event - # @filter_transform - # => malformed - # => single - # => multiple - # => without type - # => with type + describe "filter transforms" do + describe "from config" do + describe "malformed configurations" do + it "fails if the transform has no event_data setting" do + expect { sut().new(default_opts.update({ "filter_transform" => [{ "column_name" => "" }] })) }.to raise_error + end + + it "fails if the transform has no column_name setting" do + expect { sut().new(default_opts.update({ "filter_transform" => [{ "event_data" => "" }] })) }.to raise_error + end + end + # => single + # => multiple + # => without type + # => with type + end + + describe "from event" do + # @filter_transform_event_key + # => get from event + end + end # @hints # => does nothing for none From f42b37fd21ad0128196dfa8a2713e9e68a0921d1 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 12:37:17 +0200 Subject: [PATCH 018/126] changed spec to test the error itself as well --- spec/outputs/event_parser_spec.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 11e66df..3cf40c6 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -34,14 +34,16 @@ describe "from config" do describe "malformed configurations" do it "fails if the transform has no event_data setting" do - expect { sut().new(default_opts.update({ "filter_transform" => [{ "column_name" => "" }] })) }.to raise_error + expect { 
sut().new(default_opts.update({ "filter_transform" => [{ "column_name" => "" }] })) }.to raise_error(/item is incorrectly configured/) end it "fails if the transform has no column_name setting" do - expect { sut().new(default_opts.update({ "filter_transform" => [{ "event_data" => "" }] })) }.to raise_error + expect { sut().new(default_opts.update({ "filter_transform" => [{ "event_data" => "" }] })) }.to raise_error(/item is incorrectly configured/) end end + # => single + # => multiple # => without type # => with type From 245e224efece742537042b0648ac288c8852ebeb Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 13:32:23 +0200 Subject: [PATCH 019/126] maps the event key to the column added stub specs --- .../outputs/cassandra/event_parser.rb | 4 +-- lib/logstash/outputs/cassandra_output.rb | 2 +- spec/outputs/event_parser_spec.rb | 28 +++++++++++++------ 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index c467436..e164910 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -45,14 +45,14 @@ def get_filter_transform(event) def assert_filter_transform_structure(filter_transform) for item in filter_transform - if !item.has_key?("event_key") || !item.has_key?("column_name") || !item.has_key?("cassandra_type") + if !item.has_key?("event_key") || !item.has_key?("column_name") raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" end end end def add_event_value_from_filter_to_action(event, filter, action) - event_data = event.sprintf(filter["event_data"]) + event_data = event[event.sprintf(filter["event_key"])] if filter.has_key?("cassandra_type") cassandra_type = event.sprintf(filter["cassandra_type"]) event_data = convert_value_to_cassandra_type(event_data, cassandra_type) diff --git 
a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index 3caf784..ffc50b7 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -32,7 +32,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base config :password, :validate => :string, :required => true # An optional hash describing how / what to transform / filter from the original event - # Each key is expected to be of the form { event_data => "..." column_name => "..." cassandra_type => "..." } + # Each key is expected to be of the form { event_key => "..." column_name => "..." cassandra_type => "..." } # Event level processing (e.g. %{[key]}) is supported for all three config :filter_transform, :validate => :array, :default => nil diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 3cf40c6..26f338c 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -29,7 +29,6 @@ end end - # @filter_transform describe "filter transforms" do describe "from config" do describe "malformed configurations" do @@ -38,20 +37,33 @@ end it "fails if the transform has no column_name setting" do - expect { sut().new(default_opts.update({ "filter_transform" => [{ "event_data" => "" }] })) }.to raise_error(/item is incorrectly configured/) + expect { sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "" }] })) }.to raise_error(/item is incorrectly configured/) end end - # => single + describe "properly configured" do + it "maps the event key to the column" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }] })) + sample_event["a_field"] = "a_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) + end + + it "transforms to the cassandra type" + it "works with multiple filter transforms" + it "allows for event 
specific event keys" + it "allows for event specific column names" + it "allows for event specific cassandra types" + end - # => multiple - # => without type - # => with type + describe "cassandra type mapping" do + it "properly maps hints to their respective cassandra types" + it "properly maps sets to their specific set types" + end end describe "from event" do - # @filter_transform_event_key - # => get from event + it "obtains the filter transform from the event if defined" end end From de34652a3d9dc3e74f1508514f295a1ac1f049d0 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 13:33:55 +0200 Subject: [PATCH 020/126] added .idea to gitignoe --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 05f1976..5a9fb21 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ Gemfile.lock .bundle vendor -/nbproject/private/ \ No newline at end of file +/nbproject/private/ +.idea From 1f65620da1573a9655ed8a107ccae1df70d4ea3a Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 13:55:53 +0200 Subject: [PATCH 021/126] works with multiple filter transforms --- spec/outputs/event_parser_spec.rb | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 26f338c..03ff2d8 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -50,7 +50,16 @@ end it "transforms to the cassandra type" - it "works with multiple filter transforms" + + it "works with multiple filter transforms" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }, { "event_key" => "another_field", "column_name" => "a_different_column" }] })) + sample_event["a_field"] = "a_value" + sample_event["another_field"] = "a_second_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) + 
expect(action["data"]["a_different_column"]).to(eq("a_second_value")) + end + it "allows for event specific event keys" it "allows for event specific column names" it "allows for event specific cassandra types" From 096c6ec74d95061f3e3adc72660b505aaae082b0 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 14:07:28 +0200 Subject: [PATCH 022/126] allows for event specific event keys --- spec/outputs/event_parser_spec.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 03ff2d8..fb10acb 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -60,7 +60,14 @@ expect(action["data"]["a_different_column"]).to(eq("a_second_value")) end - it "allows for event specific event keys" + it "allows for event specific event keys" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[a_field]}", "column_name" => "a_column" }] })) + sample_event["a_field"] = "another_field" + sample_event["another_field"] = "a_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) + end + it "allows for event specific column names" it "allows for event specific cassandra types" end From 98661a4359e22f96146353c8b4e5f3a471d60e93 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 14:10:14 +0200 Subject: [PATCH 023/126] allows for event specific column names --- spec/outputs/event_parser_spec.rb | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index fb10acb..61af467 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -61,14 +61,21 @@ end it "allows for event specific event keys" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[a_field]}", "column_name" => "a_column" }] })) 
- sample_event["a_field"] = "another_field" + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[pointer_to_another_field]}", "column_name" => "a_column" }] })) + sample_event["pointer_to_another_field"] = "another_field" sample_event["another_field"] = "a_value" action = sut_instance.parse(sample_event) expect(action["data"]["a_column"]).to(eq("a_value")) end - it "allows for event specific column names" + it "allows for event specific column names" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) + sample_event["a_field"] = "a_value" + sample_event["pointer_to_another_field"] = "a_different_column" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_different_column"]).to(eq("a_value")) + end + it "allows for event specific cassandra types" end From e8380b6fcd476f6cf8fea8228df5a3d26d93739e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 15:15:44 +0200 Subject: [PATCH 024/126] properly maps #{mapping[:name]} to #{mapping[:type]} --- .../outputs/cassandra/event_parser.rb | 34 +++++++++---------- spec/outputs/event_parser_spec.rb | 31 ++++++++++++++--- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index e164910..b70c7fe 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -83,7 +83,7 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) case event_data when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' typed_event_data = 0 - when 'uuid', 'timeuuid' + when 'timeuuid' typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") when 'timestamp' typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) @@ -111,38 +111,36 @@ def 
convert_value_to_cassandra_type(event_data, cassandra_type) def get_cassandra_type_generator(name) case name - when 'uuid' - return Cassandra::Types::Uuid when 'timestamp' - return Cassandra::Types::Timestamp + return ::Cassandra::Types::Timestamp when 'inet' - return Cassandra::Types::Inet + return ::Cassandra::Types::Inet when 'float' - return Cassandra::Types::Float + return ::Cassandra::Types::Float when 'varchar' - return Cassandra::Types::Varchar + return ::Cassandra::Types::Varchar when 'text' - return Cassandra::Types::Text + return ::Cassandra::Types::Text when 'blob' - return Cassandra::Types::Blog + return ::Cassandra::Types::Blob when 'ascii' - return Cassandra::Types::Ascii + return ::Cassandra::Types::Ascii when 'bigint' - return Cassandra::Types::Bigint + return ::Cassandra::Types::Bigint when 'counter' - return Cassandra::Types::Counter + return ::Cassandra::Types::Counter when 'int' - return Cassandra::Types::Int + return ::Cassandra::Types::Int when 'varint' - return Cassandra::Types::Varint + return ::Cassandra::Types::Varint when 'boolean' - return Cassandra::Types::Boolean + return ::Cassandra::Types::Boolean when 'decimal' - return Cassandra::Types::Decimal + return ::Cassandra::Types::Decimal when 'double' - return Cassandra::Types::Double + return ::Cassandra::Types::Double when 'timeuuid' - return Cassandra::Types::Timeuuid + return ::Cassandra::Types::Timeuuid when /^set\((.*)\)$/ set_type = get_cassandra_type_generator($1) return Cassandra::Types::Set(set_type) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 61af467..289efcd 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -49,8 +49,6 @@ expect(action["data"]["a_column"]).to(eq("a_value")) end - it "transforms to the cassandra type" - it "works with multiple filter transforms" do sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }, { 
"event_key" => "another_field", "column_name" => "a_different_column" }] })) sample_event["a_field"] = "a_value" @@ -76,12 +74,37 @@ expect(action["data"]["a_different_column"]).to(eq("a_value")) end - it "allows for event specific cassandra types" end describe "cassandra type mapping" do - it "properly maps hints to their respective cassandra types" + [ + { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1970-01-01 00:00:00") }, + { :name => 'inet', :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, + { :name => 'float', :type => ::Cassandra::Types::Float, :value => "10.15" }, + { :name => 'varchar', :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, + { :name => 'text', :type => ::Cassandra::Types::Text, :value => "some text" }, + { :name => 'blob', :type => ::Cassandra::Types::Blob, :value => "12345678" }, + { :name => 'ascii', :type => ::Cassandra::Types::Ascii, :value => "some ascii" }, + { :name => 'bigint', :type => ::Cassandra::Types::Bigint, :value => "100" }, + { :name => 'counter', :type => ::Cassandra::Types::Counter, :value => "15" }, + { :name => 'int', :type => ::Cassandra::Types::Int, :value => "123" }, + { :name => 'varint', :type => ::Cassandra::Types::Varint, :value => "345" }, + { :name => 'boolean', :type => ::Cassandra::Types::Boolean, :value => "true" }, + { :name => 'decimal', :type => ::Cassandra::Types::Decimal, :value => "0.12E2" }, + { :name => 'double', :type => ::Cassandra::Types::Double, :value => "123.65" }, + { :name => 'timeuuid', :type => ::Cassandra::Types::Timeuuid, :value => "00000000-0000-0000-0000-000000000000" } + ].each { |mapping| + # NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ + it "properly maps #{mapping[:name]} to #{mapping[:type]}" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => mapping[:name] }] })) + 
sample_event["a_field"] = mapping[:value] + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"].to_s).to(eq(mapping[:value].to_s)) + end + } + it "properly maps sets to their specific set types" + it "allows for event specific cassandra types" end end From 7805fe379e280d27e38d8d8e642eddb1b09e08ef Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 15:52:26 +0200 Subject: [PATCH 025/126] properly maps sets to their specific set types fails in case of an unknown type --- lib/logstash/outputs/cassandra/event_parser.rb | 4 ++-- spec/outputs/event_parser_spec.rb | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index b70c7fe..2bc760f 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -143,9 +143,9 @@ def get_cassandra_type_generator(name) return ::Cassandra::Types::Timeuuid when /^set\((.*)\)$/ set_type = get_cassandra_type_generator($1) - return Cassandra::Types::Set(set_type) + return ::Cassandra::Types::Set.new(set_type) else - raise "Unknown cassandra_type #{cassandra_type}" + raise "Unknown cassandra_type #{name}" end end end diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 289efcd..d7438db 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -103,8 +103,21 @@ end } - it "properly maps sets to their specific set types" + it "properly maps sets to their specific set types" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set(int)" }] })) + original_value = [ 1, 2, 3 ] + sample_event["a_field"] = original_value + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"].to_a).to(eq(original_value)) + end + it "allows for event specific 
cassandra types" + + it "fails in case of an unknown type" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" }] })) + sample_event["a_field"] = "a_value" + expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) + end end end From 031b68b499716604373659c79f40202dd74d671b Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 15:55:59 +0200 Subject: [PATCH 026/126] allows for event specific cassandra types --- spec/outputs/event_parser_spec.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index d7438db..69010d0 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -111,7 +111,13 @@ expect(action["data"]["a_column"].to_a).to(eq(original_value)) end - it "allows for event specific cassandra types" + it "allows for event specific cassandra types" do + sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) + sample_event["a_field"] = "123" + sample_event["pointer_to_a_field"] = "int" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq(123)) + end it "fails in case of an unknown type" do sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" 
}] })) From 1398a6ff60a5fc188939e73a4a58d4d3da4f90b6 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 16:14:22 +0200 Subject: [PATCH 027/126] obtains the filter transform from the event if defined --- lib/logstash/outputs/cassandra/event_parser.rb | 2 +- spec/outputs/event_parser_spec.rb | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 2bc760f..95a69b2 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -35,7 +35,7 @@ def parse(event) def get_filter_transform(event) filter_transform = nil if @filter_transform_event_key - filter_transform = event.sprintf(@filter_transform_event_key) + filter_transform = event[@filter_transform_event_key] assert_filter_transform_structure(filter_transform) elsif @filter_transform filter_transform = @filter_transform diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 69010d0..2d1a93f 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -128,7 +128,13 @@ end describe "from event" do - it "obtains the filter transform from the event if defined" + it "obtains the filter transform from the event if defined" do + sut_instance = sut().new(default_opts.update({ "filter_transform_event_key" => "an_event_filter" })) + sample_event["a_field"] = "a_value" + sample_event["an_event_filter"] = [{ "event_key" => "a_field", "column_name" => "a_column" }] + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) + end end end From d4cfed6af941061188cb7dcadd2c4b91a8c51463 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 16:22:10 +0200 Subject: [PATCH 028/126] hint test stubs --- spec/outputs/event_parser_spec.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/spec/outputs/event_parser_spec.rb 
b/spec/outputs/event_parser_spec.rb index 2d1a93f..dff3381 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -138,10 +138,13 @@ end end - # @hints - # => does nothing for none - # => hints what it knows - # => fails for unknown types + describe "hints" do + it "removes fields starting with @" + it "does not attempt to change items with no hints" + it "converts items with hints" + it "fails for unknown hints" + it "fails for unsuccessful hint conversion" + end # @ignore_bad_values # => fails on bad values if false From 98c24c624235cad43ca2261010755bb37793f812 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 16:27:07 +0200 Subject: [PATCH 029/126] removes fields starting with @ --- spec/outputs/event_parser_spec.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index dff3381..f28e7ff 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -139,7 +139,15 @@ end describe "hints" do - it "removes fields starting with @" + it "removes fields starting with @" do + sut_instance = sut().new(default_opts.update({ "hints" => {} })) + sample_event["leave"] = "a_value" + sample_event["@remove"] = "another_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["leave"]).to(eq("a_value")) + expect(action["data"]).not_to(include("@remove")) + end + it "does not attempt to change items with no hints" it "converts items with hints" it "fails for unknown hints" From c731553860cfbffbe01592f5d725210089e8780f Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 16:32:33 +0200 Subject: [PATCH 030/126] does not attempt to change items with no hints --- spec/outputs/event_parser_spec.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index f28e7ff..8b197a4 100644 --- 
a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -148,7 +148,14 @@ expect(action["data"]).not_to(include("@remove")) end - it "does not attempt to change items with no hints" + it "does not attempt to change items with no hints" do + sut_instance = sut().new(default_opts.update({ "hints" => {} })) + expected_value = [ 1, 2, 3 ] + sample_event["no_hint_here"] = expected_value + action = sut_instance.parse(sample_event) + expect(action["data"]["no_hint_here"]).to(equal(expected_value)) + end + it "converts items with hints" it "fails for unknown hints" it "fails for unsuccessful hint conversion" From de41d7777d2cc029a259aa16c3797811928693b5 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 16:39:49 +0200 Subject: [PATCH 031/126] converts items with hints --- lib/logstash/outputs/cassandra/event_parser.rb | 2 +- spec/outputs/event_parser_spec.rb | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 95a69b2..3703c50 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -67,7 +67,7 @@ def add_event_data_using_configured_hints(event, action) action["data"].reject!{|key| %r{^@} =~ key} @hints.each do |event_key, cassandra_type| if action["data"].has_key?(event_key) - event_data = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) + action["data"][event_key] = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) end end end diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 8b197a4..4a9fb38 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -156,7 +156,17 @@ expect(action["data"]["no_hint_here"]).to(equal(expected_value)) end - it "converts items with hints" + it "converts items with hints" do + sut_instance = 
sut().new(default_opts.update({ "hints" => { "a_set" => "set(int)", "an_int" => "int" } })) + original_set = [ 1, 2, 3 ] + sample_event["a_set"] = original_set + sample_event["an_int"] = "123" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_set"]).to(be_a(Set)) + expect(action["data"]["a_set"].to_a).to(eql(original_set)) + expect(action["data"]["an_int"]).to(eql(123)) + end + it "fails for unknown hints" it "fails for unsuccessful hint conversion" end From c0c31f2a9840f03bc17682474433e65220f03bf1 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 17:03:55 +0200 Subject: [PATCH 032/126] fails for unknown hint types --- spec/outputs/event_parser_spec.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index 4a9fb38..d8d9539 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -167,7 +167,12 @@ expect(action["data"]["an_int"]).to(eql(123)) end - it "fails for unknown hints" + it "fails for unknown hint types" do + sut_instance = sut().new(default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } })) + sample_event["a_field"] = "a value" + expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) + end + it "fails for unsuccessful hint conversion" end From 523edf1c89faea9b57eba0f2e8677a31a8dcc16c Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 17:19:35 +0200 Subject: [PATCH 033/126] fails for unsuccessful hint conversion --- spec/outputs/event_parser_spec.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index d8d9539..df9b6fa 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -173,7 +173,13 @@ expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) end - it "fails for unsuccessful hint 
conversion" + it "fails for unsuccessful hint conversion" do + options = default_opts.update({ "hints" => { "a_field" => "int" } }) + expect(options['logger']).to(receive(:error)) + sut_instance = sut().new(options) + sample_event["a_field"] = "i am not an int!!!" + expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) + end end # @ignore_bad_values From 5f83fcf9e6038bc62e965ff9adbc27c205fd1425 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 17:21:29 +0200 Subject: [PATCH 034/126] reformatted to setup-act-assert --- spec/outputs/event_parser_spec.rb | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index df9b6fa..c758c7d 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/outputs/event_parser_spec.rb @@ -17,14 +17,18 @@ describe "table name parsing" do it "leaves regular table names unchanged" do sut_instance = sut().new(default_opts.update({ "table" => "simple" })) + action = sut_instance.parse(sample_event) + expect(action["table"]).to(eq("simple")) end it "parses table names with data from the event" do sut_instance = sut().new(default_opts.update({ "table" => "%{[a_field]}" })) sample_event["a_field"] = "a_value" + action = sut_instance.parse(sample_event) + expect(action["table"]).to(eq("a_value")) end end @@ -45,7 +49,9 @@ it "maps the event key to the column" do sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }] })) sample_event["a_field"] = "a_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) end @@ -53,7 +59,9 @@ sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }, { "event_key" => "another_field", "column_name" => "a_different_column" }] })) sample_event["a_field"] = "a_value" 
sample_event["another_field"] = "a_second_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) expect(action["data"]["a_different_column"]).to(eq("a_second_value")) end @@ -62,7 +70,9 @@ sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[pointer_to_another_field]}", "column_name" => "a_column" }] })) sample_event["pointer_to_another_field"] = "another_field" sample_event["another_field"] = "a_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) end @@ -70,7 +80,9 @@ sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) sample_event["a_field"] = "a_value" sample_event["pointer_to_another_field"] = "a_different_column" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_different_column"]).to(eq("a_value")) end @@ -98,7 +110,9 @@ it "properly maps #{mapping[:name]} to #{mapping[:type]}" do sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => mapping[:name] }] })) sample_event["a_field"] = mapping[:value] + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"].to_s).to(eq(mapping[:value].to_s)) end } @@ -107,7 +121,9 @@ sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set(int)" }] })) original_value = [ 1, 2, 3 ] sample_event["a_field"] = original_value + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"].to_a).to(eq(original_value)) end @@ -115,13 +131,16 @@ sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) sample_event["a_field"] = "123" 
sample_event["pointer_to_a_field"] = "int" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq(123)) end it "fails in case of an unknown type" do sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" }] })) sample_event["a_field"] = "a_value" + expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) end end @@ -132,7 +151,9 @@ sut_instance = sut().new(default_opts.update({ "filter_transform_event_key" => "an_event_filter" })) sample_event["a_field"] = "a_value" sample_event["an_event_filter"] = [{ "event_key" => "a_field", "column_name" => "a_column" }] + action = sut_instance.parse(sample_event) + expect(action["data"]["a_column"]).to(eq("a_value")) end end @@ -143,7 +164,9 @@ sut_instance = sut().new(default_opts.update({ "hints" => {} })) sample_event["leave"] = "a_value" sample_event["@remove"] = "another_value" + action = sut_instance.parse(sample_event) + expect(action["data"]["leave"]).to(eq("a_value")) expect(action["data"]).not_to(include("@remove")) end @@ -152,7 +175,9 @@ sut_instance = sut().new(default_opts.update({ "hints" => {} })) expected_value = [ 1, 2, 3 ] sample_event["no_hint_here"] = expected_value + action = sut_instance.parse(sample_event) + expect(action["data"]["no_hint_here"]).to(equal(expected_value)) end @@ -161,7 +186,9 @@ original_set = [ 1, 2, 3 ] sample_event["a_set"] = original_set sample_event["an_int"] = "123" + action = sut_instance.parse(sample_event) + expect(action["data"]["a_set"]).to(be_a(Set)) expect(action["data"]["a_set"].to_a).to(eql(original_set)) expect(action["data"]["an_int"]).to(eql(123)) @@ -169,14 +196,18 @@ it "fails for unknown hint types" do sut_instance = sut().new(default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } })) + sample_event["a_field"] = "a value" + expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown 
cassandra_type/) end it "fails for unsuccessful hint conversion" do options = default_opts.update({ "hints" => { "a_field" => "int" } }) expect(options['logger']).to(receive(:error)) + sut_instance = sut().new(options) + sample_event["a_field"] = "i am not an int!!!" expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) end From 9badc65725aa39ea3994f7c41d0d694ed42063ca Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 18:02:57 +0200 Subject: [PATCH 035/126] ignore_bad_values is turned on --- .../outputs/cassandra/event_parser.rb | 10 +-- spec/outputs/event_parser_spec.rb | 71 +++++++++++++------ 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 3703c50..729a70a 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -80,8 +80,8 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) rescue Exception => e error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" if @ignore_bad_values - case event_data - when 'int', 'varint', 'bigint', 'double', 'decimal', 'counter' + case cassandra_type + when 'int', 'varint', 'bigint', 'double', 'counter' typed_event_data = 0 when 'timeuuid' typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") @@ -91,12 +91,6 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) typed_event_data = generator.new("0.0.0.0") when 'float' typed_event_data = generator.new(0) - when 'boolean' - typed_event_data = generator.new(false) - when 'text', 'varchar', 'ascii' - typed_event_data = generator.new(0) - when 'blob' - typed_event_data = generator.new(nil) when /^set\((.*)\)$/ typed_event_data = generator.new([]) end diff --git a/spec/outputs/event_parser_spec.rb b/spec/outputs/event_parser_spec.rb index c758c7d..ea1dbd7 100644 --- a/spec/outputs/event_parser_spec.rb +++ 
b/spec/outputs/event_parser_spec.rb @@ -91,20 +91,20 @@ describe "cassandra type mapping" do [ { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1970-01-01 00:00:00") }, - { :name => 'inet', :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, - { :name => 'float', :type => ::Cassandra::Types::Float, :value => "10.15" }, - { :name => 'varchar', :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, - { :name => 'text', :type => ::Cassandra::Types::Text, :value => "some text" }, - { :name => 'blob', :type => ::Cassandra::Types::Blob, :value => "12345678" }, - { :name => 'ascii', :type => ::Cassandra::Types::Ascii, :value => "some ascii" }, - { :name => 'bigint', :type => ::Cassandra::Types::Bigint, :value => "100" }, - { :name => 'counter', :type => ::Cassandra::Types::Counter, :value => "15" }, - { :name => 'int', :type => ::Cassandra::Types::Int, :value => "123" }, - { :name => 'varint', :type => ::Cassandra::Types::Varint, :value => "345" }, - { :name => 'boolean', :type => ::Cassandra::Types::Boolean, :value => "true" }, - { :name => 'decimal', :type => ::Cassandra::Types::Decimal, :value => "0.12E2" }, - { :name => 'double', :type => ::Cassandra::Types::Double, :value => "123.65" }, - { :name => 'timeuuid', :type => ::Cassandra::Types::Timeuuid, :value => "00000000-0000-0000-0000-000000000000" } + { :name => 'inet', :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, + { :name => 'float', :type => ::Cassandra::Types::Float, :value => "10.15" }, + { :name => 'varchar', :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, + { :name => 'text', :type => ::Cassandra::Types::Text, :value => "some text" }, + { :name => 'blob', :type => ::Cassandra::Types::Blob, :value => "12345678" }, + { :name => 'ascii', :type => ::Cassandra::Types::Ascii, :value => "some ascii" }, + { :name => 'bigint', :type => ::Cassandra::Types::Bigint, :value => "100" }, + { :name => 'counter', :type => 
::Cassandra::Types::Counter, :value => "15" }, + { :name => 'int', :type => ::Cassandra::Types::Int, :value => "123" }, + { :name => 'varint', :type => ::Cassandra::Types::Varint, :value => "345" }, + { :name => 'boolean', :type => ::Cassandra::Types::Boolean, :value => "true" }, + { :name => 'decimal', :type => ::Cassandra::Types::Decimal, :value => "0.12E2" }, + { :name => 'double', :type => ::Cassandra::Types::Double, :value => "123.65" }, + { :name => 'timeuuid', :type => ::Cassandra::Types::Timeuuid, :value => "00000000-0000-0000-0000-000000000000" } ].each { |mapping| # NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ it "properly maps #{mapping[:name]} to #{mapping[:type]}" do @@ -204,7 +204,7 @@ it "fails for unsuccessful hint conversion" do options = default_opts.update({ "hints" => { "a_field" => "int" } }) - expect(options['logger']).to(receive(:error)) + expect(options["logger"]).to(receive(:error)) sut_instance = sut().new(options) @@ -213,9 +213,40 @@ end end - # @ignore_bad_values - # => fails on bad values if false - # => if true - # => defaults what it can - # => skips what it cant + describe "ignore_bad_values is turned on" do + [ + { :name => 'timestamp', :value => "i dont have to_time", :expected => Time::parse("1970-01-01 00:00:00") }, + { :name => 'inet', :value => "i am not an inet address", :expected => "0.0.0.0" }, + { :name => 'float', :value => "i am not a float", :expected => 0.0 }, + { :name => 'bigint', :value => "i am not a bigint", :expected => 0 }, + { :name => 'counter', :value => "i am not a counter", :expected => 0 }, + { :name => 'int', :value => "i am not a int", :expected => 0 }, + { :name => 'varint', :value => "i am not a varint", :expected => 0 }, + { :name => 'double', :value => "i am not a double", :expected => 0 }, + { :name => 'timeuuid', :value => "i am not a timeuuid", :expected => "00000000-0000-0000-0000-000000000000" } + ].each { |mapping| + # NOTE: this is not the best 
test there is, but it is the best / simplest I could think of :/ + it "properly defaults #{mapping[:name]}" do + options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => mapping[:name] } }) + expect(options["logger"]).to(receive(:warn)) + sut_instance = sut().new(options) + sample_event["a_field"] = mapping[:value] + + action = sut_instance.parse(sample_event) + + expect(action["data"]["a_field"].to_s).to(eq(mapping[:expected].to_s)) + end + } + + it "properly default sets" do + options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => "set(float)" } }) + expect(options["logger"]).to(receive(:warn)) + sut_instance = sut().new(options) + sample_event["a_field"] = "i am not a set" + + action = sut_instance.parse(sample_event) + + expect(action["data"]["a_field"].to_a).to(eq([])) + end + end end From a24cb1e251f2cd147ed90406d60987d234d2d89e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 18:06:27 +0200 Subject: [PATCH 036/126] moved files to the unt test folder --- spec/{ => unit}/outputs/cassandra_spec.rb | 2 +- spec/{ => unit}/outputs/event_parser_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename spec/{ => unit}/outputs/cassandra_spec.rb (56%) rename spec/{ => unit}/outputs/event_parser_spec.rb (99%) diff --git a/spec/outputs/cassandra_spec.rb b/spec/unit/outputs/cassandra_spec.rb similarity index 56% rename from spec/outputs/cassandra_spec.rb rename to spec/unit/outputs/cassandra_spec.rb index 40c2492..e1e1959 100644 --- a/spec/outputs/cassandra_spec.rb +++ b/spec/unit/outputs/cassandra_spec.rb @@ -1,3 +1,3 @@ # encoding: utf-8 -require_relative "../cassandra_spec_helper" +require_relative "../../cassandra_spec_helper" require "logstash/outputs/cassandra_output" diff --git a/spec/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb similarity index 99% rename from spec/outputs/event_parser_spec.rb rename to spec/unit/outputs/event_parser_spec.rb index 
ea1dbd7..deb4acb 100644 --- a/spec/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -1,5 +1,5 @@ # encoding: utf-8 -require_relative "../cassandra_spec_helper" +require_relative "../../cassandra_spec_helper" require "logstash/outputs/cassandra/event_parser" RSpec.describe LogStash::Outputs::Cassandra::EventParser do From ebb78a421296495f67ff61ab4b1ded92918c1a68 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 18:09:16 +0200 Subject: [PATCH 037/126] added empty spec for SafeSubmitter --- spec/unit/outputs/safe_submitter_spec.rb | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 spec/unit/outputs/safe_submitter_spec.rb diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb new file mode 100644 index 0000000..6ee5cdb --- /dev/null +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -0,0 +1,7 @@ +# encoding: utf-8 +require_relative "../../cassandra_spec_helper" +require "logstash/outputs/cassandra/safe_submitter" + +RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do + +end From 8378ab54d8849ea2a9ed8769253ad9f94f804dd9 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 8 Mar 2016 18:14:28 +0200 Subject: [PATCH 038/126] added stub files for the casandra output tests --- lib/logstash/outputs/cassandra/safe_submitter.rb | 2 +- spec/integration/outputs/cassandra_spec.rb | 5 +++++ spec/unit/outputs/cassandra_spec.rb | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 spec/integration/outputs/cassandra_spec.rb diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index ca28107..7b5e547 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -8,7 +8,7 @@ def initialize(logger, username, password, hosts, consistency, request_timeout, @statement_cache = {} @logger = logger @keyspace = keyspace - setup_cassandra_session(logger, 
username, password, hosts, consistency, request_timeout, retry_policy, keyspace) + setup_cassandra_session(logger, username, password, hosts, consistency, request_timeout, retry_policy) end def submit(actions) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb new file mode 100644 index 0000000..7a1e7fb --- /dev/null +++ b/spec/integration/outputs/cassandra_spec.rb @@ -0,0 +1,5 @@ +# encoding: utf-8 +require_relative "../../cassandra_spec_helper" +require "logstash/outputs/cassandra_output" + +# TODO: add integration tests here (docker, longhorseman, et al) diff --git a/spec/unit/outputs/cassandra_spec.rb b/spec/unit/outputs/cassandra_spec.rb index e1e1959..4d4265e 100644 --- a/spec/unit/outputs/cassandra_spec.rb +++ b/spec/unit/outputs/cassandra_spec.rb @@ -1,3 +1,5 @@ # encoding: utf-8 require_relative "../../cassandra_spec_helper" require "logstash/outputs/cassandra_output" + +# TODO: add unit tests for the main cassandra output class From c9b386434b282fab36ad46b859baf78897dc8c3e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 08:43:46 +0200 Subject: [PATCH 039/126] changed all string to use double quotes for consistancy --- lib/logstash/outputs/cassandra/buffer.rb | 2 +- .../outputs/cassandra/event_parser.rb | 54 +++++++++---------- spec/unit/outputs/event_parser_spec.rb | 12 ++--- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/lib/logstash/outputs/cassandra/buffer.rb b/lib/logstash/outputs/cassandra/buffer.rb index ca247b7..bb4256c 100644 --- a/lib/logstash/outputs/cassandra/buffer.rb +++ b/lib/logstash/outputs/cassandra/buffer.rb @@ -1,4 +1,4 @@ -require 'concurrent' +require "concurrent" java_import java.util.concurrent.locks.ReentrantLock module LogStash; module Outputs; module Cassandra diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 729a70a..241ca2c 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb 
+++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -6,13 +6,13 @@ module LogStash; module Outputs; module Cassandra class EventParser def initialize(opts) - @logger = opts['logger'] - @table = opts['table'] - @filter_transform_event_key = opts['filter_transform_event_key'] - assert_filter_transform_structure(opts['filter_transform']) if opts['filter_transform'] - @filter_transform = opts['filter_transform'] - @hints = opts['hints'] - @ignore_bad_values = opts['ignore_bad_values'] + @logger = opts["logger"] + @table = opts["table"] + @filter_transform_event_key = opts["filter_transform_event_key"] + assert_filter_transform_structure(opts["filter_transform"]) if opts["filter_transform"] + @filter_transform = opts["filter_transform"] + @hints = opts["hints"] + @ignore_bad_values = opts["ignore_bad_values"] end def parse(event) @@ -81,15 +81,15 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" if @ignore_bad_values case cassandra_type - when 'int', 'varint', 'bigint', 'double', 'counter' + when "int", "varint", "bigint", "double", "counter" typed_event_data = 0 - when 'timeuuid' + when "timeuuid" typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") - when 'timestamp' + when "timestamp" typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) - when 'inet' + when "inet" typed_event_data = generator.new("0.0.0.0") - when 'float' + when "float" typed_event_data = generator.new(0) when /^set\((.*)\)$/ typed_event_data = generator.new([]) @@ -105,35 +105,35 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) def get_cassandra_type_generator(name) case name - when 'timestamp' + when "timestamp" return ::Cassandra::Types::Timestamp - when 'inet' + when "inet" return ::Cassandra::Types::Inet - when 'float' + when "float" return ::Cassandra::Types::Float - when 'varchar' + when "varchar" return ::Cassandra::Types::Varchar - 
when 'text' + when "text" return ::Cassandra::Types::Text - when 'blob' + when "blob" return ::Cassandra::Types::Blob - when 'ascii' + when "ascii" return ::Cassandra::Types::Ascii - when 'bigint' + when "bigint" return ::Cassandra::Types::Bigint - when 'counter' + when "counter" return ::Cassandra::Types::Counter - when 'int' + when "int" return ::Cassandra::Types::Int - when 'varint' + when "varint" return ::Cassandra::Types::Varint - when 'boolean' + when "boolean" return ::Cassandra::Types::Boolean - when 'decimal' + when "decimal" return ::Cassandra::Types::Decimal - when 'double' + when "double" return ::Cassandra::Types::Double - when 'timeuuid' + when "timeuuid" return ::Cassandra::Types::Timeuuid when /^set\((.*)\)$/ set_type = get_cassandra_type_generator($1) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index deb4acb..bce31c0 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -5,12 +5,12 @@ RSpec.describe LogStash::Outputs::Cassandra::EventParser do let(:sut) { LogStash::Outputs::Cassandra::EventParser } let(:default_opts) {{ - 'logger' => double(), - 'table' => 'dummy', - 'filter_transform_event_key' => nil, - 'filter_transform' => nil, - 'hints' => {}, - 'ignore_bad_values' => false + "logger" => double(), + "table" => 'dummy', + "filter_transform_event_key" => nil, + "filter_transform" => nil, + "hints" => {}, + "ignore_bad_values" => false }} let(:sample_event) { LogStash::Event.new("message" => "sample message here") } From 742b4db08c71ed72fd3f8d0330f7f9d77236a936 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 08:44:38 +0200 Subject: [PATCH 040/126] added safe submitter test skeleton --- .../outputs/cassandra/safe_submitter.rb | 3 +-- spec/unit/outputs/safe_submitter_spec.rb | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb 
b/lib/logstash/outputs/cassandra/safe_submitter.rb index 7b5e547..712eee2 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -15,9 +15,8 @@ def submit(actions) begin batch = prepare_batch(actions) @session.execute(batch) - @logger.info("Batch sent successfully") rescue Exception => e - @logger.warn("Failed to send batch (error: #{e.to_s}).") + @logger.error("Failed to send batch to cassandra", :exception => e, :backtrace => e.backtrace) end end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 6ee5cdb..acaf6f1 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -3,5 +3,26 @@ require "logstash/outputs/cassandra/safe_submitter" RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do + let(:sut) { LogStash::Outputs::Cassandra::SafeSubmitter } + let(:default_opts) {{ + "logger" => double(), + "username" => "a user", + "password" => "a password", + "hosts" => "some host", + "consistency" => "one", + "request_timeout" => 10, + "retry_policy" => "default", + "keyspace" => "the final frontier" + }} + describe "init" do + it "properly inits the cassandra session" + it "supports the ... retry policy by passing ... 
as the retry_policy" + end + + describe "execution" do + it "prepares and executes the query" + it "caches the generated query" + it "does not confuse between a new query and cached queries" + end end From a4f7f4b52f2def12243e43cfd2974c756703b6b5 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 08:53:46 +0200 Subject: [PATCH 041/126] changed opts to options --- lib/logstash/outputs/cassandra/event_parser.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 241ca2c..fdc84a0 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -5,14 +5,14 @@ module LogStash; module Outputs; module Cassandra class EventParser - def initialize(opts) - @logger = opts["logger"] - @table = opts["table"] - @filter_transform_event_key = opts["filter_transform_event_key"] - assert_filter_transform_structure(opts["filter_transform"]) if opts["filter_transform"] - @filter_transform = opts["filter_transform"] - @hints = opts["hints"] - @ignore_bad_values = opts["ignore_bad_values"] + def initialize(options) + @logger = options["logger"] + @table = options["table"] + @filter_transform_event_key = options["filter_transform_event_key"] + assert_filter_transform_structure(options["filter_transform"]) if options["filter_transform"] + @filter_transform = options["filter_transform"] + @hints = options["hints"] + @ignore_bad_values = options["ignore_bad_values"] end def parse(event) From 45d1bcf5025feffd315a02354b084e407b2ce8da Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 09:16:37 +0200 Subject: [PATCH 042/126] utf 8 headers for all --- lib/logstash/outputs/cassandra/buffer.rb | 1 + lib/logstash/outputs/cassandra/event_parser.rb | 1 - spec/unit/outputs/buffer_spec.rb | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/logstash/outputs/cassandra/buffer.rb 
b/lib/logstash/outputs/cassandra/buffer.rb index bb4256c..fb045bd 100644 --- a/lib/logstash/outputs/cassandra/buffer.rb +++ b/lib/logstash/outputs/cassandra/buffer.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require "concurrent" java_import java.util.concurrent.locks.ReentrantLock diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index fdc84a0..4b30cae 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -2,7 +2,6 @@ require "time" require "cassandra" - module LogStash; module Outputs; module Cassandra class EventParser def initialize(options) diff --git a/spec/unit/outputs/buffer_spec.rb b/spec/unit/outputs/buffer_spec.rb index 9fe311f..5165b20 100644 --- a/spec/unit/outputs/buffer_spec.rb +++ b/spec/unit/outputs/buffer_spec.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require "logstash/outputs/cassandra/buffer" require "cabin" From 0063e9921c720fefea29f40667f4b647f855534a Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 09:35:13 +0200 Subject: [PATCH 043/126] properly inits the cassandra session --- .../outputs/cassandra/safe_submitter.rb | 29 +++++++++---------- lib/logstash/outputs/cassandra_output.rb | 1 + spec/unit/outputs/safe_submitter_spec.rb | 26 +++++++++++++++-- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 712eee2..4b23af3 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -1,14 +1,13 @@ # encoding: utf-8 require "cassandra" - module LogStash; module Outputs; module Cassandra class SafeSubmitter - def initialize(logger, username, password, hosts, consistency, request_timeout, retry_policy, keyspace) + def initialize(options) @statement_cache = {} - @logger = logger - @keyspace = keyspace - setup_cassandra_session(logger, username, password, 
hosts, consistency, request_timeout, retry_policy) + @logger = options["logger"] + @keyspace = options["keyspace"] + setup_cassandra_session(options) end def submit(actions) @@ -21,19 +20,17 @@ def submit(actions) end private - def setup_cassandra_session(logger, username, password, hosts, consistency, request_timeout, retry_policy) - cluster = ::Cassandra.cluster( - username: username, - password: password, - hosts: hosts, - consistency: consistency.to_sym, - timeout: request_timeout, - retry_policy: get_retry_policy(retry_policy), - logger: logger + def setup_cassandra_session(options) + cluster = options["cassandra"].cluster( + username: options["username"], + password: options["password"], + hosts: options["hosts"], + consistency: options["consistency"].to_sym, + timeout: options["request_timeout"], + retry_policy: get_retry_policy(options["retry_policy"]), + logger: options["logger"] ) @session = cluster.connect(@keyspace) - @logger.info("New cassandra session created", - :username => username, :hosts => hosts, :keyspace => @keyspace) end def get_retry_policy(policy_name) diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index ffc50b7..c18779a 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -4,6 +4,7 @@ require "logstash/outputs/cassandra/buffer" require "logstash/outputs/cassandra/event_parser" require "logstash/outputs/cassandra/safe_submitter" +require "cassandra" class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index acaf6f1..675eb47 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -4,19 +4,41 @@ RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do let(:sut) { LogStash::Outputs::Cassandra::SafeSubmitter } - let(:default_opts) {{ + let(:default_options) {{ "logger" => double(), + 
"cassandra" => double(), "username" => "a user", "password" => "a password", "hosts" => "some host", "consistency" => "one", "request_timeout" => 10, "retry_policy" => "default", + "concrete_retry_policy" => ::Cassandra::Retry::Policies::Default, "keyspace" => "the final frontier" }} describe "init" do - it "properly inits the cassandra session" + def setup_cassandra_double(options) + session_double = double() + cluster_double = double() + expect(cluster_double).to(receive(:connect)).with(options["keyspace"]).and_return(session_double) + expect(options["cassandra"]).to(receive(:cluster).with( + username: options["username"], + password: options["password"], + hosts: options["hosts"], + consistency: options["consistency"].to_sym, + timeout: options["request_timeout"], + retry_policy: options["concrete_retry_policy"], + logger: options["logger"] + )).and_return(cluster_double) + end + + it "properly inits the cassandra session" do + setup_cassandra_double(default_options) + + sut.new(default_options) + end + it "supports the ... retry policy by passing ... 
as the retry_policy" end From 6c790482c4628f0639f20971bafe2098e60f6a1b Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 09:38:16 +0200 Subject: [PATCH 044/126] remove parantheses from call to sut --- spec/unit/outputs/event_parser_spec.rb | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index bce31c0..49b7168 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -16,7 +16,7 @@ describe "table name parsing" do it "leaves regular table names unchanged" do - sut_instance = sut().new(default_opts.update({ "table" => "simple" })) + sut_instance = sut.new(default_opts.update({ "table" => "simple" })) action = sut_instance.parse(sample_event) @@ -24,7 +24,7 @@ end it "parses table names with data from the event" do - sut_instance = sut().new(default_opts.update({ "table" => "%{[a_field]}" })) + sut_instance = sut.new(default_opts.update({ "table" => "%{[a_field]}" })) sample_event["a_field"] = "a_value" action = sut_instance.parse(sample_event) @@ -37,17 +37,17 @@ describe "from config" do describe "malformed configurations" do it "fails if the transform has no event_data setting" do - expect { sut().new(default_opts.update({ "filter_transform" => [{ "column_name" => "" }] })) }.to raise_error(/item is incorrectly configured/) + expect { sut.new(default_opts.update({ "filter_transform" => [{ "column_name" => "" }] })) }.to raise_error(/item is incorrectly configured/) end it "fails if the transform has no column_name setting" do - expect { sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "" }] })) }.to raise_error(/item is incorrectly configured/) + expect { sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "" }] })) }.to raise_error(/item is incorrectly configured/) end end describe "properly configured" do it "maps the event key to the column" 
do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }] })) sample_event["a_field"] = "a_value" action = sut_instance.parse(sample_event) @@ -56,7 +56,7 @@ end it "works with multiple filter transforms" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }, { "event_key" => "another_field", "column_name" => "a_different_column" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }, { "event_key" => "another_field", "column_name" => "a_different_column" }] })) sample_event["a_field"] = "a_value" sample_event["another_field"] = "a_second_value" @@ -67,7 +67,7 @@ end it "allows for event specific event keys" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[pointer_to_another_field]}", "column_name" => "a_column" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[pointer_to_another_field]}", "column_name" => "a_column" }] })) sample_event["pointer_to_another_field"] = "another_field" sample_event["another_field"] = "a_value" @@ -77,7 +77,7 @@ end it "allows for event specific column names" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) sample_event["a_field"] = "a_value" sample_event["pointer_to_another_field"] = "a_different_column" @@ -108,7 +108,7 @@ ].each { |mapping| # NOTE: this is not the best test there is, but it is the best / simplest I could 
think of :/ it "properly maps #{mapping[:name]} to #{mapping[:type]}" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => mapping[:name] }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => mapping[:name] }] })) sample_event["a_field"] = mapping[:value] action = sut_instance.parse(sample_event) @@ -118,7 +118,7 @@ } it "properly maps sets to their specific set types" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set(int)" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set(int)" }] })) original_value = [ 1, 2, 3 ] sample_event["a_field"] = original_value @@ -128,7 +128,7 @@ end it "allows for event specific cassandra types" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) sample_event["a_field"] = "123" sample_event["pointer_to_a_field"] = "int" @@ -138,7 +138,7 @@ end it "fails in case of an unknown type" do - sut_instance = sut().new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" 
}] })) sample_event["a_field"] = "a_value" expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) @@ -148,7 +148,7 @@ describe "from event" do it "obtains the filter transform from the event if defined" do - sut_instance = sut().new(default_opts.update({ "filter_transform_event_key" => "an_event_filter" })) + sut_instance = sut.new(default_opts.update({ "filter_transform_event_key" => "an_event_filter" })) sample_event["a_field"] = "a_value" sample_event["an_event_filter"] = [{ "event_key" => "a_field", "column_name" => "a_column" }] @@ -161,7 +161,7 @@ describe "hints" do it "removes fields starting with @" do - sut_instance = sut().new(default_opts.update({ "hints" => {} })) + sut_instance = sut.new(default_opts.update({ "hints" => {} })) sample_event["leave"] = "a_value" sample_event["@remove"] = "another_value" @@ -172,7 +172,7 @@ end it "does not attempt to change items with no hints" do - sut_instance = sut().new(default_opts.update({ "hints" => {} })) + sut_instance = sut.new(default_opts.update({ "hints" => {} })) expected_value = [ 1, 2, 3 ] sample_event["no_hint_here"] = expected_value @@ -182,7 +182,7 @@ end it "converts items with hints" do - sut_instance = sut().new(default_opts.update({ "hints" => { "a_set" => "set(int)", "an_int" => "int" } })) + sut_instance = sut.new(default_opts.update({ "hints" => { "a_set" => "set(int)", "an_int" => "int" } })) original_set = [ 1, 2, 3 ] sample_event["a_set"] = original_set sample_event["an_int"] = "123" @@ -195,7 +195,7 @@ end it "fails for unknown hint types" do - sut_instance = sut().new(default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } })) + sut_instance = sut.new(default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } })) sample_event["a_field"] = "a value" @@ -206,7 +206,7 @@ options = default_opts.update({ "hints" => { "a_field" => "int" } }) expect(options["logger"]).to(receive(:error)) - sut_instance = sut().new(options) + sut_instance = 
sut.new(options) sample_event["a_field"] = "i am not an int!!!" expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) @@ -229,7 +229,7 @@ it "properly defaults #{mapping[:name]}" do options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => mapping[:name] } }) expect(options["logger"]).to(receive(:warn)) - sut_instance = sut().new(options) + sut_instance = sut.new(options) sample_event["a_field"] = mapping[:value] action = sut_instance.parse(sample_event) @@ -241,7 +241,7 @@ it "properly default sets" do options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => "set(float)" } }) expect(options["logger"]).to(receive(:warn)) - sut_instance = sut().new(options) + sut_instance = sut.new(options) sample_event["a_field"] = "i am not a set" action = sut_instance.parse(sample_event) From b2eedfa301bd8045d6810d4833db8d5241d3f810 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 09:39:56 +0200 Subject: [PATCH 045/126] switched remaining single quotes to double quotes --- spec/unit/outputs/event_parser_spec.rb | 50 +++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 49b7168..8c11e7e 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -6,7 +6,7 @@ let(:sut) { LogStash::Outputs::Cassandra::EventParser } let(:default_opts) {{ "logger" => double(), - "table" => 'dummy', + "table" => "dummy", "filter_transform_event_key" => nil, "filter_transform" => nil, "hints" => {}, @@ -90,21 +90,21 @@ describe "cassandra type mapping" do [ - { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1970-01-01 00:00:00") }, - { :name => 'inet', :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, - { :name => 'float', :type => ::Cassandra::Types::Float, :value => "10.15" }, - { :name => 'varchar', 
:type => ::Cassandra::Types::Varchar, :value => "a varchar" }, - { :name => 'text', :type => ::Cassandra::Types::Text, :value => "some text" }, - { :name => 'blob', :type => ::Cassandra::Types::Blob, :value => "12345678" }, - { :name => 'ascii', :type => ::Cassandra::Types::Ascii, :value => "some ascii" }, - { :name => 'bigint', :type => ::Cassandra::Types::Bigint, :value => "100" }, - { :name => 'counter', :type => ::Cassandra::Types::Counter, :value => "15" }, - { :name => 'int', :type => ::Cassandra::Types::Int, :value => "123" }, - { :name => 'varint', :type => ::Cassandra::Types::Varint, :value => "345" }, - { :name => 'boolean', :type => ::Cassandra::Types::Boolean, :value => "true" }, - { :name => 'decimal', :type => ::Cassandra::Types::Decimal, :value => "0.12E2" }, - { :name => 'double', :type => ::Cassandra::Types::Double, :value => "123.65" }, - { :name => 'timeuuid', :type => ::Cassandra::Types::Timeuuid, :value => "00000000-0000-0000-0000-000000000000" } + { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1970-01-01 00:00:00") }, + { :name => "inet", :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, + { :name => "float", :type => ::Cassandra::Types::Float, :value => "10.15" }, + { :name => "varchar", :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, + { :name => "text", :type => ::Cassandra::Types::Text, :value => "some text" }, + { :name => "blob", :type => ::Cassandra::Types::Blob, :value => "12345678" }, + { :name => "ascii", :type => ::Cassandra::Types::Ascii, :value => "some ascii" }, + { :name => "bigint", :type => ::Cassandra::Types::Bigint, :value => "100" }, + { :name => "counter", :type => ::Cassandra::Types::Counter, :value => "15" }, + { :name => "int", :type => ::Cassandra::Types::Int, :value => "123" }, + { :name => "varint", :type => ::Cassandra::Types::Varint, :value => "345" }, + { :name => "boolean", :type => ::Cassandra::Types::Boolean, :value => "true" }, + { :name => 
"decimal", :type => ::Cassandra::Types::Decimal, :value => "0.12E2" }, + { :name => "double", :type => ::Cassandra::Types::Double, :value => "123.65" }, + { :name => "timeuuid", :type => ::Cassandra::Types::Timeuuid, :value => "00000000-0000-0000-0000-000000000000" } ].each { |mapping| # NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ it "properly maps #{mapping[:name]} to #{mapping[:type]}" do @@ -215,15 +215,15 @@ describe "ignore_bad_values is turned on" do [ - { :name => 'timestamp', :value => "i dont have to_time", :expected => Time::parse("1970-01-01 00:00:00") }, - { :name => 'inet', :value => "i am not an inet address", :expected => "0.0.0.0" }, - { :name => 'float', :value => "i am not a float", :expected => 0.0 }, - { :name => 'bigint', :value => "i am not a bigint", :expected => 0 }, - { :name => 'counter', :value => "i am not a counter", :expected => 0 }, - { :name => 'int', :value => "i am not a int", :expected => 0 }, - { :name => 'varint', :value => "i am not a varint", :expected => 0 }, - { :name => 'double', :value => "i am not a double", :expected => 0 }, - { :name => 'timeuuid', :value => "i am not a timeuuid", :expected => "00000000-0000-0000-0000-000000000000" } + { :name => "timestamp", :value => "i dont have to_time", :expected => Time::parse("1970-01-01 00:00:00") }, + { :name => "inet", :value => "i am not an inet address", :expected => "0.0.0.0" }, + { :name => "float", :value => "i am not a float", :expected => 0.0 }, + { :name => "bigint", :value => "i am not a bigint", :expected => 0 }, + { :name => "counter", :value => "i am not a counter", :expected => 0 }, + { :name => "int", :value => "i am not a int", :expected => 0 }, + { :name => "varint", :value => "i am not a varint", :expected => 0 }, + { :name => "double", :value => "i am not a double", :expected => 0 }, + { :name => "timeuuid", :value => "i am not a timeuuid", :expected => "00000000-0000-0000-0000-000000000000" } ].each { |mapping| 
# NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ it "properly defaults #{mapping[:name]}" do From 1043dd5b0c71f3310a112e44c1800008e9d6d0d6 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 09:44:39 +0200 Subject: [PATCH 046/126] all retry policies --- spec/unit/outputs/safe_submitter_spec.rb | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 675eb47..801e6f1 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -39,7 +39,18 @@ def setup_cassandra_double(options) sut.new(default_options) end - it "supports the ... retry policy by passing ... as the retry_policy" + [ + { :name => "default", :concrete_retry_policy => ::Cassandra::Retry::Policies::Default }, + { :name => "downgrading_consistency", :concrete_retry_policy => ::Cassandra::Retry::Policies::DowngradingConsistency }, + { :name => "failthrough", :concrete_retry_policy => ::Cassandra::Retry::Policies::Fallthrough } + ].each { |mapping| + it "supports the #{mapping["class"]} retry policy by passing #{mapping["name"]} as the retry_policy" do + options = default_options.update({ "retry_policy" => mapping[:name], "concrete_retry_policy" => mapping[:concrete_retry_policy] }) + setup_cassandra_double(options) + + sut.new(options) + end + } end describe "execution" do From 47b4c6ea453928151de0af094ea748324f23a3bb Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 10:46:00 +0200 Subject: [PATCH 047/126] prepares and executes the query --- .../outputs/cassandra/safe_submitter.rb | 5 +- spec/unit/outputs/safe_submitter_spec.rb | 71 ++++++++++++++----- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 4b23af3..231e011 100644 --- 
a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -47,8 +47,9 @@ def get_retry_policy(policy_name) def prepare_batch(actions) statement_and_values = [] for action in actions - query = "INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) - VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" + query = +"INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) +VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" if !@statement_cache.has_key?(query) @statement_cache[query] = @session.prepare(query) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 801e6f1..78e4411 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -17,24 +17,25 @@ "keyspace" => "the final frontier" }} - describe "init" do - def setup_cassandra_double(options) - session_double = double() - cluster_double = double() - expect(cluster_double).to(receive(:connect)).with(options["keyspace"]).and_return(session_double) - expect(options["cassandra"]).to(receive(:cluster).with( - username: options["username"], - password: options["password"], - hosts: options["hosts"], - consistency: options["consistency"].to_sym, - timeout: options["request_timeout"], - retry_policy: options["concrete_retry_policy"], - logger: options["logger"] - )).and_return(cluster_double) - end + def setup_session_double(options) + session_double = double() + cluster_double = double() + expect(cluster_double).to(receive(:connect)).with(options["keyspace"]).and_return(session_double) + expect(options["cassandra"]).to(receive(:cluster).with( + username: options["username"], + password: options["password"], + hosts: options["hosts"], + consistency: options["consistency"].to_sym, + timeout: options["request_timeout"], + retry_policy: options["concrete_retry_policy"], + logger: options["logger"] + 
)).and_return(cluster_double) + return { :session_double => session_double } + end + describe "init" do it "properly inits the cassandra session" do - setup_cassandra_double(default_options) + setup_session_double(default_options) sut.new(default_options) end @@ -46,7 +47,7 @@ def setup_cassandra_double(options) ].each { |mapping| it "supports the #{mapping["class"]} retry policy by passing #{mapping["name"]} as the retry_policy" do options = default_options.update({ "retry_policy" => mapping[:name], "concrete_retry_policy" => mapping[:concrete_retry_policy] }) - setup_cassandra_double(options) + setup_session_double(options) sut.new(options) end @@ -54,7 +55,41 @@ def setup_cassandra_double(options) end describe "execution" do - it "prepares and executes the query" + let(:one_action) {[{ + "table" => "a_table", + "data" => { + "a_column" => "a_value", + "another_column" => "another_value" + } + }]} + let(:expected_query_for_one_action) { "INSERT INTO the final frontier.a_table (a_column, another_column)\nVALUES (?, ?)" } + let(:another_action) {{ + "table" => "another_table", + "data" => { + "a_column" => "a_value", + "another_column" => "another_value", + "a_third_column" => "another_value" + } + }} + let(:expected_query_for_another_action) { "INSERT INTO the final frontier.another_table (a_column, another_column, a_third_column)\nVALUES (?, ?, ?)" } + + def setup_batch_and_session_doubles() + session_double = setup_session_double(default_options)[:session_double] + batch_double = double() + expect(session_double).to(receive(:batch).and_yield(batch_double)).and_return(batch_double) + expect(session_double).to(receive(:execute).with(batch_double)) + return { :batch_double => batch_double, :session_double => session_double } + end + + it "prepares and executes the query" do + doubles = setup_batch_and_session_doubles() + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action)).and_return("eureka") + 
expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", "another_value"])) + sut_instance = sut.new(default_options) + + sut_instance.submit(one_action) + end + it "caches the generated query" it "does not confuse between a new query and cached queries" end From 48dad4b46c61612d6a0c9fe97374f369b7a655b7 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 10:54:24 +0200 Subject: [PATCH 048/126] does not confuse between a new query and cached queries --- spec/unit/outputs/safe_submitter_spec.rb | 31 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 78e4411..dafd6cc 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -55,13 +55,13 @@ def setup_session_double(options) end describe "execution" do - let(:one_action) {[{ + let(:one_action) {{ "table" => "a_table", "data" => { "a_column" => "a_value", "another_column" => "another_value" } - }]} + }} let(:expected_query_for_one_action) { "INSERT INTO the final frontier.a_table (a_column, another_column)\nVALUES (?, ?)" } let(:another_action) {{ "table" => "another_table", @@ -76,8 +76,8 @@ def setup_session_double(options) def setup_batch_and_session_doubles() session_double = setup_session_double(default_options)[:session_double] batch_double = double() - expect(session_double).to(receive(:batch).and_yield(batch_double)).and_return(batch_double) - expect(session_double).to(receive(:execute).with(batch_double)) + expect(session_double).to(receive(:batch).and_yield(batch_double).at_least(:once)).and_return(batch_double) + expect(session_double).to(receive(:execute).with(batch_double).at_least(:once)) return { :batch_double => batch_double, :session_double => session_double } end @@ -87,10 +87,27 @@ def setup_batch_and_session_doubles() expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", 
"another_value"])) sut_instance = sut.new(default_options) - sut_instance.submit(one_action) + sut_instance.submit([one_action]) + end + + it "caches the generated query" do + doubles = setup_batch_and_session_doubles() + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return("eureka") + expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", "another_value"]).twice) + sut_instance = sut.new(default_options) + + sut_instance.submit([one_action, one_action]) end - it "caches the generated query" - it "does not confuse between a new query and cached queries" + it "does not confuse between a new query and cached queries" do + doubles = setup_batch_and_session_doubles() + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return("eureka") + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_another_action).once).and_return("great scott") + expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", "another_value"])) + expect(doubles[:batch_double]).to(receive(:add).with("great scott", ["a_value", "another_value", "another_value"])) + sut_instance = sut.new(default_options) + + sut_instance.submit([one_action, another_action]) + end end end From a1a25bdfb8f3d9649f0ba80e37ba8ae10bf6dd86 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 10:58:26 +0200 Subject: [PATCH 049/126] updated cassandra output to use the new helper class interfaces --- lib/logstash/outputs/cassandra_output.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index c18779a..c057d5e 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -123,18 +123,22 @@ def close() private def setup_event_parser() @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( - 
@logger, @table, @filter_transform_event_key, @filter_transform, @hints, @ignore_bad_values + :logger => @logger, :table => @table, + :filter_transform_event_key => @filter_transform_event_key, :filter_transform => @filter_transform, + :hints => @hints, :ignore_bad_values => @ignore_bad_values ) end def setup_safe_submitter() @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( - @logger, @username, @password, @hosts, @consistency, @request_timeout, @retry_policy, @keyspace + :logger => @logger, :username => @username, :password => @password, :hosts => @hosts, + :consistency => @consistency, :request_timeout => @request_timeout, :retry_policy => @retry_policy, + :keyspace => @keyspace ) end def setup_buffer_and_handler - @buffer = ::LogStash::Outputs::CassandraOutput::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| + @buffer = ::LogStash::Outputs::Cassandra::Buffer.new(@logger, @flush_size, @idle_flush_time) do |actions| @safe_submitter.submit(actions) end end From 47f1ef88d28ba863c08725f85c1ceb7c975cb0bf Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 12:02:31 +0200 Subject: [PATCH 050/126] added port to config --- lib/logstash/outputs/cassandra/safe_submitter.rb | 1 + lib/logstash/outputs/cassandra_output.rb | 16 ++++++++++------ spec/unit/outputs/safe_submitter_spec.rb | 2 ++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 231e011..7995080 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -25,6 +25,7 @@ def setup_cassandra_session(options) username: options["username"], password: options["password"], hosts: options["hosts"], + port: options["port"], consistency: options["consistency"].to_sym, timeout: options["request_timeout"], retry_policy: get_retry_policy(options["retry_policy"]), diff --git 
a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index c057d5e..0cca4df 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -15,6 +15,9 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # List of Cassandra hostname(s) or IP-address(es) config :hosts, :validate => :array, :required => true + # The port cassandra is listening to + config :hosts, :validate => :integer, :default => 9042, :required => true + # Cassandra consistency level. # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" # Default: "one" @@ -123,17 +126,18 @@ def close() private def setup_event_parser() @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( - :logger => @logger, :table => @table, - :filter_transform_event_key => @filter_transform_event_key, :filter_transform => @filter_transform, - :hints => @hints, :ignore_bad_values => @ignore_bad_values + "logger" => @logger, "table" => @table, + "filter_transform_event_key" => @filter_transform_event_key, "filter_transform" => @filter_transform, + "hints" => @hints, "ignore_bad_values" => @ignore_bad_values ) end def setup_safe_submitter() @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( - :logger => @logger, :username => @username, :password => @password, :hosts => @hosts, - :consistency => @consistency, :request_timeout => @request_timeout, :retry_policy => @retry_policy, - :keyspace => @keyspace + "logger" => @logger, "cassandra" => ::Cassandra.cluster, + "hosts" => @hosts, "port" => @port, "username" => @username, "password" => @password, + "consistency" => @consistency, "request_timeout" => @request_timeout, "retry_policy" => @retry_policy, + "keyspace" => @keyspace ) end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index dafd6cc..93aac61 100644 --- 
a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -10,6 +10,7 @@ "username" => "a user", "password" => "a password", "hosts" => "some host", + "port" => 9042, "consistency" => "one", "request_timeout" => 10, "retry_policy" => "default", @@ -25,6 +26,7 @@ def setup_session_double(options) username: options["username"], password: options["password"], hosts: options["hosts"], + port: options["port"], consistency: options["consistency"].to_sym, timeout: options["request_timeout"], retry_policy: options["concrete_retry_policy"], From d403ca19237117b2994c81680ce89945f75089c0 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 13:32:58 +0200 Subject: [PATCH 051/126] added base integratino with docker for integration tests --- logstash-output-cassandra.gemspec | 1 + spec/integration/outputs/cassandra_spec.rb | 10 +- .../integration/outputs/integration_helper.rb | 92 +++++++++++++++++++ 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 spec/integration/outputs/integration_helper.rb diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index 57dcad9..e354f97 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -23,5 +23,6 @@ Gem::Specification.new do |s| s.add_runtime_dependency "logstash-core", '>= 2.0.0', '< 3.0.0' s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0' s.add_development_dependency 'cabin', ['~> 0.6'] + s.add_development_dependency 'longshoreman' s.add_development_dependency 'logstash-devutils' end diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 7a1e7fb..a7d5da2 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -1,5 +1,13 @@ # encoding: utf-8 -require_relative "../../cassandra_spec_helper" +require_relative "./integration_helper" require "logstash/outputs/cassandra_output" # TODO: add 
integration tests here (docker, longhorseman, et al) +describe "client create actions", :integration => true do + let(:session) { get_session() } + + it "does nothing" do + session = get_session() + "hmmm...." + end +end \ No newline at end of file diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb new file mode 100644 index 0000000..7dbdc2b --- /dev/null +++ b/spec/integration/outputs/integration_helper.rb @@ -0,0 +1,92 @@ +# encoding: utf-8 +require_relative "../../cassandra_spec_helper" +require "longshoreman" +require "cassandra" + +CONTAINER_NAME = "logstash-output-cassandra-#{rand(999).to_s}" +CONTAINER_IMAGE = "cassandra" +CONTAINER_TAG = "2" + +module CassandraHelper + def get_host_ip + address = Longshoreman.new.get_host_ip + return address + end + + def get_port + container = Longshoreman::Container.new + container.get(CONTAINER_NAME) + port = container.rport(9042) + return port + end + + def get_session + cluster = ::Cassandra.cluster( + username: "cassandra", + password: "cassandra", + port: get_port(), + hosts: [get_host_ip()] + ) + session = cluster.connect() + return session + end +end + + +RSpec.configure do |config| + config.include CassandraHelper + + # this :all hook gets run before every describe block that is tagged with :integration => true. + config.before(:all, :integration => true) do + # check if container exists already before creating new one. 
+ begin + ls = Longshoreman::new + ls.container.get(CONTAINER_NAME) + rescue Docker::Error::NotFoundError + create_retry = 0 + begin + Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME, { + 'HostConfig' => { + 'PublishAllPorts' => true + } + }) + connect_retry = 0 + begin + sleep(1) + get_session() + rescue ::Cassandra::Errors::NoHostsAvailable + connect_retry += 1 + if connect_retry <= 60 + retry + else + raise + end + end + rescue Docker::Error::NotFoundError + create_retry += 1 + if create_retry <= 2 + Longshoreman.pull_image(CONTAINER_IMAGE, CONTAINER_TAG) + retry + else + raise + end + end + end + end + + # we want to do a final cleanup after all :integration runs, + # but we don't want to clean up before the last block. + # This is a final blind check to see if the ES docker container is running and + # needs to be cleaned up. If no container can be found and/or docker is not + # running on the system, we do nothing. + config.after(:suite) do + # only cleanup docker container if system has docker and the container is running + begin + ls = Longshoreman::new + ls.container.get(CONTAINER_NAME) + ls.cleanup + rescue Docker::Error::NotFoundError, Excon::Errors::SocketError + # do nothing + end + end +end From 82e91907793565f0ef571012684bce3c353d1b4c Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 13:40:18 +0200 Subject: [PATCH 052/126] a small refactoring --- spec/integration/outputs/cassandra_spec.rb | 2 +- spec/integration/outputs/integration_helper.rb | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index a7d5da2..5cf562d 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -10,4 +10,4 @@ session = get_session() "hmmm...." 
end -end \ No newline at end of file +end diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb index 7dbdc2b..bd3b8bd 100644 --- a/spec/integration/outputs/integration_helper.rb +++ b/spec/integration/outputs/integration_helper.rb @@ -52,19 +52,21 @@ def get_session }) connect_retry = 0 begin - sleep(1) get_session() rescue ::Cassandra::Errors::NoHostsAvailable + # retry connecting for a minute connect_retry += 1 if connect_retry <= 60 + sleep(1) retry else raise end end rescue Docker::Error::NotFoundError + # try to pull the image once if it does not exist create_retry += 1 - if create_retry <= 2 + if create_retry <= 1 Longshoreman.pull_image(CONTAINER_IMAGE, CONTAINER_TAG) retry else From 2a4e8e9ba46106c15b8c50a5e09ce33be717f3f0 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 17:08:11 +0200 Subject: [PATCH 053/126] writting to cassandra works, now we need to assert properly --- lib/logstash/outputs/cassandra_output.rb | 8 ++- logstash-output-cassandra.gemspec | 1 + spec/integration/outputs/cassandra_spec.rb | 62 ++++++++++++++++++++-- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra_output.rb index 0cca4df..2a950d9 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra_output.rb @@ -16,7 +16,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base config :hosts, :validate => :array, :required => true # The port cassandra is listening to - config :hosts, :validate => :integer, :default => 9042, :required => true + config :port, :validate => :number, :default => 9042, :required => true # Cassandra consistency level. 
# Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" @@ -123,6 +123,10 @@ def close() @buffer.stop() end + def flush + @buffer.flush() + end + private def setup_event_parser() @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( @@ -134,7 +138,7 @@ def setup_event_parser() def setup_safe_submitter() @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( - "logger" => @logger, "cassandra" => ::Cassandra.cluster, + "logger" => @logger, "cassandra" => ::Cassandra, "hosts" => @hosts, "port" => @port, "username" => @username, "password" => @password, "consistency" => @consistency, "request_timeout" => @request_timeout, "retry_policy" => @retry_policy, "keyspace" => @keyspace diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index e354f97..c1d4f0c 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -25,4 +25,5 @@ Gem::Specification.new do |s| s.add_development_dependency 'cabin', ['~> 0.6'] s.add_development_dependency 'longshoreman' s.add_development_dependency 'logstash-devutils' + s.add_development_dependency 'logstash-codec-plain' end diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 5cf562d..28e0c01 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -2,12 +2,64 @@ require_relative "./integration_helper" require "logstash/outputs/cassandra_output" -# TODO: add integration tests here (docker, longhorseman, et al) describe "client create actions", :integration => true do - let(:session) { get_session() } + before(:all) do + get_session().execute("CREATE KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") + get_session().execute(" + CREATE TABLE test.first( + text_column text, + timeuuid_column timeuuid, + int_column int, + PRIMARY KEY (text_column) 
+ );") + get_session().execute(" + CREATE TABLE test.second( + text_column text, + timeuuid_column timeuuid, + int_column int, + PRIMARY KEY (text_column) + );") + end + + before(:each) do + get_session().execute("TRUNCATE test.first") + get_session().execute("TRUNCATE test.second") + end + + def get_sut() + options = { + "hosts" => [get_host_ip()], + "port" => get_port(), + "keyspace" => "test", + "table" => "%{[cassandra_table]}", + "username" => "cassandra", + "password" => "cassandra", + "filter_transform_event_key" => "cassandra_filter" + } + sut = LogStash::Outputs::CassandraOutput.new(options) + return sut + end + + # TODO: add integration tests here (docker, longhorseman, et al) + # pushing a single event + # pushing a set of events + # pushing to a few tables + it "properly creates a single event" do + sut = get_sut() + sut.register() + sut.receive(LogStash::Event.new( + "text_field" => "some text", + "timeuuid_field" => "00000000-0000-0000-0000-000000000000", + "int_field" => "345", + "cassandra_table" => "first", + "cassandra_filter" => [ + { "event_key" => "text_field", "column_name" => "text_column" }, + { "event_key" => "timeuuid_field", "column_name" => "timeuuid_column", "cassandra_type" => "timeuuid" }, + { "event_key" => "int_field", "column_name" => "int_column", "cassandra_type" => "int" } + ] )) + sut.flush() - it "does nothing" do - session = get_session() - "hmmm...." 
+ result = get_session().execute("SELECT * FROM test.first") + print 'done' end end From b0cdf33dcd32089a31c8c6223edcc695ed4d88d3 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 9 Mar 2016 22:24:32 +0200 Subject: [PATCH 054/126] integration test - properly creates a single event --- .../outputs/cassandra/safe_submitter.rb | 5 +- spec/integration/outputs/cassandra_spec.rb | 59 +++++++++++-------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 7995080..c18b345 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -6,7 +6,6 @@ class SafeSubmitter def initialize(options) @statement_cache = {} @logger = options["logger"] - @keyspace = options["keyspace"] setup_cassandra_session(options) end @@ -31,7 +30,7 @@ def setup_cassandra_session(options) retry_policy: get_retry_policy(options["retry_policy"]), logger: options["logger"] ) - @session = cluster.connect(@keyspace) + @session = cluster.connect(options["keyspace"]) end def get_retry_policy(policy_name) @@ -49,7 +48,7 @@ def prepare_batch(actions) statement_and_values = [] for action in actions query = -"INSERT INTO #{@keyspace}.#{action["table"]} (#{action["data"].keys.join(', ')}) +"INSERT INTO #{action["table"]} (#{action["data"].keys.join(', ')}) VALUES (#{("?" 
* action["data"].keys.count).split(//) * ", "})" if !@statement_cache.has_key?(query) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 28e0c01..a617e16 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -6,24 +6,35 @@ before(:all) do get_session().execute("CREATE KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") get_session().execute(" - CREATE TABLE test.first( - text_column text, - timeuuid_column timeuuid, - int_column int, - PRIMARY KEY (text_column) + CREATE TABLE test.simple( + text_column text, + int_column int, + PRIMARY KEY (text_column) );") get_session().execute(" - CREATE TABLE test.second( - text_column text, - timeuuid_column timeuuid, - int_column int, - PRIMARY KEY (text_column) + CREATE TABLE test.complex( + timestamp_column timestamp, + inet_column inet, + float_column float, + varchar_column varchar, + text_column text, + blob_column blob, + ascii_column ascii, + bigint_column bigint, + int_column int, + varint_column varint, + boolean_column boolean, + decimal_column decimal, + double_column double, + timeuuid_column timeuuid, + set_column set, + PRIMARY KEY (text_column) );") end before(:each) do - get_session().execute("TRUNCATE test.first") - get_session().execute("TRUNCATE test.second") + get_session().execute("TRUNCATE test.simple") + get_session().execute("TRUNCATE test.complex") end def get_sut() @@ -40,26 +51,28 @@ def get_sut() return sut end - # TODO: add integration tests here (docker, longhorseman, et al) - # pushing a single event - # pushing a set of events - # pushing to a few tables it "properly creates a single event" do sut = get_sut() sut.register() sut.receive(LogStash::Event.new( "text_field" => "some text", - "timeuuid_field" => "00000000-0000-0000-0000-000000000000", "int_field" => "345", - "cassandra_table" => "first", + "cassandra_table" => "simple", 
"cassandra_filter" => [ - { "event_key" => "text_field", "column_name" => "text_column" }, - { "event_key" => "timeuuid_field", "column_name" => "timeuuid_column", "cassandra_type" => "timeuuid" }, - { "event_key" => "int_field", "column_name" => "int_column", "cassandra_type" => "int" } + { "event_key" => "text_field", "column_name" => "text_column" }, + { "event_key" => "int_field", "column_name" => "int_column", "cassandra_type" => "int" } ] )) sut.flush() - result = get_session().execute("SELECT * FROM test.first") - print 'done' + result = get_session().execute("SELECT * FROM test.simple") + expect(result.size).to((eq(1))) + result.each { |row| + expect(row["text_column"]).to(eq("some text")) + expect(row["int_column"]).to(eq(345)) + } end + + it "properly creates all column types" + it "properly works with counter columns" + it "properly adds multiple events to multiple tables in the same batch" end From fb0a67f826646413ee561ff39f1e54352c58af97 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 10 Mar 2016 11:47:07 +0200 Subject: [PATCH 055/126] changed set def format to match table creation in cassandra --- lib/logstash/outputs/cassandra/event_parser.rb | 2 +- spec/unit/outputs/event_parser_spec.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 4b30cae..e775c2f 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -134,7 +134,7 @@ def get_cassandra_type_generator(name) return ::Cassandra::Types::Double when "timeuuid" return ::Cassandra::Types::Timeuuid - when /^set\((.*)\)$/ + when /^set<(.*)>$/ set_type = get_cassandra_type_generator($1) return ::Cassandra::Types::Set.new(set_type) else diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 8c11e7e..65dc2e5 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ 
b/spec/unit/outputs/event_parser_spec.rb @@ -118,7 +118,7 @@ } it "properly maps sets to their specific set types" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set(int)" }] })) + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set" }] })) original_value = [ 1, 2, 3 ] sample_event["a_field"] = original_value @@ -182,7 +182,7 @@ end it "converts items with hints" do - sut_instance = sut.new(default_opts.update({ "hints" => { "a_set" => "set(int)", "an_int" => "int" } })) + sut_instance = sut.new(default_opts.update({ "hints" => { "a_set" => "set", "an_int" => "int" } })) original_set = [ 1, 2, 3 ] sample_event["a_set"] = original_set sample_event["an_int"] = "123" @@ -239,7 +239,7 @@ } it "properly default sets" do - options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => "set(float)" } }) + options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => "set" } }) expect(options["logger"]).to(receive(:warn)) sut_instance = sut.new(options) sample_event["a_field"] = "i am not a set" From b949012fc7fe468a5cf29cee4bd6c6ab7077cae7 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 10 Mar 2016 13:39:36 +0200 Subject: [PATCH 056/126] simple insert integration test working, now to fix --- spec/integration/outputs/cassandra_spec.rb | 100 ++++++++++++--------- 1 file changed, 57 insertions(+), 43 deletions(-) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index a617e16..3219b27 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -3,38 +3,12 @@ require "logstash/outputs/cassandra_output" describe "client create actions", :integration => true do - before(:all) do + before(:each) do get_session().execute("CREATE 
KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") - get_session().execute(" - CREATE TABLE test.simple( - text_column text, - int_column int, - PRIMARY KEY (text_column) - );") - get_session().execute(" - CREATE TABLE test.complex( - timestamp_column timestamp, - inet_column inet, - float_column float, - varchar_column varchar, - text_column text, - blob_column blob, - ascii_column ascii, - bigint_column bigint, - int_column int, - varint_column varint, - boolean_column boolean, - decimal_column decimal, - double_column double, - timeuuid_column timeuuid, - set_column set, - PRIMARY KEY (text_column) - );") end - before(:each) do - get_session().execute("TRUNCATE test.simple") - get_session().execute("TRUNCATE test.complex") + after(:each) do + get_session().execute("DROP KEYSPACE test;") end def get_sut() @@ -50,29 +24,69 @@ def get_sut() sut = LogStash::Outputs::CassandraOutput.new(options) return sut end + + def create_table(type_to_test) + get_session().execute(" + CREATE TABLE test.simple( + idish_column text, + value_column #{type_to_test[:type]}, + PRIMARY KEY (idish_column) + );") + end - it "properly creates a single event" do - sut = get_sut() - sut.register() - sut.receive(LogStash::Event.new( - "text_field" => "some text", - "int_field" => "345", + def build_event(type_to_test) + options = { "cassandra_table" => "simple", + "idish_field" => "some text", + "value_field" => type_to_test[:value], "cassandra_filter" => [ - { "event_key" => "text_field", "column_name" => "text_column" }, - { "event_key" => "int_field", "column_name" => "int_column", "cassandra_type" => "int" } - ] )) - sut.flush() - + { "event_key" => "idish_field", "column_name" => "idish_column" }, + { "event_key" => "value_field", "column_name" => "value_column", "cassandra_type" => type_to_test[:type] } + ] + } + event = LogStash::Event.new(options) + return event + end + + def assert_proper_insert(type_to_test) result = 
get_session().execute("SELECT * FROM test.simple") expect(result.size).to((eq(1))) result.each { |row| - expect(row["text_column"]).to(eq("some text")) - expect(row["int_column"]).to(eq(345)) + expect(row["idish_column"]).to(eq("some text")) + expect(row["value_column"].to_s).to(eq(type_to_test[:value].to_s)) } end - it "properly creates all column types" + [ + { type: "timestamp", value: 1457606758 }, + { type: "inet", value: "192.168.99.100" }, + { type: "float", value: "10.050000190734863" }, + { type: "varchar", value: "some chars" }, + { type: "text", value: "some text" }, + { type: "blob", value: "a blob" }, + { type: "ascii", value: "some ascii" }, + { type: "bigint", value: "123456789" }, + { type: "int", value: "12345" }, + { type: "varint", value: "12345678" }, + { type: "boolean", value: "true" }, + { type: "decimal", value: "0.1015E2" }, + { type: "double", value: "200.54" }, + { type: "timeuuid", value: "d2177dd0-eaa2-11de-a572-001b779c76e3" }, + { type: "set", value: ["d2177dd0-eaa2-11de-a572-001b779c76e3", "d2177dd0-eaa2-11de-a572-001b779c76e3", "d2177dd0-eaa2-11de-a572-001b779c76e3"] } + ].each { |type_to_test| + it "properly inserts data of type #{type_to_test[:type]}" do + create_table(type_to_test) + sut = get_sut() + sut.register() + event = build_event(type_to_test) + + sut.receive(event) + sut.flush() + + assert_proper_insert(type_to_test) + end + } + it "properly works with counter columns" it "properly adds multiple events to multiple tables in the same batch" end From 28a1a175e50e42cfa02ec7f6d10cb964dcaa7f8d Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 10 Mar 2016 13:44:28 +0200 Subject: [PATCH 057/126] added unit tests to properly cover the type conversion cases we want to cover --- spec/unit/outputs/event_parser_spec.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 65dc2e5..0a850d1 100644 --- 
a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -91,6 +91,8 @@ describe "cassandra type mapping" do [ { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1970-01-01 00:00:00") }, + { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => "1970-01-01 00:00:00" }, + { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => 1457606758 }, { :name => "inet", :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, { :name => "float", :type => ::Cassandra::Types::Float, :value => "10.15" }, { :name => "varchar", :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, @@ -127,6 +129,16 @@ expect(action["data"]["a_column"].to_a).to(eq(original_value)) end + it "properly maps sets to their specific set types for type which also require actual conversion" do + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set" }] })) + original_value = [ "00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000000" ] + sample_event["a_field"] = original_value + + action = sut_instance.parse(sample_event) + + expect(action["data"]["a_column"].to_a).to(eq(original_value)) + end + it "allows for event specific cassandra types" do sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) sample_event["a_field"] = "123" From 3d12159a06e13a9912fa64a9f35b5425fdb6ef38 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 10 Mar 2016 15:02:54 +0200 Subject: [PATCH 058/126] all types now properly converted :) --- .../outputs/cassandra/event_parser.rb | 74 ++++++++++--------- spec/integration/outputs/cassandra_spec.rb | 33 ++++++++- spec/unit/outputs/event_parser_spec.rb | 32 +++++--- 3 files changed, 91 
insertions(+), 48 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index e775c2f..4eeda80 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -54,7 +54,7 @@ def add_event_value_from_filter_to_action(event, filter, action) event_data = event[event.sprintf(filter["event_key"])] if filter.has_key?("cassandra_type") cassandra_type = event.sprintf(filter["cassandra_type"]) - event_data = convert_value_to_cassandra_type(event_data, cassandra_type) + event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type) end column_name = event.sprintf(filter["column_name"]) action["data"][column_name] = event_data @@ -66,32 +66,27 @@ def add_event_data_using_configured_hints(event, action) action["data"].reject!{|key| %r{^@} =~ key} @hints.each do |event_key, cassandra_type| if action["data"].has_key?(event_key) - action["data"][event_key] = convert_value_to_cassandra_type(action["data"][event_key], cassandra_type) + action["data"][event_key] = convert_value_to_cassandra_type_or_default_if_configured(action["data"][event_key], cassandra_type) end end end - def convert_value_to_cassandra_type(event_data, cassandra_type) - generator = get_cassandra_type_generator(cassandra_type) + def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type) typed_event_data = nil begin - typed_event_data = generator.new(event_data) + typed_event_data = convert_value_to_cassandra_type(event_data, cassandra_type) rescue Exception => e error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" if @ignore_bad_values case cassandra_type - when "int", "varint", "bigint", "double", "counter" - typed_event_data = 0 + when "float", "int", "varint", "bigint", "double", "counter", "timestamp" + typed_event_data = convert_value_to_cassandra_type(0, cassandra_type) when "timeuuid" - 
typed_event_data = generator.new("00000000-0000-0000-0000-000000000000") - when "timestamp" - typed_event_data = generator.new(Time::parse("1970-01-01 00:00:00")) + typed_event_data = convert_value_to_cassandra_type("00000000-0000-0000-0000-000000000000", cassandra_type) when "inet" - typed_event_data = generator.new("0.0.0.0") - when "float" - typed_event_data = generator.new(0) + typed_event_data = convert_value_to_cassandra_type("0.0.0.0", cassandra_type) when /^set\((.*)\)$/ - typed_event_data = generator.new([]) + typed_event_data = convert_value_to_cassandra_type([], cassandra_type) end @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) else @@ -102,41 +97,54 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) return typed_event_data end - def get_cassandra_type_generator(name) - case name + def convert_value_to_cassandra_type(event_data, cassandra_type) + case cassandra_type when "timestamp" - return ::Cassandra::Types::Timestamp + converted_value = event_data + if converted_value.is_a?(Numeric) + converted_value = Time.at(converted_value) + elsif converted_value.respond_to?(:to_s) + converted_value = Time::parse(event_data.to_s) + end + return ::Cassandra::Types::Timestamp.new(converted_value) when "inet" - return ::Cassandra::Types::Inet + return ::Cassandra::Types::Inet.new(event_data) when "float" - return ::Cassandra::Types::Float + return ::Cassandra::Types::Float.new(event_data) when "varchar" - return ::Cassandra::Types::Varchar + return ::Cassandra::Types::Varchar.new(event_data) when "text" - return ::Cassandra::Types::Text + return ::Cassandra::Types::Text.new(event_data) when "blob" - return ::Cassandra::Types::Blob + return ::Cassandra::Types::Blob.new(event_data) when "ascii" - return ::Cassandra::Types::Ascii + return ::Cassandra::Types::Ascii.new(event_data) when "bigint" - return ::Cassandra::Types::Bigint + return ::Cassandra::Types::Bigint.new(event_data) when "counter" - return 
::Cassandra::Types::Counter + return ::Cassandra::Types::Counter.new(event_data) when "int" - return ::Cassandra::Types::Int + return ::Cassandra::Types::Int.new(event_data) when "varint" - return ::Cassandra::Types::Varint + return ::Cassandra::Types::Varint.new(event_data) when "boolean" - return ::Cassandra::Types::Boolean + return ::Cassandra::Types::Boolean.new(event_data) when "decimal" - return ::Cassandra::Types::Decimal + return ::Cassandra::Types::Decimal.new(event_data) when "double" - return ::Cassandra::Types::Double + return ::Cassandra::Types::Double.new(event_data) when "timeuuid" - return ::Cassandra::Types::Timeuuid + return ::Cassandra::Types::Timeuuid.new(event_data) when /^set<(.*)>$/ - set_type = get_cassandra_type_generator($1) - return ::Cassandra::Types::Set.new(set_type) + # convert each value + # then add all to an array and convert to set + converted_items = ::Set.new() + set_type = $1 + event_data.each { |item| + converted_item = convert_value_to_cassandra_type(item, set_type) + converted_items.add(converted_item) + } + return converted_items else raise "Unknown cassandra_type #{name}" end diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 3219b27..3cff1bb 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -2,6 +2,25 @@ require_relative "./integration_helper" require "logstash/outputs/cassandra_output" +module Helper + def self.get_assert_timestamp_equallity() + Proc.new do |expect, row, type_to_test| + expect.call(row["value_column"].to_s).to(eq(Time.at(type_to_test[:value]).to_s)) + end + end + + def self.get_assert_set_equallity() + Proc.new do |expect, row, type_to_test| + set_from_cassandra = row["value_column"] + original_value = type_to_test[:value] + expect.call(set_from_cassandra.size).to(eq(original_value.size)) + set_from_cassandra.to_a.each { |item| + expect.call(original_value).to(include(item.to_s)) + } + end + 
end +end + describe "client create actions", :integration => true do before(:each) do get_session().execute("CREATE KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") @@ -53,12 +72,19 @@ def assert_proper_insert(type_to_test) expect(result.size).to((eq(1))) result.each { |row| expect(row["idish_column"]).to(eq("some text")) - expect(row["value_column"].to_s).to(eq(type_to_test[:value].to_s)) + if type_to_test.has_key?(:assert_override) + expect_proc = Proc.new do |value| + return expect(value) + end + type_to_test[:assert_override].call(expect_proc, row, type_to_test) + else + expect(row["value_column"].to_s).to(eq(type_to_test[:value].to_s)) + end } end [ - { type: "timestamp", value: 1457606758 }, + { type: "timestamp", value: 1457606758, assert_override: Helper::get_assert_timestamp_equallity() }, { type: "inet", value: "192.168.99.100" }, { type: "float", value: "10.050000190734863" }, { type: "varchar", value: "some chars" }, @@ -72,7 +98,8 @@ def assert_proper_insert(type_to_test) { type: "decimal", value: "0.1015E2" }, { type: "double", value: "200.54" }, { type: "timeuuid", value: "d2177dd0-eaa2-11de-a572-001b779c76e3" }, - { type: "set", value: ["d2177dd0-eaa2-11de-a572-001b779c76e3", "d2177dd0-eaa2-11de-a572-001b779c76e3", "d2177dd0-eaa2-11de-a572-001b779c76e3"] } + { type: "set", + value: ["d2177dd0-eaa2-11de-a572-001b779c76e3", "d2177dd0-eaa2-11de-a572-001b779c76e4", "d2177dd0-eaa2-11de-a572-001b779c76e5"], assert_override: Helper::get_assert_set_equallity() } ].each { |type_to_test| it "properly inserts data of type #{type_to_test[:type]}" do create_table(type_to_test) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 0a850d1..f856449 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -90,9 +90,9 @@ describe "cassandra type mapping" do [ - { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => 
Time::parse("1970-01-01 00:00:00") }, - { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => "1970-01-01 00:00:00" }, - { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => 1457606758 }, + { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1979-07-27 00:00:00 +0300") }, + { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => "1982-05-04 00:00:00 +0300", expected: Time::parse("1982-05-04 00:00:00 +0300") }, + { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => 1457606758, expected: Time.at(1457606758) }, { :name => "inet", :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, { :name => "float", :type => ::Cassandra::Types::Float, :value => "10.15" }, { :name => "varchar", :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, @@ -115,7 +115,8 @@ action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"].to_s).to(eq(mapping[:value].to_s)) + expected_value = mapping.has_key?(:expected) ? 
mapping[:expected] : mapping[:value] + expect(action["data"]["a_column"].to_s).to(eq(expected_value.to_s)) end } @@ -131,12 +132,15 @@ it "properly maps sets to their specific set types for type which also require actual conversion" do sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set" }] })) - original_value = [ "00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000000" ] + original_value = [ "00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000001", "00000000-0000-0000-0000-000000000002" ] sample_event["a_field"] = original_value action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"].to_a).to(eq(original_value)) + expect(action["data"]["a_column"].size).to(eq(original_value.size)) + action["data"]["a_column"].to_a.each { |item| + expect(original_value).to(include(item.to_s)) + } end it "allows for event specific cassandra types" do @@ -150,10 +154,12 @@ end it "fails in case of an unknown type" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" }] })) + options = default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" 
}] }) + sut_instance = sut.new(options) sample_event["a_field"] = "a_value" + expect(options["logger"]).to(receive(:error)) - expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) + expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) end end end @@ -207,11 +213,13 @@ end it "fails for unknown hint types" do - sut_instance = sut.new(default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } })) + options = default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } }) + sut_instance = sut.new(options) + expect(options["logger"]).to(receive(:error)) sample_event["a_field"] = "a value" - expect { sut_instance.parse(sample_event) }.to raise_error(/Unknown cassandra_type/) + expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) end it "fails for unsuccessful hint conversion" do @@ -227,14 +235,14 @@ describe "ignore_bad_values is turned on" do [ - { :name => "timestamp", :value => "i dont have to_time", :expected => Time::parse("1970-01-01 00:00:00") }, + { :name => "timestamp", :value => "i dont have to_time", :expected => Time::parse("1970-01-01 00:00:00 +0000") }, { :name => "inet", :value => "i am not an inet address", :expected => "0.0.0.0" }, { :name => "float", :value => "i am not a float", :expected => 0.0 }, { :name => "bigint", :value => "i am not a bigint", :expected => 0 }, { :name => "counter", :value => "i am not a counter", :expected => 0 }, { :name => "int", :value => "i am not a int", :expected => 0 }, { :name => "varint", :value => "i am not a varint", :expected => 0 }, - { :name => "double", :value => "i am not a double", :expected => 0 }, + { :name => "double", :value => "i am not a double", :expected => 0.0 }, { :name => "timeuuid", :value => "i am not a timeuuid", :expected => "00000000-0000-0000-0000-000000000000" } ].each { |mapping| # NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ From 
11fb3c5adfe74de0f11f666de8633cf0cacbdf09 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sun, 13 Mar 2016 10:42:42 +0200 Subject: [PATCH 059/126] remove incorrect query expectation (should not include the keyspace) --- spec/unit/outputs/safe_submitter_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 93aac61..8af3aca 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -64,7 +64,7 @@ def setup_session_double(options) "another_column" => "another_value" } }} - let(:expected_query_for_one_action) { "INSERT INTO the final frontier.a_table (a_column, another_column)\nVALUES (?, ?)" } + let(:expected_query_for_one_action) { "INSERT INTO a_table (a_column, another_column)\nVALUES (?, ?)" } let(:another_action) {{ "table" => "another_table", "data" => { @@ -73,7 +73,7 @@ def setup_session_double(options) "a_third_column" => "another_value" } }} - let(:expected_query_for_another_action) { "INSERT INTO the final frontier.another_table (a_column, another_column, a_third_column)\nVALUES (?, ?, ?)" } + let(:expected_query_for_another_action) { "INSERT INTO another_table (a_column, another_column, a_third_column)\nVALUES (?, ?, ?)" } def setup_batch_and_session_doubles() session_double = setup_session_double(default_options)[:session_double] From e5499305530370570c229341162dbba1a23df65f Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sun, 13 Mar 2016 13:32:28 +0200 Subject: [PATCH 060/126] renamed output file name --- lib/logstash/outputs/{cassandra_output.rb => cassandra.rb} | 1 - spec/integration/outputs/cassandra_spec.rb | 2 +- spec/unit/outputs/cassandra_spec.rb | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) rename lib/logstash/outputs/{cassandra_output.rb => cassandra.rb} (99%) diff --git a/lib/logstash/outputs/cassandra_output.rb b/lib/logstash/outputs/cassandra.rb similarity index 99% rename from 
lib/logstash/outputs/cassandra_output.rb rename to lib/logstash/outputs/cassandra.rb index 2a950d9..23df29f 100644 --- a/lib/logstash/outputs/cassandra_output.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -4,7 +4,6 @@ require "logstash/outputs/cassandra/buffer" require "logstash/outputs/cassandra/event_parser" require "logstash/outputs/cassandra/safe_submitter" -require "cassandra" class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 3cff1bb..614d19d 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -1,6 +1,6 @@ # encoding: utf-8 require_relative "./integration_helper" -require "logstash/outputs/cassandra_output" +require "logstash/outputs/cassandra" module Helper def self.get_assert_timestamp_equallity() diff --git a/spec/unit/outputs/cassandra_spec.rb b/spec/unit/outputs/cassandra_spec.rb index 4d4265e..5c06866 100644 --- a/spec/unit/outputs/cassandra_spec.rb +++ b/spec/unit/outputs/cassandra_spec.rb @@ -1,5 +1,5 @@ # encoding: utf-8 require_relative "../../cassandra_spec_helper" -require "logstash/outputs/cassandra_output" +require "logstash/outputs/cassandra" # TODO: add unit tests for the main cassandra output class From adc61fb20c72a83271271c8909304232b97013a8 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sun, 13 Mar 2016 14:49:36 +0200 Subject: [PATCH 061/126] added a test to ensure the filters can easily come from the @metadata field --- spec/unit/outputs/event_parser_spec.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index f856449..59f5365 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -174,6 +174,16 @@ expect(action["data"]["a_column"]).to(eq("a_value")) end + + it "obtains the filter transform from the event even when it is in 
the metadata" do + sut_instance = sut.new(default_opts.update({ "filter_transform_event_key" => "[@metadata][the_filter]" })) + sample_event["a_field"] = "a_value" + sample_event["@metadata"] = { "the_filter" => [{ "event_key" => "a_field", "column_name" => "a_column" }] } + + action = sut_instance.parse(sample_event) + + expect(action["data"]["a_column"]).to(eq("a_value")) + end end end From 96c95d242d0d65f0a85b2d40f696f1635ea662dc Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 28 Mar 2016 22:40:24 +0300 Subject: [PATCH 062/126] added string expansion only option --- lib/logstash/outputs/cassandra/event_parser.rb | 5 ++++- spec/unit/outputs/event_parser_spec.rb | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 4eeda80..cf6f57f 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -51,7 +51,10 @@ def assert_filter_transform_structure(filter_transform) end def add_event_value_from_filter_to_action(event, filter, action) - event_data = event[event.sprintf(filter["event_key"])] + event_data = event.sprintf(filter["event_key"]) + if !filter.fetch("expansion_only", false) + event_data = event[event_data] + end if filter.has_key?("cassandra_type") cassandra_type = event.sprintf(filter["cassandra_type"]) event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 59f5365..a3fc068 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -76,6 +76,15 @@ expect(action["data"]["a_column"]).to(eq("a_value")) end + it "allows for expansion only filters for things like date string formats" do + sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{+yyyyMMddHHmm}", 
"expansion_only" => true, "column_name" => "a_column" }] })) + expected_value = Time.now.getutc.strftime('%Y%m%d%H%M') + + action = sut_instance.parse(sample_event) + + expect(action["data"]["a_column"]).to(eq(expected_value)) + end + it "allows for event specific column names" do sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) sample_event["a_field"] = "a_value" From 425cbae797c11ed3a1a1163942e7bef2bb26d0ed Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 28 Mar 2016 22:43:10 +0300 Subject: [PATCH 063/126] updated spec names to match the proper name for string expansion --- spec/unit/outputs/event_parser_spec.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index a3fc068..6067654 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -23,7 +23,7 @@ expect(action["table"]).to(eq("simple")) end - it "parses table names with data from the event" do + it "allows for string expansion in table names" do sut_instance = sut.new(default_opts.update({ "table" => "%{[a_field]}" })) sample_event["a_field"] = "a_value" @@ -66,7 +66,7 @@ expect(action["data"]["a_different_column"]).to(eq("a_second_value")) end - it "allows for event specific event keys" do + it "allows for string expansion in event keys" do sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[pointer_to_another_field]}", "column_name" => "a_column" }] })) sample_event["pointer_to_another_field"] = "another_field" sample_event["another_field"] = "a_value" @@ -76,7 +76,7 @@ expect(action["data"]["a_column"]).to(eq("a_value")) end - it "allows for expansion only filters for things like date string formats" do + it "allows for string expansion only filters for things like date string formats" do sut_instance = 
sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{+yyyyMMddHHmm}", "expansion_only" => true, "column_name" => "a_column" }] })) expected_value = Time.now.getutc.strftime('%Y%m%d%H%M') @@ -85,7 +85,7 @@ expect(action["data"]["a_column"]).to(eq(expected_value)) end - it "allows for event specific column names" do + it "allows for string expansion in column names" do sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) sample_event["a_field"] = "a_value" sample_event["pointer_to_another_field"] = "a_different_column" @@ -152,7 +152,7 @@ } end - it "allows for event specific cassandra types" do + it "allows for string expansion in cassandra types" do sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) sample_event["a_field"] = "123" sample_event["pointer_to_a_field"] = "int" From 8c934c658014c17195e58ad73a160c9f76afab0a Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 29 Mar 2016 12:30:14 +0300 Subject: [PATCH 064/126] added some debug logging --- .../outputs/cassandra/event_parser.rb | 1 + .../outputs/cassandra/safe_submitter.rb | 2 ++ spec/unit/outputs/event_parser_spec.rb | 20 +++++++++++-------- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index cf6f57f..76e3ad2 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -27,6 +27,7 @@ def parse(event) add_event_data_using_configured_hints(event, action) end + @logger.debug("event parsed to action", :action => action) return action end diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index c18b345..55258e2 100644 --- 
a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -47,11 +47,13 @@ def get_retry_policy(policy_name) def prepare_batch(actions) statement_and_values = [] for action in actions + @logger.debug("generating query for action", :action => action) query = "INSERT INTO #{action["table"]} (#{action["data"].keys.join(', ')}) VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" if !@statement_cache.has_key?(query) + @logger.debug("new query generated", :query => query) @statement_cache[query] = @session.prepare(query) end statement_and_values << [@statement_cache[query], action["data"].values] diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 6067654..ee59f12 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -4,14 +4,18 @@ RSpec.describe LogStash::Outputs::Cassandra::EventParser do let(:sut) { LogStash::Outputs::Cassandra::EventParser } - let(:default_opts) {{ - "logger" => double(), - "table" => "dummy", - "filter_transform_event_key" => nil, - "filter_transform" => nil, - "hints" => {}, - "ignore_bad_values" => false - }} + let(:default_opts) { + logger = double() + allow(logger).to(receive(:debug)) + return { + "logger" => logger, + "table" => "dummy", + "filter_transform_event_key" => nil, + "filter_transform" => nil, + "hints" => {}, + "ignore_bad_values" => false + } + } let(:sample_event) { LogStash::Event.new("message" => "sample message here") } describe "table name parsing" do From 1b5d301b2ad9313001def7a28bcf02f8ef21eca6 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 29 Mar 2016 12:35:42 +0300 Subject: [PATCH 065/126] changed the filter transform default to an empty array --- lib/logstash/outputs/cassandra.rb | 2 +- lib/logstash/outputs/cassandra/event_parser.rb | 2 +- spec/unit/outputs/event_parser_spec.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 23df29f..d8c7d06 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -37,7 +37,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # An optional hash describing how / what to transform / filter from the original event # Each key is expected to be of the form { event_key => "..." column_name => "..." cassandra_type => "..." } # Event level processing (e.g. %{[key]}) is supported for all three - config :filter_transform, :validate => :array, :default => nil + config :filter_transform, :validate => :array, :default => [] # An optional string which points to the event specific location from which to pull the filter_transform definition # The contents need to conform with those defined for the filter_transform config setting diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 76e3ad2..ed38be9 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -37,7 +37,7 @@ def get_filter_transform(event) if @filter_transform_event_key filter_transform = event[@filter_transform_event_key] assert_filter_transform_structure(filter_transform) - elsif @filter_transform + elsif @filter_transform.length > 0 filter_transform = @filter_transform end return filter_transform diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index ee59f12..264475d 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -11,7 +11,7 @@ "logger" => logger, "table" => "dummy", "filter_transform_event_key" => nil, - "filter_transform" => nil, + "filter_transform" => [], "hints" => {}, "ignore_bad_values" => false } From 52d345164d328d640a7897e3ad3dd47d97f1c555 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 29 Mar 2016 13:42:34 +0300 Subject: [PATCH 066/126] fixed logger 
double to allow debug message --- spec/unit/outputs/safe_submitter_spec.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 8af3aca..54e9d5d 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -4,8 +4,11 @@ RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do let(:sut) { LogStash::Outputs::Cassandra::SafeSubmitter } - let(:default_options) {{ - "logger" => double(), + let(:default_options) { + logger = double() + allow(logger).to(receive(:debug)) + { + "logger" => logger, "cassandra" => double(), "username" => "a user", "password" => "a password", @@ -16,7 +19,8 @@ "retry_policy" => "default", "concrete_retry_policy" => ::Cassandra::Retry::Policies::Default, "keyspace" => "the final frontier" - }} + } + } def setup_session_double(options) session_double = double() From b5e1be4d2f3c82e2eb43b0abc4f52b04e2c0c6b6 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 29 Mar 2016 15:39:57 +0300 Subject: [PATCH 067/126] switched from batches to futures in submitter due to the fact that batches are not recommened for bulk inserts in cassandra https://docs.datastax.com/en/cql/3.3/cql/cql_using/useBatch.html --- .../outputs/cassandra/safe_submitter.rb | 40 +++++++++---------- spec/unit/outputs/safe_submitter_spec.rb | 27 +++++++------ 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 55258e2..0ca7cfd 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -11,10 +11,13 @@ def initialize(options) def submit(actions) begin - batch = prepare_batch(actions) - @session.execute(batch) + futures = actions.map do |action| + query = get_query(action) + execute_async(query, action["data"].values) + end + futures.each(&:join) 
rescue Exception => e - @logger.error("Failed to send batch to cassandra", :exception => e, :backtrace => e.backtrace) + @logger.error("Failed to send batch to cassandra", :actions => actions, :exception => e, :backtrace => e.backtrace) end end @@ -44,27 +47,24 @@ def get_retry_policy(policy_name) end end - def prepare_batch(actions) - statement_and_values = [] - for action in actions - @logger.debug("generating query for action", :action => action) - query = + def get_query(action) + @logger.debug("generating query for action", :action => action) + query = "INSERT INTO #{action["table"]} (#{action["data"].keys.join(', ')}) VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" - - if !@statement_cache.has_key?(query) - @logger.debug("new query generated", :query => query) - @statement_cache[query] = @session.prepare(query) - end - statement_and_values << [@statement_cache[query], action["data"].values] + if !@statement_cache.has_key?(query) + @logger.debug("new query generated", :query => query) + @statement_cache[query] = @session.prepare(query) end + return @statement_cache[query] + end - batch = @session.batch do |b| - statement_and_values.each do |v| - b.add(v[0], v[1]) - end - end - return batch + def execute_async(query, arguments) + future = @session.execute_async(query, arguments: arguments) + future.on_failure { |error| + @logger.error("error executing insert", :query => query, :arguments => arguments, :error => error) + } + return future end end end end end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 54e9d5d..16d9ed0 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -79,38 +79,39 @@ def setup_session_double(options) }} let(:expected_query_for_another_action) { "INSERT INTO another_table (a_column, another_column, a_third_column)\nVALUES (?, ?, ?)" } - def setup_batch_and_session_doubles() - session_double = 
setup_session_double(default_options)[:session_double] - batch_double = double() - expect(session_double).to(receive(:batch).and_yield(batch_double).at_least(:once)).and_return(batch_double) - expect(session_double).to(receive(:execute).with(batch_double).at_least(:once)) - return { :batch_double => batch_double, :session_double => session_double } + def generate_future_double() + future_double = double() + expect(future_double).to(receive(:join)) + expect(future_double).to(receive(:on_failure)) + return future_double end it "prepares and executes the query" do - doubles = setup_batch_and_session_doubles() + doubles = setup_session_double(default_options) expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action)).and_return("eureka") - expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", "another_value"])) + expect(doubles[:session_double]).to(receive(:execute_async).with("eureka", :arguments => one_action["data"].values)).and_return(generate_future_double()) sut_instance = sut.new(default_options) sut_instance.submit([one_action]) end it "caches the generated query" do - doubles = setup_batch_and_session_doubles() + doubles = setup_session_double(default_options) expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return("eureka") - expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", "another_value"]).twice) + 2.times { + expect(doubles[:session_double]).to(receive(:execute_async).with("eureka", :arguments => one_action["data"].values)).and_return(generate_future_double()) + } sut_instance = sut.new(default_options) sut_instance.submit([one_action, one_action]) end it "does not confuse between a new query and cached queries" do - doubles = setup_batch_and_session_doubles() + doubles = setup_session_double(default_options) expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return("eureka") 
expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_another_action).once).and_return("great scott") - expect(doubles[:batch_double]).to(receive(:add).with("eureka", ["a_value", "another_value"])) - expect(doubles[:batch_double]).to(receive(:add).with("great scott", ["a_value", "another_value", "another_value"])) + expect(doubles[:session_double]).to(receive(:execute_async).with("eureka", :arguments => one_action["data"].values)).and_return(generate_future_double()) + expect(doubles[:session_double]).to(receive(:execute_async).with("great scott", :arguments => another_action["data"].values)).and_return(generate_future_double()) sut_instance = sut.new(default_options) sut_instance.submit([one_action, another_action]) From a774d7d5593ff183527578f67977d0b9ad5906bd Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 29 Mar 2016 17:09:58 +0300 Subject: [PATCH 068/126] updated spec name to say bulk instead of batch --- spec/integration/outputs/cassandra_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 614d19d..2ff894a 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -115,5 +115,5 @@ def assert_proper_insert(type_to_test) } it "properly works with counter columns" - it "properly adds multiple events to multiple tables in the same batch" + it "properly adds multiple events to multiple tables in the same bulk" end From 520de93e51121379e215c259d8773d5c1771d033 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 31 Mar 2016 15:18:31 +0300 Subject: [PATCH 069/126] adding protocol version to options --- lib/logstash/outputs/cassandra.rb | 3 +++ lib/logstash/outputs/cassandra/safe_submitter.rb | 1 + spec/unit/outputs/safe_submitter_spec.rb | 2 ++ 3 files changed, 6 insertions(+) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 
d8c7d06..9263ae9 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -17,6 +17,9 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # The port cassandra is listening to config :port, :validate => :number, :default => 9042, :required => true + # The protocol version to use with cassandra + config :protocol_version, :validate => :number, :default => 3 + # Cassandra consistency level. # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" # Default: "one" diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 0ca7cfd..209bf60 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -26,6 +26,7 @@ def setup_cassandra_session(options) cluster = options["cassandra"].cluster( username: options["username"], password: options["password"], + protocol_version: options["protocol_version"], hosts: options["hosts"], port: options["port"], consistency: options["consistency"].to_sym, diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 16d9ed0..1c41444 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -12,6 +12,7 @@ "cassandra" => double(), "username" => "a user", "password" => "a password", + "protocol_version" => 3, "hosts" => "some host", "port" => 9042, "consistency" => "one", @@ -29,6 +30,7 @@ def setup_session_double(options) expect(options["cassandra"]).to(receive(:cluster).with( username: options["username"], password: options["password"], + protocol_version: options["protocol_version"], hosts: options["hosts"], port: options["port"], consistency: options["consistency"].to_sym, From 1482276bc5424a244fcc3f36b4363aa4eea84074 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sun, 3 Apr 2016 12:12:09 +0300 Subject: [PATCH 
070/126] changing default protocol version to 4 --- lib/logstash/outputs/cassandra.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 9263ae9..7d82e04 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -18,7 +18,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base config :port, :validate => :number, :default => 9042, :required => true # The protocol version to use with cassandra - config :protocol_version, :validate => :number, :default => 3 + config :protocol_version, :validate => :number, :default => 4 # Cassandra consistency level. # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" From 3f835d27a394f5986f7f4bb7cf9781b0153b7bf8 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 7 Apr 2016 09:27:38 +0300 Subject: [PATCH 071/126] adding logging of errors from the on_complete future callback --- lib/logstash/outputs/cassandra/safe_submitter.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 209bf60..1cd9134 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -65,6 +65,11 @@ def execute_async(query, arguments) future.on_failure { |error| @logger.error("error executing insert", :query => query, :arguments => arguments, :error => error) } + future.on_complete { |value, error| + if !error.nil? 
+ @logger.error("error executing insert", :query => query, :arguments => arguments, :error => error) + end + } return future end end From a47e0731779bd330a362c3e22e4760d28e463089 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 7 Apr 2016 14:12:30 +0300 Subject: [PATCH 072/126] adding missing expectation for future finish --- spec/unit/outputs/safe_submitter_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 1c41444..82f3af3 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -85,6 +85,7 @@ def generate_future_double() future_double = double() expect(future_double).to(receive(:join)) expect(future_double).to(receive(:on_failure)) + expect(future_double).to(receive(:on_complete)) return future_double end From 608b043767f6a6334f5557e67258b65972f270c8 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 7 Apr 2016 15:09:12 +0300 Subject: [PATCH 073/126] adding coverage support --- .gitignore | 1 + logstash-output-cassandra.gemspec | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 5a9fb21..e8dcb6b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ Gemfile.lock vendor /nbproject/private/ .idea +coverage \ No newline at end of file diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec index c1d4f0c..de62bd8 100644 --- a/logstash-output-cassandra.gemspec +++ b/logstash-output-cassandra.gemspec @@ -26,4 +26,9 @@ Gem::Specification.new do |s| s.add_development_dependency 'longshoreman' s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'logstash-codec-plain' + s.add_development_dependency 'simplecov' + s.add_development_dependency 'simplecov-rcov' + s.add_development_dependency 'unparser', '0.2.4' + s.add_development_dependency 'metric_fu' + s.add_development_dependency 'coveralls' end From 
80154a57f94e4df3fc5144d8029f109e4434ab03 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 7 Apr 2016 18:41:35 +0300 Subject: [PATCH 074/126] added back off retry policy --- lib/logstash/outputs/cassandra.rb | 14 +++- .../outputs/cassandra/backoff_retry_policy.rb | 76 +++++++++++++++++++ .../outputs/cassandra/safe_submitter.rb | 12 ++- spec/unit/outputs/safe_submitter_spec.rb | 26 +++++-- 4 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 lib/logstash/outputs/cassandra/backoff_retry_policy.rb diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 7d82e04..0a34046 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -59,12 +59,20 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # ip => "inet" } config :hints, :validate => :hash, :default => {} - # The retry policy to use - # The available options are: + # The retry policy to use (the default is the default retry policy) + # the hash requires the name of the policy and the params it requires + # The available policy names are: # * default => retry once if needed / possible # * downgrading_consistency => retry once with a best guess lowered consistency # * failthrough => fail immediately (i.e. no retries) - config :retry_policy, :validate => [ "default", "downgrading_consistency", "failthrough" ], :default => "default", :required => true + # * backoff => a version of the default retry policy but with configurable backoff retries + # The backoff options are as follows: + # * backoff_type => either * or ** for linear and exponential backoffs respectively + # * backoff_size => the left operand for the backoff type in seconds + # * retry_limit => the maximum amount of retries to allow per query + # example: + # using { "type" => "backoff" "backoff_type" => "**" "backoff_size" => 2 "retry_limit" => 10 } will perform 10 retries with the following wait times: 1, 2, 4, 8, 16, ... 
1024 + config :retry_policy, :validate => :hash, :default => { "type" => "default" }, :required => true # The command execution timeout config :request_timeout, :validate => :number, :default => 5 diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb new file mode 100644 index 0000000..6d95c21 --- /dev/null +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -0,0 +1,76 @@ +# encoding: utf-8 +# This is a version of the default retry policy (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb) with backoff retry configuration options +require "cassandra" + +module Cassandra + module Retry + module Policies + class Backoff + include Policy + + def initialize(opts) + @logger = opts["logger"] + @backoff_type = opts["backoff_type"] + @backoff_size = opts["backoff_size"] + @retry_limit = opts["retry_limit"] + end + + def read_timeout(statement, consistency, required, received, retrieved, retries) + return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, :received => received, :retrieved => retrieved, :retries => retries}) { |opts| + if received >= required && !retrieved + try_again(consistency) + else + try_next_host + end + } + end + + def write_timeout(statement, consistency, type, required, received, retries) + return retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type, :required => required, :received => received, :retries => retries}) { |opts| + if opts[:received].zero? 
+ try_next_host + elsif opts[:type] == :batch_log + try_again(opts[:consistency]) + else + reraise + end + } + end + + def unavailable(statement, consistency, required, alive, retries) + return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, :alive => alive, :retries => retries }) { |opts| + try_next_host + } + end + + def retry_with_backoff(opts) + if opts[:retries] > @retry_limit + @logger.error('backoff retries exhausted', :opts => opts) + return reraise + end + + @logger.error('activating backoff wait', :opts => opts) + backoff_wait_before_next_retry(opts[:retries]) + + return yield(opts) + end + + def backoff_wait_before_next_retry(retries) + backoff_wait_time = calculate_backoff_wait_time(retries) + sleep(backoff_wait_time) + end + + def calculate_backoff_wait_time(retries) + backoff_wait_time = 0 + case @backoff_type + when "**" + backoff_wait_time = @backoff_size ** retries + when "*" + backoff_wait_time = @backoff_size * retries + end + return backoff_wait_time + end + end + end + end +end diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 1cd9134..138b7f1 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -1,5 +1,6 @@ # encoding: utf-8 require "cassandra" +require "logstash/outputs/cassandra/backoff_retry_policy" module LogStash; module Outputs; module Cassandra class SafeSubmitter @@ -37,14 +38,21 @@ def setup_cassandra_session(options) @session = cluster.connect(options["keyspace"]) end - def get_retry_policy(policy_name) - case policy_name + def get_retry_policy(retry_policy) + case retry_policy["type"] when "default" return ::Cassandra::Retry::Policies::Default.new when "downgrading_consistency" return ::Cassandra::Retry::Policies::DowngradingConsistency.new when "failthrough" return ::Cassandra::Retry::Policies::Fallthrough.new + when "backoff" + return 
::Cassandra::Retry::Policies::Backoff.new({ + "backoff_type" => retry_policy["backoff_type"], "backoff_size" => retry_policy["backoff_size"], + "retry_limit" => retry_policy["retry_limit"], "logger" => @logger + }) + else + raise ArgumentError, "unknown retry policy type: #{retry_policy["type"]}" end end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 82f3af3..0099e3b 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -17,7 +17,7 @@ "port" => 9042, "consistency" => "one", "request_timeout" => 10, - "retry_policy" => "default", + "retry_policy" => { "type" => "default" }, "concrete_retry_policy" => ::Cassandra::Retry::Policies::Default, "keyspace" => "the final frontier" } @@ -49,17 +49,31 @@ def setup_session_double(options) end [ - { :name => "default", :concrete_retry_policy => ::Cassandra::Retry::Policies::Default }, - { :name => "downgrading_consistency", :concrete_retry_policy => ::Cassandra::Retry::Policies::DowngradingConsistency }, - { :name => "failthrough", :concrete_retry_policy => ::Cassandra::Retry::Policies::Fallthrough } + { :setting => { "type" => "default" }, :concrete_retry_policy => ::Cassandra::Retry::Policies::Default }, + { :setting => { "type" => "downgrading_consistency" }, :concrete_retry_policy => ::Cassandra::Retry::Policies::DowngradingConsistency }, + { :setting => { "type" => "failthrough" }, :concrete_retry_policy => ::Cassandra::Retry::Policies::Fallthrough }, + { :setting => { "type" => "backoff", "backoff_type" => "**", "backoff_size" => 2, "retry_limit" => 10 }, + :concrete_retry_policy => ::Cassandra::Retry::Policies::Backoff } ].each { |mapping| - it "supports the #{mapping["class"]} retry policy by passing #{mapping["name"]} as the retry_policy" do - options = default_options.update({ "retry_policy" => mapping[:name], "concrete_retry_policy" => mapping[:concrete_retry_policy] }) + it "supports the 
#{mapping[:concrete_retry_policy]} retry policy by passing #{mapping[:setting]["type"]} as the retry_policy" do + options = default_options.update({ "retry_policy" => mapping[:setting], "concrete_retry_policy" => mapping[:concrete_retry_policy] }) setup_session_double(options) sut.new(options) end } + + it "properly initializes the backoff retry policy" do + retry_policy_config = { "type" => "backoff", "backoff_type" => "**", "backoff_size" => 2, "retry_limit" => 10 } + expected_policy = double() + options = default_options.update({ "retry_policy" => retry_policy_config, "concrete_retry_policy" => expected_policy }) + expect(::Cassandra::Retry::Policies::Backoff).to(receive(:new).with({ + "backoff_type" => options["retry_policy"]["backoff_type"], "backoff_size" => options["retry_policy"]["backoff_size"], + "retry_limit" => options["retry_policy"]["retry_limit"], "logger" => options["logger"]}).and_return(expected_policy)) + setup_session_double(options) + + sut.new(options) + end end describe "execution" do From 5c1ce9ed257d106753213c6aec6eb1ba76231134 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 7 Apr 2016 18:44:21 +0300 Subject: [PATCH 075/126] empty spec for back off retry policy --- spec/unit/outputs/backoff_retry_policy_spec.rb | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 spec/unit/outputs/backoff_retry_policy_spec.rb diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb new file mode 100644 index 0000000..8e0ce18 --- /dev/null +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -0,0 +1,8 @@ +# encoding: utf-8 +require_relative "../../cassandra_spec_helper" +require "logstash/outputs/cassandra/backoff_retry_policy" + +RSpec.describe ::Cassandra::Retry::Policies::Backoff do + let(:sut) { ::Cassandra::Retry::Policies::Backoff } + end +end From 2c85551f05e5f3b55103f3ea754186134e5dec3a Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Thu, 7 Apr 2016 18:56:46 +0300 Subject: [PATCH 
076/126] added spec skeleton for the back off policy --- lib/logstash/outputs/cassandra.rb | 1 + .../outputs/cassandra/backoff_retry_policy.rb | 2 +- .../unit/outputs/backoff_retry_policy_spec.rb | 27 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 0a34046..6b8b500 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -72,6 +72,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # * retry_limit => the maximum amount of retries to allow per query # example: # using { "type" => "backoff" "backoff_type" => "**" "backoff_size" => 2 "retry_limit" => 10 } will perform 10 retries with the following wait times: 1, 2, 4, 8, 16, ... 1024 + # NOTE: there is an underlying assumption that the insert query is idempotent !!! config :retry_policy, :validate => :hash, :default => { "type" => "default" }, :required => true # The command execution timeout diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index 6d95c21..2741fa4 100644 --- a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -18,7 +18,7 @@ def initialize(opts) def read_timeout(statement, consistency, required, received, retrieved, retries) return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, :received => received, :retrieved => retrieved, :retries => retries}) { |opts| if received >= required && !retrieved - try_again(consistency) + try_again(opts[:consistency]) else try_next_host end diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 8e0ce18..759a81f 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -4,5 +4,32 @@ RSpec.describe 
::Cassandra::Retry::Policies::Backoff do let(:sut) { ::Cassandra::Retry::Policies::Backoff } + + describe "#read_timeout" do + it "tries again if the result did not finish, but the required acks were" + it "tries the next host, if retries are left" + + it "stops once the max retries are reached" + it "waits between retries" + end + + describe "#write_timeout" do + it "tries the next host if no acks were recieved (there is an undelying assumption that the query is idempotent)" + it "retries if the query was a logged batch" + + it "stops once the max retries are reached" + it "waits between retries" + end + + describe "#unavailable" do + it "tries the next host" + + it "stops once the max retries are reached" + it "waits between retries" + end + + describe "#calculate_backoff_wait_time" do + it "allows for exponential backoffs" + it "allows for linear backoffs" end end From 30edb4768f98c6c5e63695a6cd457510b24c47dd Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 16:39:53 +0300 Subject: [PATCH 077/126] extracted shared examples --- .../unit/outputs/backoff_retry_policy_spec.rb | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 759a81f..8a93551 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -2,34 +2,35 @@ require_relative "../../cassandra_spec_helper" require "logstash/outputs/cassandra/backoff_retry_policy" +RSpec.shared_examples "limited parameterized backoff" do + let(:sut) { ::Cassandra::Retry::Policies::Backoff } + + it "stops once the max retries are reached" + it "waits between retries" + it "allows for exponential backoffs" + it "allows for linear backoffs" +end + RSpec.describe ::Cassandra::Retry::Policies::Backoff do let(:sut) { ::Cassandra::Retry::Policies::Backoff } describe "#read_timeout" do - it "tries again if the result did not 
finish, but the required acks were" - it "tries the next host, if retries are left" + include_examples "limited parameterized backoff" - it "stops once the max retries are reached" - it "waits between retries" + it "tries again if the result did not arrive, but the required acks arrived" + it "tries the next host, if retries are left" end describe "#write_timeout" do + include_examples "limited parameterized backoff" + it "tries the next host if no acks were recieved (there is an undelying assumption that the query is idempotent)" it "retries if the query was a logged batch" - - it "stops once the max retries are reached" - it "waits between retries" end describe "#unavailable" do - it "tries the next host" + include_examples "limited parameterized backoff" - it "stops once the max retries are reached" - it "waits between retries" - end - - describe "#calculate_backoff_wait_time" do - it "allows for exponential backoffs" - it "allows for linear backoffs" + it "tries the next host" end end From e5050f8ae2e08061fabe42cfff5d6c8660fb74b7 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 22:00:18 +0300 Subject: [PATCH 078/126] #retry_with_backoff - runs the block if the max retries have not been reached --- .../unit/outputs/backoff_retry_policy_spec.rb | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 8a93551..1482ca8 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -2,35 +2,46 @@ require_relative "../../cassandra_spec_helper" require "logstash/outputs/cassandra/backoff_retry_policy" -RSpec.shared_examples "limited parameterized backoff" do - let(:sut) { ::Cassandra::Retry::Policies::Backoff } - - it "stops once the max retries are reached" - it "waits between retries" - it "allows for exponential backoffs" - it "allows for linear backoffs" -end - 
RSpec.describe ::Cassandra::Retry::Policies::Backoff do let(:sut) { ::Cassandra::Retry::Policies::Backoff } + let(:default_options) { + logger = double() + allow(logger).to(receive(:error)) + { + "logger" => logger, + "backoff_type" => "*", + "backoff_size" => 1, + "retry_limit" => 1 + } + } + + describe "#retry_with_backoff" do + it "runs the block if the max retries have not been reached" do + sut_instance = sut.new(default_options) + yield_double = double() + expect(yield_double).to(receive(:ola)) + + sut_instance.retry_with_backoff({ :retries => 0 }) { |opts| yield_double.ola(opts) } + end + + it "passes the options it recieves to the yield block" + it "stops once the max retries are reached" + it "waits between retries" + it "allows for exponential backoffs" + it "allows for linear backoffs" + end describe "#read_timeout" do - include_examples "limited parameterized backoff" - it "tries again if the result did not arrive, but the required acks arrived" it "tries the next host, if retries are left" end describe "#write_timeout" do - include_examples "limited parameterized backoff" - it "tries the next host if no acks were recieved (there is an undelying assumption that the query is idempotent)" it "retries if the query was a logged batch" end describe "#unavailable" do - include_examples "limited parameterized backoff" - it "tries the next host" end end From e4d1f35a9ad2084e31b362a05552ada1dcbdc807 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 22:07:11 +0300 Subject: [PATCH 079/126] passes the options it recieves to the yield block --- spec/unit/outputs/backoff_retry_policy_spec.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 1482ca8..7f447d9 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -24,7 +24,15 @@ sut_instance.retry_with_backoff({ :retries 
=> 0 }) { |opts| yield_double.ola(opts) } end - it "passes the options it recieves to the yield block" + it "passes the options it recieves to the yield block" do + sut_instance = sut.new(default_options) + yield_double = double() + expected_options = { :retries => 0 } + expect(yield_double).to(receive(:ola).with(expected_options)) + + sut_instance.retry_with_backoff(expected_options) { |opts| yield_double.ola(opts) } + end + it "stops once the max retries are reached" it "waits between retries" it "allows for exponential backoffs" From 13b4c113a7caa61374d03ad5d0bb7ced43e5a95e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 22:10:20 +0300 Subject: [PATCH 080/126] stops once the max retries are reached --- spec/unit/outputs/backoff_retry_policy_spec.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 7f447d9..39860db 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -33,7 +33,14 @@ sut_instance.retry_with_backoff(expected_options) { |opts| yield_double.ola(opts) } end - it "stops once the max retries are reached" + it "stops once the max retries are reached" do + sut_instance = sut.new(default_options) + yield_double = double() + expect(yield_double).not_to(receive(:ola)) + + sut_instance.retry_with_backoff({ :retries => 2 }) { |opts| yield_double.ola(opts) } + end + it "waits between retries" it "allows for exponential backoffs" it "allows for linear backoffs" From 794916538c3ea85a8eaffd34ee88d9cd2039cc8a Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 22:50:32 +0300 Subject: [PATCH 081/126] waits between retries --- lib/logstash/outputs/cassandra/backoff_retry_policy.rb | 2 +- spec/unit/outputs/backoff_retry_policy_spec.rb | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git 
a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index 2741fa4..0c904c8 100644 --- a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -57,7 +57,7 @@ def retry_with_backoff(opts) def backoff_wait_before_next_retry(retries) backoff_wait_time = calculate_backoff_wait_time(retries) - sleep(backoff_wait_time) + Kernel::sleep(backoff_wait_time) end def calculate_backoff_wait_time(retries) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 39860db..09088f2 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -41,7 +41,15 @@ sut_instance.retry_with_backoff({ :retries => 2 }) { |opts| yield_double.ola(opts) } end - it "waits between retries" + it "waits between retries" do + sut_instance = sut.new(default_options) + expect(Kernel).to(receive(:sleep).ordered) + yield_double = double() + expect(yield_double).to(receive(:ola).ordered) + + sut_instance.retry_with_backoff({ :retries => 0 }) { |opts| yield_double.ola(opts) } + end + it "allows for exponential backoffs" it "allows for linear backoffs" end From 3961ad28d353904783b3e3d682fad39763d7ff58 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 22:59:47 +0300 Subject: [PATCH 082/126] allows for exponential backoffs --- .../unit/outputs/backoff_retry_policy_spec.rb | 28 ++++++++++++++----- spec/unit/outputs/safe_submitter_spec.rb | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 09088f2..1b5dfaa 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -4,7 +4,7 @@ RSpec.describe ::Cassandra::Retry::Policies::Backoff do let(:sut) { ::Cassandra::Retry::Policies::Backoff } - 
let(:default_options) { + let(:short_linear_backoff) { logger = double() allow(logger).to(receive(:error)) { @@ -14,10 +14,18 @@ "retry_limit" => 1 } } + let(:long_exponential_backoff) { + short_linear_backoff.merge({ + "backoff_type" => "**", + "backoff_size" => 2, + "retry_limit" => 10 + }) + } + describe "#retry_with_backoff" do it "runs the block if the max retries have not been reached" do - sut_instance = sut.new(default_options) + sut_instance = sut.new(short_linear_backoff) yield_double = double() expect(yield_double).to(receive(:ola)) @@ -25,7 +33,7 @@ end it "passes the options it recieves to the yield block" do - sut_instance = sut.new(default_options) + sut_instance = sut.new(short_linear_backoff) yield_double = double() expected_options = { :retries => 0 } expect(yield_double).to(receive(:ola).with(expected_options)) @@ -34,15 +42,15 @@ end it "stops once the max retries are reached" do - sut_instance = sut.new(default_options) + sut_instance = sut.new(short_linear_backoff) yield_double = double() expect(yield_double).not_to(receive(:ola)) sut_instance.retry_with_backoff({ :retries => 2 }) { |opts| yield_double.ola(opts) } end - it "waits between retries" do - sut_instance = sut.new(default_options) + it "waits before retrying" do + sut_instance = sut.new(short_linear_backoff) expect(Kernel).to(receive(:sleep).ordered) yield_double = double() expect(yield_double).to(receive(:ola).ordered) @@ -50,7 +58,13 @@ sut_instance.retry_with_backoff({ :retries => 0 }) { |opts| yield_double.ola(opts) } end - it "allows for exponential backoffs" + it "allows for exponential backoffs" do + sut_instance = sut.new(long_exponential_backoff) + expect(Kernel).to(receive(:sleep).with(256)) + + sut_instance.retry_with_backoff({ :retries => 8 }) { } + end + it "allows for linear backoffs" end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 0099e3b..9d3972e 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ 
b/spec/unit/outputs/safe_submitter_spec.rb @@ -4,7 +4,7 @@ RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do let(:sut) { LogStash::Outputs::Cassandra::SafeSubmitter } - let(:default_options) { + let(:short_linear_backoff) { logger = double() allow(logger).to(receive(:debug)) { From 491bb9d16a4332fdb210c95fbae02d59372dffda Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 23:03:52 +0300 Subject: [PATCH 083/126] allows for linear backoffs --- .../unit/outputs/backoff_retry_policy_spec.rb | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 1b5dfaa..f32e8a2 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -4,28 +4,27 @@ RSpec.describe ::Cassandra::Retry::Policies::Backoff do let(:sut) { ::Cassandra::Retry::Policies::Backoff } - let(:short_linear_backoff) { + let(:linear_backoff) { logger = double() allow(logger).to(receive(:error)) { "logger" => logger, "backoff_type" => "*", - "backoff_size" => 1, - "retry_limit" => 1 + "backoff_size" => 5, + "retry_limit" => 10 } } - let(:long_exponential_backoff) { - short_linear_backoff.merge({ + let(:exponential_backoff) { + linear_backoff.merge({ "backoff_type" => "**", "backoff_size" => 2, "retry_limit" => 10 }) } - describe "#retry_with_backoff" do it "runs the block if the max retries have not been reached" do - sut_instance = sut.new(short_linear_backoff) + sut_instance = sut.new(linear_backoff) yield_double = double() expect(yield_double).to(receive(:ola)) @@ -33,7 +32,7 @@ end it "passes the options it recieves to the yield block" do - sut_instance = sut.new(short_linear_backoff) + sut_instance = sut.new(linear_backoff) yield_double = double() expected_options = { :retries => 0 } expect(yield_double).to(receive(:ola).with(expected_options)) @@ -42,15 +41,15 @@ end it "stops once the max retries are 
reached" do - sut_instance = sut.new(short_linear_backoff) + sut_instance = sut.new(linear_backoff) yield_double = double() expect(yield_double).not_to(receive(:ola)) - sut_instance.retry_with_backoff({ :retries => 2 }) { |opts| yield_double.ola(opts) } + sut_instance.retry_with_backoff({ :retries => 100 }) { |opts| yield_double.ola(opts) } end it "waits before retrying" do - sut_instance = sut.new(short_linear_backoff) + sut_instance = sut.new(linear_backoff) expect(Kernel).to(receive(:sleep).ordered) yield_double = double() expect(yield_double).to(receive(:ola).ordered) @@ -59,13 +58,18 @@ end it "allows for exponential backoffs" do - sut_instance = sut.new(long_exponential_backoff) + sut_instance = sut.new(exponential_backoff) expect(Kernel).to(receive(:sleep).with(256)) sut_instance.retry_with_backoff({ :retries => 8 }) { } end - it "allows for linear backoffs" + it "allows for linear backoffs" do + sut_instance = sut.new(linear_backoff) + expect(Kernel).to(receive(:sleep).with(40)) + + sut_instance.retry_with_backoff({ :retries => 8 }) { } + end end describe "#read_timeout" do From f4cd2421d90b5e074e7f3d6468189ebed766fca2 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 23:09:27 +0300 Subject: [PATCH 084/126] cleanup of #retry_with_backoff tests --- spec/unit/outputs/backoff_retry_policy_spec.rb | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index f32e8a2..ca56272 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -28,7 +28,7 @@ yield_double = double() expect(yield_double).to(receive(:ola)) - sut_instance.retry_with_backoff({ :retries => 0 }) { |opts| yield_double.ola(opts) } + sut_instance.retry_with_backoff({ :retries => 0 }) { yield_double.ola() } end it "passes the options it recieves to the yield block" do @@ -45,30 +45,32 @@ yield_double = 
double() expect(yield_double).not_to(receive(:ola)) - sut_instance.retry_with_backoff({ :retries => 100 }) { |opts| yield_double.ola(opts) } + sut_instance.retry_with_backoff({ :retries => linear_backoff["retry_limit"] + 1 }) { yield_double.ola() } end - it "waits before retrying" do + it "waits _before_ retrying" do sut_instance = sut.new(linear_backoff) expect(Kernel).to(receive(:sleep).ordered) yield_double = double() expect(yield_double).to(receive(:ola).ordered) - sut_instance.retry_with_backoff({ :retries => 0 }) { |opts| yield_double.ola(opts) } + sut_instance.retry_with_backoff({ :retries => 0 }) { yield_double.ola() } end it "allows for exponential backoffs" do sut_instance = sut.new(exponential_backoff) - expect(Kernel).to(receive(:sleep).with(256)) + test_retry = exponential_backoff["retry_limit"] - 1 + expect(Kernel).to(receive(:sleep).with(exponential_backoff["backoff_size"] ** test_retry)) - sut_instance.retry_with_backoff({ :retries => 8 }) { } + sut_instance.retry_with_backoff({ :retries => test_retry }) { } end it "allows for linear backoffs" do sut_instance = sut.new(linear_backoff) - expect(Kernel).to(receive(:sleep).with(40)) + test_retry = exponential_backoff["retry_limit"] - 1 + expect(Kernel).to(receive(:sleep).with(linear_backoff["backoff_size"] * test_retry)) - sut_instance.retry_with_backoff({ :retries => 8 }) { } + sut_instance.retry_with_backoff({ :retries => test_retry }) { } end end From 2995c7c518151aac20efb5ef3b23dae234e2e455 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 8 Apr 2016 23:13:57 +0300 Subject: [PATCH 085/126] returns the value from the yield block --- spec/unit/outputs/backoff_retry_policy_spec.rb | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index ca56272..073b33c 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -31,6 
+31,15 @@ sut_instance.retry_with_backoff({ :retries => 0 }) { yield_double.ola() } end + it "returns the value from the yield block" do + sut_instance = sut.new(linear_backoff) + expected_result = double() + + result = sut_instance.retry_with_backoff({ :retries => 0 }) { expected_result } + + expect(result).to(be(expected_result)) + end + it "passes the options it recieves to the yield block" do sut_instance = sut.new(linear_backoff) yield_double = double() @@ -75,7 +84,11 @@ end describe "#read_timeout" do - it "tries again if the result did not arrive, but the required acks arrived" + it "tries again if the result did not arrive, but the required acks arrived" do + sut_instance = sut.new(linear_backoff) + + end + it "tries the next host, if retries are left" end From 7d11259bb09252e56bafa3c194bb257de3a85b7e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sat, 9 Apr 2016 13:33:32 +0300 Subject: [PATCH 086/126] simplified the retry policy to always retry as long as we havent reached the retry limit --- .../outputs/cassandra/backoff_retry_policy.rb | 27 ++---- .../unit/outputs/backoff_retry_policy_spec.rb | 94 ++++++++++--------- 2 files changed, 57 insertions(+), 64 deletions(-) diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index 0c904c8..1c03b2e 100644 --- a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -16,31 +16,18 @@ def initialize(opts) end def read_timeout(statement, consistency, required, received, retrieved, retries) - return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, :received => received, :retrieved => retrieved, :retries => retries}) { |opts| - if received >= required && !retrieved - try_again(opts[:consistency]) - else - try_next_host - end - } + return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, 
+ :received => received, :retrieved => retrieved, :retries => retries }) end def write_timeout(statement, consistency, type, required, received, retries) - return retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type, :required => required, :received => received, :retries => retries}) { |opts| - if opts[:received].zero? - try_next_host - elsif opts[:type] == :batch_log - try_again(opts[:consistency]) - else - reraise - end - } + return retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type, + :required => required, :received => received, :retries => retries }) end def unavailable(statement, consistency, required, alive, retries) - return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, :alive => alive, :retries => retries }) { |opts| - try_next_host - } + return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, + :alive => alive, :retries => retries }) end def retry_with_backoff(opts) @@ -52,7 +39,7 @@ def retry_with_backoff(opts) @logger.error('activating backoff wait', :opts => opts) backoff_wait_before_next_retry(opts[:retries]) - return yield(opts) + return try_again(opts[:consistency]) end def backoff_wait_before_next_retry(retries) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 073b33c..558aa03 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -23,73 +23,79 @@ } describe "#retry_with_backoff" do - it "runs the block if the max retries have not been reached" do - sut_instance = sut.new(linear_backoff) - yield_double = double() - expect(yield_double).to(receive(:ola)) + describe "retry limit not reached" do + it "decides to try again with the same consistency level" do + sut_instance = sut.new(linear_backoff) - sut_instance.retry_with_backoff({ :retries => 0 }) { 
yield_double.ola() } - end + decision = sut_instance.retry_with_backoff({ :retries => 0, :consistency => :one }) - it "returns the value from the yield block" do - sut_instance = sut.new(linear_backoff) - expected_result = double() + expect(decision).to(be_an_instance_of(::Cassandra::Retry::Decisions::Retry)) + expect(decision.consistency).to(be(:one)) + end - result = sut_instance.retry_with_backoff({ :retries => 0 }) { expected_result } + it "waits _before_ retrying" do + sut_instance = sut.new(linear_backoff) + expect(Kernel).to(receive(:sleep)) - expect(result).to(be(expected_result)) - end + sut_instance.retry_with_backoff({ :retries => 0 }) + end - it "passes the options it recieves to the yield block" do - sut_instance = sut.new(linear_backoff) - yield_double = double() - expected_options = { :retries => 0 } - expect(yield_double).to(receive(:ola).with(expected_options)) + it "allows for exponential backoffs" do + sut_instance = sut.new(exponential_backoff) + test_retry = exponential_backoff["retry_limit"] - 1 + expect(Kernel).to(receive(:sleep).with(exponential_backoff["backoff_size"] ** test_retry)) - sut_instance.retry_with_backoff(expected_options) { |opts| yield_double.ola(opts) } - end + sut_instance.retry_with_backoff({ :retries => test_retry }) { } + end - it "stops once the max retries are reached" do - sut_instance = sut.new(linear_backoff) - yield_double = double() - expect(yield_double).not_to(receive(:ola)) + it "allows for linear backoffs" do + sut_instance = sut.new(linear_backoff) + test_retry = exponential_backoff["retry_limit"] - 1 + expect(Kernel).to(receive(:sleep).with(linear_backoff["backoff_size"] * test_retry)) - sut_instance.retry_with_backoff({ :retries => linear_backoff["retry_limit"] + 1 }) { yield_double.ola() } + sut_instance.retry_with_backoff({ :retries => test_retry }) { } + end end - it "waits _before_ retrying" do - sut_instance = sut.new(linear_backoff) - expect(Kernel).to(receive(:sleep).ordered) - yield_double = double() 
- expect(yield_double).to(receive(:ola).ordered) + describe "retry limit reached" do + it "decides to reraise" do + sut_instance = sut.new(linear_backoff) - sut_instance.retry_with_backoff({ :retries => 0 }) { yield_double.ola() } - end + decision = sut_instance.retry_with_backoff({ :retries => linear_backoff["retry_limit"] + 1 }) - it "allows for exponential backoffs" do - sut_instance = sut.new(exponential_backoff) - test_retry = exponential_backoff["retry_limit"] - 1 - expect(Kernel).to(receive(:sleep).with(exponential_backoff["backoff_size"] ** test_retry)) + expect(decision).to(be_an_instance_of(::Cassandra::Retry::Decisions::Reraise)) + end - sut_instance.retry_with_backoff({ :retries => test_retry }) { } - end + it "does not wait" do + sut_instance = sut.new(linear_backoff) - it "allows for linear backoffs" do - sut_instance = sut.new(linear_backoff) - test_retry = exponential_backoff["retry_limit"] - 1 - expect(Kernel).to(receive(:sleep).with(linear_backoff["backoff_size"] * test_retry)) + expect(Kernel).not_to(receive(:sleep)) - sut_instance.retry_with_backoff({ :retries => test_retry }) { } + sut_instance.retry_with_backoff({ :retries => linear_backoff["retry_limit"] + 1 }) + end end end describe "#read_timeout" do - it "tries again if the result did not arrive, but the required acks arrived" do + it "properly calls #retry_with_backoff" do sut_instance = sut.new(linear_backoff) + expect(sut_instance).to(receive(:retry_with_backoff).with({ + :statement => "statement", :consistency => :one, :required => 1, + :received => 0, :retrieved => false, :retries => 0 + })) + sut_instance.read_timeout("statement", :one, 1, 0, false, 0) end - it "tries the next host, if retries are left" + it "returns the decision it got" do + sut_instance = sut.new(linear_backoff) + expected_result = double() + expect(sut_instance).to(receive(:retry_with_backoff).and_return(expected_result)) + + result = sut_instance.read_timeout("statement", :one, 1, 0, false, 0) + + 
expect(result).to(be(expected_result)) + end end describe "#write_timeout" do From f63f8aca62a63740800270c943af214df895113d Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sat, 9 Apr 2016 13:47:21 +0300 Subject: [PATCH 087/126] public api tests --- .../unit/outputs/backoff_retry_policy_spec.rb | 61 +++++++++++-------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 558aa03..4734e6b 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -76,34 +76,43 @@ end end - describe "#read_timeout" do - it "properly calls #retry_with_backoff" do - sut_instance = sut.new(linear_backoff) - expect(sut_instance).to(receive(:retry_with_backoff).with({ - :statement => "statement", :consistency => :one, :required => 1, - :received => 0, :retrieved => false, :retries => 0 - })) - - sut_instance.read_timeout("statement", :one, 1, 0, false, 0) - end - - it "returns the decision it got" do - sut_instance = sut.new(linear_backoff) - expected_result = double() - expect(sut_instance).to(receive(:retry_with_backoff).and_return(expected_result)) + [ + { + :method_name=> "read_timeout", + :expected_opts => { :statement => "statement", :consistency => :one, :required => 1, :received => 0, + :retrieved => false, :retries => 0 }, + :call_args => ["statement", :one, 1, 0, false, 0] + }, + { + :method_name=> "write_timeout", + :expected_opts => { :statement => "statement", :consistency => :one, :type => :prepared, + :required => 1, :received => 2, :retries => 5 }, + :call_args => ["statement", :one, :prepared, 1, 2, 5] + }, + { + :method_name=> "unavailable", + :expected_opts => { :statement => "statement", :consistency => :one, :required => 3, + :alive => 2, :retries => 4}, + :call_args => ["statement", :one, 3, 2, 4] + } + ].each { |use_case| + describe "#{use_case[:method_name]}" do + it "properly calls 
#retry_with_backoff" do + sut_instance = sut.new(linear_backoff) + expect(sut_instance).to(receive(:retry_with_backoff).with(use_case[:expected_opts])) - result = sut_instance.read_timeout("statement", :one, 1, 0, false, 0) + sut_instance.send(use_case[:method_name], *use_case[:call_args]) + end - expect(result).to(be(expected_result)) - end - end + it "returns the decision it got" do + sut_instance = sut.new(linear_backoff) + expected_result = double() + expect(sut_instance).to(receive(:retry_with_backoff).and_return(expected_result)) - describe "#write_timeout" do - it "tries the next host if no acks were recieved (there is an undelying assumption that the query is idempotent)" - it "retries if the query was a logged batch" - end + result = sut_instance.send(use_case[:method_name], *use_case[:call_args]) - describe "#unavailable" do - it "tries the next host" - end + expect(result).to(be(expected_result)) + end + end + } end From b630a657cd8a0da7ee47be26b9bdd3864c688c38 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sat, 9 Apr 2016 13:50:16 +0300 Subject: [PATCH 088/126] fixed incorrectly named let --- spec/unit/outputs/safe_submitter_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 9d3972e..0099e3b 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -4,7 +4,7 @@ RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do let(:sut) { LogStash::Outputs::Cassandra::SafeSubmitter } - let(:short_linear_backoff) { + let(:default_options) { logger = double() allow(logger).to(receive(:debug)) { From 8515bdf4a80a48232b99941dc9f1c902485da9fa Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sat, 9 Apr 2016 14:14:20 +0300 Subject: [PATCH 089/126] fixing incorrect set default regex and test --- lib/logstash/outputs/cassandra/event_parser.rb | 2 +- spec/unit/outputs/event_parser_spec.rb | 3 ++- 2 files changed, 
3 insertions(+), 2 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index ed38be9..4b23772 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -89,7 +89,7 @@ def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassand typed_event_data = convert_value_to_cassandra_type("00000000-0000-0000-0000-000000000000", cassandra_type) when "inet" typed_event_data = convert_value_to_cassandra_type("0.0.0.0", cassandra_type) - when /^set\((.*)\)$/ + when /^set<.*>$/ typed_event_data = convert_value_to_cassandra_type([], cassandra_type) end @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 264475d..9fd18a3 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -289,7 +289,8 @@ action = sut_instance.parse(sample_event) - expect(action["data"]["a_field"].to_a).to(eq([])) + expect(action["data"]["a_field"].size).to(be(0)) + expect(action["data"]["a_field"]).to(be_an_instance_of(Set)) end end end From 1e79f0de452ac646e3c21558933704078b9b5a5d Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sat, 9 Apr 2016 14:24:02 +0300 Subject: [PATCH 090/126] added coverage for bad retry policies --- spec/unit/outputs/safe_submitter_spec.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 0099e3b..369837c 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -74,6 +74,12 @@ def setup_session_double(options) sut.new(options) end + + it "fails if the retry policy is unknown" do + options = default_options.update({ "retry_policy" => "bad policy" }) + + expect { sut.new(options) }.to(raise_error(ArgumentError)) + end end describe "execution" 
do From 9970b8719769ce3e9754bcdf5b44016bbc89d6ab Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sat, 9 Apr 2016 14:31:45 +0300 Subject: [PATCH 091/126] added coverage to failed batch logging --- spec/unit/outputs/safe_submitter_spec.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 369837c..e734460 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -139,5 +139,14 @@ def generate_future_double() sut_instance.submit([one_action, another_action]) end + + it "logs and skips failed batches" do + setup_session_double(default_options) + sut_instance = sut.new(default_options) + expect(sut_instance).to(receive(:get_query).and_raise(ArgumentError)) + expect(default_options["logger"]).to(receive(:error)) + + expect { sut_instance.submit([one_action]) }.to_not raise_error + end end end From 30ece3ef57be864f2b1963044b27d26f13f1e657 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sun, 10 Apr 2016 18:02:50 +0300 Subject: [PATCH 092/126] adding metric fu, built in coverage calcs, and sonar --- .gitignore | 3 ++- sonar-project.properties | 9 +++++++++ spec/cassandra_spec_helper.rb | 11 +++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 sonar-project.properties diff --git a/.gitignore b/.gitignore index e8dcb6b..cbf4f18 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ Gemfile.lock vendor /nbproject/private/ .idea -coverage \ No newline at end of file +coverage +tmp diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 0000000..6e9e7e6 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,9 @@ +sonar.projectKey=px:logstash-output-cassandra + +sonar.projectName=Logstash Cassandra Output +sonar.projectVersion=1.0 + +sonar.language=ruby +sonar.sources=lib +sonar.tests=spec +sonar.exclusions=spec/** diff --git a/spec/cassandra_spec_helper.rb 
b/spec/cassandra_spec_helper.rb index c00762e..4fbc774 100644 --- a/spec/cassandra_spec_helper.rb +++ b/spec/cassandra_spec_helper.rb @@ -1,3 +1,14 @@ # encoding: utf-8 require "logstash/devutils/rspec/spec_helper" require "logstash/event" +require 'simplecov' +require 'simplecov-rcov' + +SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new([ + SimpleCov::Formatter::HTMLFormatter, + SimpleCov::Formatter::RcovFormatter +]) + +SimpleCov.start do + add_filter '/spec/' +end From 787a640984025b63de359783f4084b06cdd5ece1 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Sun, 10 Apr 2016 18:03:22 +0300 Subject: [PATCH 093/126] adding integration / docker tests to the full run and setting to cass docker to 2.2 --- spec/integration/outputs/cassandra_spec.rb | 2 +- spec/integration/outputs/integration_helper.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 2ff894a..22476a0 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -21,7 +21,7 @@ def self.get_assert_set_equallity() end end -describe "client create actions", :integration => true do +describe "client create actions", :docker => true do before(:each) do get_session().execute("CREATE KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") end diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb index bd3b8bd..90a0da5 100644 --- a/spec/integration/outputs/integration_helper.rb +++ b/spec/integration/outputs/integration_helper.rb @@ -5,7 +5,7 @@ CONTAINER_NAME = "logstash-output-cassandra-#{rand(999).to_s}" CONTAINER_IMAGE = "cassandra" -CONTAINER_TAG = "2" +CONTAINER_TAG = "2.2" module CassandraHelper def get_host_ip @@ -37,7 +37,7 @@ def get_session config.include CassandraHelper # this :all hook gets run before every describe block that is tagged 
with :integration => true. - config.before(:all, :integration => true) do + config.before(:all, :docker => true) do # check if container exists already before creating new one. begin ls = Longshoreman::new From 3adbdc4baf65b1db1312e0765246afe81c4a9f96 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 11 Apr 2016 22:08:26 +0300 Subject: [PATCH 094/126] cleanup of event parser according to ruby mine --- .../outputs/cassandra/event_parser.rb | 106 ++++--- spec/unit/outputs/event_parser_spec.rb | 300 +++++++++--------- 2 files changed, 208 insertions(+), 198 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 4b23772..340f0be 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -1,34 +1,34 @@ # encoding: utf-8 -require "time" -require "cassandra" +require 'time' +require 'cassandra' module LogStash; module Outputs; module Cassandra class EventParser def initialize(options) - @logger = options["logger"] - @table = options["table"] - @filter_transform_event_key = options["filter_transform_event_key"] - assert_filter_transform_structure(options["filter_transform"]) if options["filter_transform"] - @filter_transform = options["filter_transform"] - @hints = options["hints"] - @ignore_bad_values = options["ignore_bad_values"] + @logger = options['logger'] + @table = options['table'] + @filter_transform_event_key = options['filter_transform_event_key'] + assert_filter_transform_structure(options['filter_transform']) if options['filter_transform'] + @filter_transform = options['filter_transform'] + @hints = options['hints'] + @ignore_bad_values = options['ignore_bad_values'] end def parse(event) action = {} - action["table"] = event.sprintf(@table) + action['table'] = event.sprintf(@table) filter_transform = get_filter_transform(event) if filter_transform - action["data"] = {} - for filter in filter_transform + action['data'] = {} + 
filter_transform.each { |filter| add_event_value_from_filter_to_action(event, filter, action) - end + } else add_event_data_using_configured_hints(event, action) end - @logger.debug("event parsed to action", :action => action) - return action + @logger.debug('event parsed to action', :action => action) + action end private @@ -40,37 +40,37 @@ def get_filter_transform(event) elsif @filter_transform.length > 0 filter_transform = @filter_transform end - return filter_transform + filter_transform end def assert_filter_transform_structure(filter_transform) - for item in filter_transform - if !item.has_key?("event_key") || !item.has_key?("column_name") - raise "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" + filter_transform.each { |item| + if !item.has_key?('event_key') || !item.has_key?('column_name') + raise ArgumentError, "item is incorrectly configured in filter_transform:\nitem => #{item}\nfilter_transform => #{filter_transform}" end - end + } end def add_event_value_from_filter_to_action(event, filter, action) - event_data = event.sprintf(filter["event_key"]) - if !filter.fetch("expansion_only", false) + event_data = event.sprintf(filter['event_key']) + if !filter.fetch('expansion_only', false) event_data = event[event_data] end - if filter.has_key?("cassandra_type") - cassandra_type = event.sprintf(filter["cassandra_type"]) + if filter.has_key?('cassandra_type') + cassandra_type = event.sprintf(filter['cassandra_type']) event_data = convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type) end - column_name = event.sprintf(filter["column_name"]) - action["data"][column_name] = event_data + column_name = event.sprintf(filter['column_name']) + action['data'][column_name] = event_data end def add_event_data_using_configured_hints(event, action) - action["data"] = event.to_hash() + action['data'] = event.to_hash # Filter out @timestamp, @version, etc to be able to use 
elasticsearch input plugin directly - action["data"].reject!{|key| %r{^@} =~ key} + action['data'].reject!{|key| %r{^@} =~ key} @hints.each do |event_key, cassandra_type| - if action["data"].has_key?(event_key) - action["data"][event_key] = convert_value_to_cassandra_type_or_default_if_configured(action["data"][event_key], cassandra_type) + if action['data'].has_key?(event_key) + action['data'][event_key] = convert_value_to_cassandra_type_or_default_if_configured(action['data'][event_key], cassandra_type) end end end @@ -83,14 +83,16 @@ def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassand error_message = "Cannot convert `value (`#{event_data}`) to `#{cassandra_type}` type" if @ignore_bad_values case cassandra_type - when "float", "int", "varint", "bigint", "double", "counter", "timestamp" + when 'float', 'int', 'varint', 'bigint', 'double', 'counter', 'timestamp' typed_event_data = convert_value_to_cassandra_type(0, cassandra_type) - when "timeuuid" - typed_event_data = convert_value_to_cassandra_type("00000000-0000-0000-0000-000000000000", cassandra_type) - when "inet" - typed_event_data = convert_value_to_cassandra_type("0.0.0.0", cassandra_type) + when 'timeuuid' + typed_event_data = convert_value_to_cassandra_type('00000000-0000-0000-0000-000000000000', cassandra_type) + when 'inet' + typed_event_data = convert_value_to_cassandra_type('0.0.0.0', cassandra_type) when /^set<.*>$/ typed_event_data = convert_value_to_cassandra_type([], cassandra_type) + else + raise ArgumentError, "unable to provide a default value for type #{event_data}" end @logger.warn(error_message, :exception => e, :backtrace => e.backtrace) else @@ -98,12 +100,12 @@ def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassand raise error_message end end - return typed_event_data + typed_event_data end def convert_value_to_cassandra_type(event_data, cassandra_type) case cassandra_type - when "timestamp" + when 'timestamp' converted_value = 
event_data if converted_value.is_a?(Numeric) converted_value = Time.at(converted_value) @@ -111,38 +113,38 @@ def convert_value_to_cassandra_type(event_data, cassandra_type) converted_value = Time::parse(event_data.to_s) end return ::Cassandra::Types::Timestamp.new(converted_value) - when "inet" + when 'inet' return ::Cassandra::Types::Inet.new(event_data) - when "float" + when 'float' return ::Cassandra::Types::Float.new(event_data) - when "varchar" + when 'varchar' return ::Cassandra::Types::Varchar.new(event_data) - when "text" + when 'text' return ::Cassandra::Types::Text.new(event_data) - when "blob" + when 'blob' return ::Cassandra::Types::Blob.new(event_data) - when "ascii" + when 'ascii' return ::Cassandra::Types::Ascii.new(event_data) - when "bigint" + when 'bigint' return ::Cassandra::Types::Bigint.new(event_data) - when "counter" + when 'counter' return ::Cassandra::Types::Counter.new(event_data) - when "int" + when 'int' return ::Cassandra::Types::Int.new(event_data) - when "varint" + when 'varint' return ::Cassandra::Types::Varint.new(event_data) - when "boolean" + when 'boolean' return ::Cassandra::Types::Boolean.new(event_data) - when "decimal" + when 'decimal' return ::Cassandra::Types::Decimal.new(event_data) - when "double" + when 'double' return ::Cassandra::Types::Double.new(event_data) - when "timeuuid" + when 'timeuuid' return ::Cassandra::Types::Timeuuid.new(event_data) when /^set<(.*)>$/ # convert each value # then add all to an array and convert to set - converted_items = ::Set.new() + converted_items = ::Set.new set_type = $1 event_data.each { |item| converted_item = convert_value_to_cassandra_type(item, set_type) diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb index 9fd18a3..78c7c9c 100644 --- a/spec/unit/outputs/event_parser_spec.rb +++ b/spec/unit/outputs/event_parser_spec.rb @@ -1,296 +1,304 @@ # encoding: utf-8 -require_relative "../../cassandra_spec_helper" -require 
"logstash/outputs/cassandra/event_parser" +require_relative '../../cassandra_spec_helper' +require 'logstash/outputs/cassandra/event_parser' RSpec.describe LogStash::Outputs::Cassandra::EventParser do let(:sut) { LogStash::Outputs::Cassandra::EventParser } let(:default_opts) { - logger = double() + logger = double allow(logger).to(receive(:debug)) return { - "logger" => logger, - "table" => "dummy", - "filter_transform_event_key" => nil, - "filter_transform" => [], - "hints" => {}, - "ignore_bad_values" => false + 'logger' => logger, + 'table' => 'dummy', + 'filter_transform_event_key' => nil, + 'filter_transform' => [], + 'hints' => {}, + 'ignore_bad_values' => false } } - let(:sample_event) { LogStash::Event.new("message" => "sample message here") } + let(:sample_event) { LogStash::Event.new('message' => 'sample message here') } - describe "table name parsing" do - it "leaves regular table names unchanged" do - sut_instance = sut.new(default_opts.update({ "table" => "simple" })) + describe 'table name parsing' do + it 'leaves regular table names unchanged' do + sut_instance = sut.new(default_opts.update({ 'table' => 'simple' })) action = sut_instance.parse(sample_event) - expect(action["table"]).to(eq("simple")) + expect(action['table']).to(eq('simple')) end - it "allows for string expansion in table names" do - sut_instance = sut.new(default_opts.update({ "table" => "%{[a_field]}" })) - sample_event["a_field"] = "a_value" + it 'allows for string expansion in table names' do + sut_instance = sut.new(default_opts.update({ 'table' => '%{[a_field]}' })) + sample_event['a_field'] = 'a_value' action = sut_instance.parse(sample_event) - expect(action["table"]).to(eq("a_value")) + expect(action['table']).to(eq('a_value')) end end - describe "filter transforms" do - describe "from config" do - describe "malformed configurations" do - it "fails if the transform has no event_data setting" do - expect { sut.new(default_opts.update({ "filter_transform" => [{ "column_name" => 
"" }] })) }.to raise_error(/item is incorrectly configured/) + describe 'filter transforms' do + describe 'from config' do + describe 'malformed configurations' do + it 'fails if the transform has no event_data setting' do + expect { sut.new(default_opts.update({ 'filter_transform' => [{ 'column_name' => '' }] })) }.to raise_error(/item is incorrectly configured/) end - it "fails if the transform has no column_name setting" do - expect { sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "" }] })) }.to raise_error(/item is incorrectly configured/) + it 'fails if the transform has no column_name setting' do + expect { sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => '' }] })) }.to raise_error(/item is incorrectly configured/) end end - describe "properly configured" do - it "maps the event key to the column" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }] })) - sample_event["a_field"] = "a_value" + describe 'properly configured' do + it 'maps the event key to the column' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }] })) + sample_event['a_field'] = 'a_value' action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq("a_value")) + expect(action['data']['a_column']).to(eq('a_value')) end - it "works with multiple filter transforms" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column" }, { "event_key" => "another_field", "column_name" => "a_different_column" }] })) - sample_event["a_field"] = "a_value" - sample_event["another_field"] = "a_second_value" + it 'works with multiple filter transforms' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }, { 'event_key' => 'another_field', 
'column_name' => 'a_different_column' }] })) + sample_event['a_field'] = 'a_value' + sample_event['another_field'] = 'a_second_value' action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq("a_value")) - expect(action["data"]["a_different_column"]).to(eq("a_second_value")) + expect(action['data']['a_column']).to(eq('a_value')) + expect(action['data']['a_different_column']).to(eq('a_second_value')) end - it "allows for string expansion in event keys" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{[pointer_to_another_field]}", "column_name" => "a_column" }] })) - sample_event["pointer_to_another_field"] = "another_field" - sample_event["another_field"] = "a_value" + it 'allows for string expansion in event keys' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => '%{[pointer_to_another_field]}', 'column_name' => 'a_column' }] })) + sample_event['pointer_to_another_field'] = 'another_field' + sample_event['another_field'] = 'a_value' action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq("a_value")) + expect(action['data']['a_column']).to(eq('a_value')) end - it "allows for string expansion only filters for things like date string formats" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "%{+yyyyMMddHHmm}", "expansion_only" => true, "column_name" => "a_column" }] })) + it 'allows for string expansion only filters for things like date string formats' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => '%{+yyyyMMddHHmm}', 'expansion_only' => true, 'column_name' => 'a_column' }] })) expected_value = Time.now.getutc.strftime('%Y%m%d%H%M') action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq(expected_value)) + expect(action['data']['a_column']).to(eq(expected_value)) end - it "allows for string expansion in column names" do 
- sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "%{[pointer_to_another_field]}" }] })) - sample_event["a_field"] = "a_value" - sample_event["pointer_to_another_field"] = "a_different_column" + it 'allows for string expansion in column names' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => '%{[pointer_to_another_field]}' }] })) + sample_event['a_field'] = 'a_value' + sample_event['pointer_to_another_field'] = 'a_different_column' action = sut_instance.parse(sample_event) - expect(action["data"]["a_different_column"]).to(eq("a_value")) + expect(action['data']['a_different_column']).to(eq('a_value')) end end - describe "cassandra type mapping" do + describe 'cassandra type mapping' do [ - { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => Time::parse("1979-07-27 00:00:00 +0300") }, - { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => "1982-05-04 00:00:00 +0300", expected: Time::parse("1982-05-04 00:00:00 +0300") }, - { :name => "timestamp", :type => ::Cassandra::Types::Timestamp, :value => 1457606758, expected: Time.at(1457606758) }, - { :name => "inet", :type => ::Cassandra::Types::Inet, :value => "0.0.0.0" }, - { :name => "float", :type => ::Cassandra::Types::Float, :value => "10.15" }, - { :name => "varchar", :type => ::Cassandra::Types::Varchar, :value => "a varchar" }, - { :name => "text", :type => ::Cassandra::Types::Text, :value => "some text" }, - { :name => "blob", :type => ::Cassandra::Types::Blob, :value => "12345678" }, - { :name => "ascii", :type => ::Cassandra::Types::Ascii, :value => "some ascii" }, - { :name => "bigint", :type => ::Cassandra::Types::Bigint, :value => "100" }, - { :name => "counter", :type => ::Cassandra::Types::Counter, :value => "15" }, - { :name => "int", :type => ::Cassandra::Types::Int, :value => "123" }, - { :name => "varint", :type => 
::Cassandra::Types::Varint, :value => "345" }, - { :name => "boolean", :type => ::Cassandra::Types::Boolean, :value => "true" }, - { :name => "decimal", :type => ::Cassandra::Types::Decimal, :value => "0.12E2" }, - { :name => "double", :type => ::Cassandra::Types::Double, :value => "123.65" }, - { :name => "timeuuid", :type => ::Cassandra::Types::Timeuuid, :value => "00000000-0000-0000-0000-000000000000" } + { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => Time::parse('1979-07-27 00:00:00 +0300') }, + { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => '1982-05-04 00:00:00 +0300', expected: Time::parse('1982-05-04 00:00:00 +0300') }, + { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => 1457606758, expected: Time.at(1457606758) }, + { :name => 'inet', :type => ::Cassandra::Types::Inet, :value => '0.0.0.0' }, + { :name => 'float', :type => ::Cassandra::Types::Float, :value => '10.15' }, + { :name => 'varchar', :type => ::Cassandra::Types::Varchar, :value => 'a varchar' }, + { :name => 'text', :type => ::Cassandra::Types::Text, :value => 'some text' }, + { :name => 'blob', :type => ::Cassandra::Types::Blob, :value => '12345678' }, + { :name => 'ascii', :type => ::Cassandra::Types::Ascii, :value => 'some ascii' }, + { :name => 'bigint', :type => ::Cassandra::Types::Bigint, :value => '100' }, + { :name => 'counter', :type => ::Cassandra::Types::Counter, :value => '15' }, + { :name => 'int', :type => ::Cassandra::Types::Int, :value => '123' }, + { :name => 'varint', :type => ::Cassandra::Types::Varint, :value => '345' }, + { :name => 'boolean', :type => ::Cassandra::Types::Boolean, :value => 'true' }, + { :name => 'decimal', :type => ::Cassandra::Types::Decimal, :value => '0.12E2' }, + { :name => 'double', :type => ::Cassandra::Types::Double, :value => '123.65' }, + { :name => 'timeuuid', :type => ::Cassandra::Types::Timeuuid, :value => '00000000-0000-0000-0000-000000000000' } ].each { |mapping| # 
NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ it "properly maps #{mapping[:name]} to #{mapping[:type]}" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => mapping[:name] }] })) - sample_event["a_field"] = mapping[:value] + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => mapping[:name] }] })) + sample_event['a_field'] = mapping[:value] action = sut_instance.parse(sample_event) expected_value = mapping.has_key?(:expected) ? mapping[:expected] : mapping[:value] - expect(action["data"]["a_column"].to_s).to(eq(expected_value.to_s)) + expect(action['data']['a_column'].to_s).to(eq(expected_value.to_s)) end } - it "properly maps sets to their specific set types" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set" }] })) + it 'properly maps sets to their specific set types' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'set' }] })) original_value = [ 1, 2, 3 ] - sample_event["a_field"] = original_value + sample_event['a_field'] = original_value action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"].to_a).to(eq(original_value)) + expect(action['data']['a_column'].to_a).to(eq(original_value)) end - it "properly maps sets to their specific set types for type which also require actual conversion" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "set" }] })) - original_value = [ "00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000001", "00000000-0000-0000-0000-000000000002" ] - 
sample_event["a_field"] = original_value + it 'properly maps sets to their specific set types for type which also require actual conversion' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'set' }] })) + original_value = %w(00000000-0000-0000-0000-000000000000 00000000-0000-0000-0000-000000000001 00000000-0000-0000-0000-000000000002) + sample_event['a_field'] = original_value action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"].size).to(eq(original_value.size)) - action["data"]["a_column"].to_a.each { |item| + expect(action['data']['a_column'].size).to(eq(original_value.size)) + action['data']['a_column'].to_a.each { |item| expect(original_value).to(include(item.to_s)) } end - it "allows for string expansion in cassandra types" do - sut_instance = sut.new(default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "%{[pointer_to_a_field]}" }] })) - sample_event["a_field"] = "123" - sample_event["pointer_to_a_field"] = "int" + it 'allows for string expansion in cassandra types' do + sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => '%{[pointer_to_a_field]}' }] })) + sample_event['a_field'] = '123' + sample_event['pointer_to_a_field'] = 'int' action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq(123)) + expect(action['data']['a_column']).to(eq(123)) end - it "fails in case of an unknown type" do - options = default_opts.update({ "filter_transform" => [{ "event_key" => "a_field", "column_name" => "a_column", "cassandra_type" => "what?!" }] }) + it 'fails in case of an unknown type' do + options = default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'what?!' 
}] }) sut_instance = sut.new(options) - sample_event["a_field"] = "a_value" - expect(options["logger"]).to(receive(:error)) + sample_event['a_field'] = 'a_value' + expect(options['logger']).to(receive(:error)) expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) end end end - describe "from event" do - it "obtains the filter transform from the event if defined" do - sut_instance = sut.new(default_opts.update({ "filter_transform_event_key" => "an_event_filter" })) - sample_event["a_field"] = "a_value" - sample_event["an_event_filter"] = [{ "event_key" => "a_field", "column_name" => "a_column" }] + describe 'from event' do + it 'obtains the filter transform from the event if defined' do + sut_instance = sut.new(default_opts.update({ 'filter_transform_event_key' => 'an_event_filter' })) + sample_event['a_field'] = 'a_value' + sample_event['an_event_filter'] = [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }] action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq("a_value")) + expect(action['data']['a_column']).to(eq('a_value')) end - it "obtains the filter transform from the event even when it is in the metadata" do - sut_instance = sut.new(default_opts.update({ "filter_transform_event_key" => "[@metadata][the_filter]" })) - sample_event["a_field"] = "a_value" - sample_event["@metadata"] = { "the_filter" => [{ "event_key" => "a_field", "column_name" => "a_column" }] } + it 'obtains the filter transform from the event even when it is in the metadata' do + sut_instance = sut.new(default_opts.update({ 'filter_transform_event_key' => '[@metadata][the_filter]' })) + sample_event['a_field'] = 'a_value' + sample_event['@metadata'] = { 'the_filter' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }] } action = sut_instance.parse(sample_event) - expect(action["data"]["a_column"]).to(eq("a_value")) + expect(action['data']['a_column']).to(eq('a_value')) end end end - describe "hints" do - it "removes 
fields starting with @" do - sut_instance = sut.new(default_opts.update({ "hints" => {} })) - sample_event["leave"] = "a_value" - sample_event["@remove"] = "another_value" + describe 'hints' do + it 'removes fields starting with @' do + sut_instance = sut.new(default_opts.update({ 'hints' => {} })) + sample_event['leave'] = 'a_value' + sample_event['@remove'] = 'another_value' action = sut_instance.parse(sample_event) - expect(action["data"]["leave"]).to(eq("a_value")) - expect(action["data"]).not_to(include("@remove")) + expect(action['data']['leave']).to(eq('a_value')) + expect(action['data']).not_to(include('@remove')) end - it "does not attempt to change items with no hints" do - sut_instance = sut.new(default_opts.update({ "hints" => {} })) + it 'does not attempt to change items with no hints' do + sut_instance = sut.new(default_opts.update({ 'hints' => {} })) expected_value = [ 1, 2, 3 ] - sample_event["no_hint_here"] = expected_value + sample_event['no_hint_here'] = expected_value action = sut_instance.parse(sample_event) - expect(action["data"]["no_hint_here"]).to(equal(expected_value)) + expect(action['data']['no_hint_here']).to(equal(expected_value)) end - it "converts items with hints" do - sut_instance = sut.new(default_opts.update({ "hints" => { "a_set" => "set", "an_int" => "int" } })) + it 'converts items with hints' do + sut_instance = sut.new(default_opts.update({ 'hints' => { 'a_set' => 'set', 'an_int' => 'int' } })) original_set = [ 1, 2, 3 ] - sample_event["a_set"] = original_set - sample_event["an_int"] = "123" + sample_event['a_set'] = original_set + sample_event['an_int'] = '123' action = sut_instance.parse(sample_event) - expect(action["data"]["a_set"]).to(be_a(Set)) - expect(action["data"]["a_set"].to_a).to(eql(original_set)) - expect(action["data"]["an_int"]).to(eql(123)) + expect(action['data']['a_set']).to(be_a(Set)) + expect(action['data']['a_set'].to_a).to(eql(original_set)) + expect(action['data']['an_int']).to(eql(123)) end - it 
"fails for unknown hint types" do - options = default_opts.update({ "hints" => { "a_field" => "not_a_real_type" } }) + it 'fails for unknown hint types' do + options = default_opts.update({ 'hints' => { 'a_field' => 'not_a_real_type' } }) sut_instance = sut.new(options) - expect(options["logger"]).to(receive(:error)) + expect(options['logger']).to(receive(:error)) - sample_event["a_field"] = "a value" + sample_event['a_field'] = 'a value' expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) end - it "fails for unsuccessful hint conversion" do - options = default_opts.update({ "hints" => { "a_field" => "int" } }) - expect(options["logger"]).to(receive(:error)) + it 'fails for unsuccessful hint conversion' do + options = default_opts.update({ 'hints' => { 'a_field' => 'int' } }) + expect(options['logger']).to(receive(:error)) sut_instance = sut.new(options) - sample_event["a_field"] = "i am not an int!!!" + sample_event['a_field'] = 'i am not an int!!!' expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/) end end - describe "ignore_bad_values is turned on" do + describe 'ignore_bad_values is turned on' do [ - { :name => "timestamp", :value => "i dont have to_time", :expected => Time::parse("1970-01-01 00:00:00 +0000") }, - { :name => "inet", :value => "i am not an inet address", :expected => "0.0.0.0" }, - { :name => "float", :value => "i am not a float", :expected => 0.0 }, - { :name => "bigint", :value => "i am not a bigint", :expected => 0 }, - { :name => "counter", :value => "i am not a counter", :expected => 0 }, - { :name => "int", :value => "i am not a int", :expected => 0 }, - { :name => "varint", :value => "i am not a varint", :expected => 0 }, - { :name => "double", :value => "i am not a double", :expected => 0.0 }, - { :name => "timeuuid", :value => "i am not a timeuuid", :expected => "00000000-0000-0000-0000-000000000000" } + { :name => 'timestamp', :value => 'i dont have to_time', :expected => 
Time::parse('1970-01-01 00:00:00 +0000') }, + { :name => 'inet', :value => 'i am not an inet address', :expected => '0.0.0.0' }, + { :name => 'float', :value => 'i am not a float', :expected => 0.0 }, + { :name => 'bigint', :value => 'i am not a bigint', :expected => 0 }, + { :name => 'counter', :value => 'i am not a counter', :expected => 0 }, + { :name => 'int', :value => 'i am not a int', :expected => 0 }, + { :name => 'varint', :value => 'i am not a varint', :expected => 0 }, + { :name => 'double', :value => 'i am not a double', :expected => 0.0 }, + { :name => 'timeuuid', :value => 'i am not a timeuuid', :expected => '00000000-0000-0000-0000-000000000000' } ].each { |mapping| # NOTE: this is not the best test there is, but it is the best / simplest I could think of :/ it "properly defaults #{mapping[:name]}" do - options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => mapping[:name] } }) - expect(options["logger"]).to(receive(:warn)) + options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => mapping[:name] } }) + expect(options['logger']).to(receive(:warn)) sut_instance = sut.new(options) - sample_event["a_field"] = mapping[:value] + sample_event['a_field'] = mapping[:value] action = sut_instance.parse(sample_event) - expect(action["data"]["a_field"].to_s).to(eq(mapping[:expected].to_s)) + expect(action['data']['a_field'].to_s).to(eq(mapping[:expected].to_s)) end } - it "properly default sets" do - options = default_opts.update({ "ignore_bad_values" => true, "hints" => { "a_field" => "set" } }) - expect(options["logger"]).to(receive(:warn)) + it 'properly default sets' do + options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => 'set' } }) + expect(options['logger']).to(receive(:warn)) sut_instance = sut.new(options) - sample_event["a_field"] = "i am not a set" + sample_event['a_field'] = 'i am not a set' action = sut_instance.parse(sample_event) - 
expect(action["data"]["a_field"].size).to(be(0)) - expect(action["data"]["a_field"]).to(be_an_instance_of(Set)) + expect(action['data']['a_field'].size).to(be(0)) + expect(action['data']['a_field']).to(be_an_instance_of(Set)) + end + + it 'raises an ArgumentError in case we try to default a type we dont know' do + options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => 'map' } }) + sut_instance = sut.new(options) + sample_event['a_field'] = 'i am not a set' + + expect { sut_instance.parse(sample_event) }.to raise_error ArgumentError end end end From a53d8e6f1b58d3f8ac8a63403872ec2cd6d35e0f Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 11 Apr 2016 22:29:23 +0300 Subject: [PATCH 095/126] cleanup of safe submiter according to ruby mine --- .../outputs/cassandra/event_parser.rb | 2 +- .../outputs/cassandra/safe_submitter.rb | 68 ++++----- spec/unit/outputs/safe_submitter_spec.rb | 139 +++++++++--------- 3 files changed, 105 insertions(+), 104 deletions(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 340f0be..372cea7 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -53,7 +53,7 @@ def assert_filter_transform_structure(filter_transform) def add_event_value_from_filter_to_action(event, filter, action) event_data = event.sprintf(filter['event_key']) - if !filter.fetch('expansion_only', false) + unless filter.fetch('expansion_only', false) event_data = event[event_data] end if filter.has_key?('cassandra_type') diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 138b7f1..e531e03 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -1,12 +1,12 @@ # encoding: utf-8 -require "cassandra" -require "logstash/outputs/cassandra/backoff_retry_policy" +require 'cassandra' +require 
'logstash/outputs/cassandra/backoff_retry_policy' module LogStash; module Outputs; module Cassandra class SafeSubmitter def initialize(options) @statement_cache = {} - @logger = options["logger"] + @logger = options['logger'] setup_cassandra_session(options) end @@ -14,71 +14,71 @@ def submit(actions) begin futures = actions.map do |action| query = get_query(action) - execute_async(query, action["data"].values) + execute_async(query, action['data'].values) end futures.each(&:join) rescue Exception => e - @logger.error("Failed to send batch to cassandra", :actions => actions, :exception => e, :backtrace => e.backtrace) + @logger.error('Failed to send batch to cassandra', :actions => actions, :exception => e, :backtrace => e.backtrace) end end private def setup_cassandra_session(options) - cluster = options["cassandra"].cluster( - username: options["username"], - password: options["password"], - protocol_version: options["protocol_version"], - hosts: options["hosts"], - port: options["port"], - consistency: options["consistency"].to_sym, - timeout: options["request_timeout"], - retry_policy: get_retry_policy(options["retry_policy"]), - logger: options["logger"] + cluster = options['cassandra'].cluster( + username: options['username'], + password: options['password'], + protocol_version: options['protocol_version'], + hosts: options['hosts'], + port: options['port'], + consistency: options['consistency'].to_sym, + timeout: options['request_timeout'], + retry_policy: get_retry_policy(options['retry_policy']), + logger: options['logger'] ) - @session = cluster.connect(options["keyspace"]) + @session = cluster.connect(options['keyspace']) end def get_retry_policy(retry_policy) - case retry_policy["type"] - when "default" + case retry_policy['type'] + when 'default' return ::Cassandra::Retry::Policies::Default.new - when "downgrading_consistency" + when 'downgrading_consistency' return ::Cassandra::Retry::Policies::DowngradingConsistency.new - when "failthrough" + when 
'failthrough' return ::Cassandra::Retry::Policies::Fallthrough.new - when "backoff" + when 'backoff' return ::Cassandra::Retry::Policies::Backoff.new({ - "backoff_type" => retry_policy["backoff_type"], "backoff_size" => retry_policy["backoff_size"], - "retry_limit" => retry_policy["retry_limit"], "logger" => @logger + 'backoff_type' => retry_policy['backoff_type'], 'backoff_size' => retry_policy['backoff_size'], + 'retry_limit' => retry_policy['retry_limit'], 'logger' => @logger }) else - raise ArgumentError, "unknown retry policy type: #{retry_policy["type"]}" + raise ArgumentError, "unknown retry policy type: #{retry_policy['type']}" end end def get_query(action) - @logger.debug("generating query for action", :action => action) + @logger.debug('generating query for action', :action => action) query = -"INSERT INTO #{action["table"]} (#{action["data"].keys.join(', ')}) -VALUES (#{("?" * action["data"].keys.count).split(//) * ", "})" - if !@statement_cache.has_key?(query) - @logger.debug("new query generated", :query => query) +"INSERT INTO #{action['table']} (#{action['data'].keys.join(', ')}) +VALUES (#{('?' * action['data'].keys.count).split(//) * ', '})" + unless @statement_cache.has_key?(query) + @logger.debug('preparing new query', :query => query) @statement_cache[query] = @session.prepare(query) end - return @statement_cache[query] + @statement_cache[query] end def execute_async(query, arguments) future = @session.execute_async(query, arguments: arguments) future.on_failure { |error| - @logger.error("error executing insert", :query => query, :arguments => arguments, :error => error) + @logger.error('error executing insert', :query => query, :arguments => arguments, :error => error) } future.on_complete { |value, error| - if !error.nil? - @logger.error("error executing insert", :query => query, :arguments => arguments, :error => error) + unless error.nil? 
+ @logger.error('error executing insert', :query => query, :arguments => arguments, :error => error) end } - return future + future end end end end end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index e734460..3f0674f 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -1,150 +1,151 @@ # encoding: utf-8 -require_relative "../../cassandra_spec_helper" -require "logstash/outputs/cassandra/safe_submitter" +require_relative '../../cassandra_spec_helper' +require 'logstash/outputs/cassandra/safe_submitter' RSpec.describe LogStash::Outputs::Cassandra::SafeSubmitter do let(:sut) { LogStash::Outputs::Cassandra::SafeSubmitter } let(:default_options) { - logger = double() + logger = double allow(logger).to(receive(:debug)) + allow(logger).to(receive(:error)) { - "logger" => logger, - "cassandra" => double(), - "username" => "a user", - "password" => "a password", - "protocol_version" => 3, - "hosts" => "some host", - "port" => 9042, - "consistency" => "one", - "request_timeout" => 10, - "retry_policy" => { "type" => "default" }, - "concrete_retry_policy" => ::Cassandra::Retry::Policies::Default, - "keyspace" => "the final frontier" + 'logger' => logger, + 'cassandra' => double, + 'username' => 'a user', + 'password' => 'a password', + 'protocol_version' => 3, + 'hosts' => 'some host', + 'port' => 9042, + 'consistency' => 'one', + 'request_timeout' => 10, + 'retry_policy' => { 'type' => 'default' }, + 'concrete_retry_policy' => ::Cassandra::Retry::Policies::Default, + 'keyspace' => 'the final frontier' } } def setup_session_double(options) - session_double = double() - cluster_double = double() - expect(cluster_double).to(receive(:connect)).with(options["keyspace"]).and_return(session_double) - expect(options["cassandra"]).to(receive(:cluster).with( - username: options["username"], - password: options["password"], - protocol_version: options["protocol_version"], - hosts: 
options["hosts"], - port: options["port"], - consistency: options["consistency"].to_sym, - timeout: options["request_timeout"], - retry_policy: options["concrete_retry_policy"], - logger: options["logger"] + session_double = double + cluster_double = double + expect(cluster_double).to(receive(:connect)).with(options['keyspace']).and_return(session_double) + expect(options['cassandra']).to(receive(:cluster).with( + username: options['username'], + password: options['password'], + protocol_version: options['protocol_version'], + hosts: options['hosts'], + port: options['port'], + consistency: options['consistency'].to_sym, + timeout: options['request_timeout'], + retry_policy: options['concrete_retry_policy'], + logger: options['logger'] )).and_return(cluster_double) return { :session_double => session_double } end - describe "init" do - it "properly inits the cassandra session" do + describe 'init' do + it 'properly inits the cassandra session' do setup_session_double(default_options) sut.new(default_options) end [ - { :setting => { "type" => "default" }, :concrete_retry_policy => ::Cassandra::Retry::Policies::Default }, - { :setting => { "type" => "downgrading_consistency" }, :concrete_retry_policy => ::Cassandra::Retry::Policies::DowngradingConsistency }, - { :setting => { "type" => "failthrough" }, :concrete_retry_policy => ::Cassandra::Retry::Policies::Fallthrough }, - { :setting => { "type" => "backoff", "backoff_type" => "**", "backoff_size" => 2, "retry_limit" => 10 }, + { :setting => { 'type' => 'default' }, :concrete_retry_policy => ::Cassandra::Retry::Policies::Default }, + { :setting => { 'type' => 'downgrading_consistency' }, :concrete_retry_policy => ::Cassandra::Retry::Policies::DowngradingConsistency }, + { :setting => { 'type' => 'failthrough' }, :concrete_retry_policy => ::Cassandra::Retry::Policies::Fallthrough }, + { :setting => { 'type' => 'backoff', 'backoff_type' => '**', 'backoff_size' => 2, 'retry_limit' => 10 }, :concrete_retry_policy => 
::Cassandra::Retry::Policies::Backoff } ].each { |mapping| - it "supports the #{mapping[:concrete_retry_policy]} retry policy by passing #{mapping[:setting]["type"]} as the retry_policy" do - options = default_options.update({ "retry_policy" => mapping[:setting], "concrete_retry_policy" => mapping[:concrete_retry_policy] }) + it "supports the #{mapping[:concrete_retry_policy]} retry policy by passing #{mapping[:setting]['type']} as the retry_policy" do + options = default_options.update({ 'retry_policy' => mapping[:setting], 'concrete_retry_policy' => mapping[:concrete_retry_policy] }) setup_session_double(options) sut.new(options) end } - it "properly initializes the backoff retry policy" do - retry_policy_config = { "type" => "backoff", "backoff_type" => "**", "backoff_size" => 2, "retry_limit" => 10 } - expected_policy = double() - options = default_options.update({ "retry_policy" => retry_policy_config, "concrete_retry_policy" => expected_policy }) + it 'properly initializes the backoff retry policy' do + retry_policy_config = { 'type' => 'backoff', 'backoff_type' => '**', 'backoff_size' => 2, 'retry_limit' => 10 } + expected_policy = double + options = default_options.update({ 'retry_policy' => retry_policy_config, 'concrete_retry_policy' => expected_policy }) expect(::Cassandra::Retry::Policies::Backoff).to(receive(:new).with({ - "backoff_type" => options["retry_policy"]["backoff_type"], "backoff_size" => options["retry_policy"]["backoff_size"], - "retry_limit" => options["retry_policy"]["retry_limit"], "logger" => options["logger"]}).and_return(expected_policy)) + 'backoff_type' => options['retry_policy']['backoff_type'], 'backoff_size' => options['retry_policy']['backoff_size'], + 'retry_limit' => options['retry_policy']['retry_limit'], 'logger' => options['logger']}).and_return(expected_policy)) setup_session_double(options) sut.new(options) end - it "fails if the retry policy is unknown" do - options = default_options.update({ "retry_policy" => "bad 
policy" }) + it 'fails if the retry policy is unknown' do + options = default_options.update({ 'retry_policy' => 'bad policy' }) expect { sut.new(options) }.to(raise_error(ArgumentError)) end end - describe "execution" do + describe 'execution' do let(:one_action) {{ - "table" => "a_table", - "data" => { - "a_column" => "a_value", - "another_column" => "another_value" + 'table' => 'a_table', + 'data' => { + 'a_column' => 'a_value', + 'another_column' => 'another_value' } }} let(:expected_query_for_one_action) { "INSERT INTO a_table (a_column, another_column)\nVALUES (?, ?)" } let(:another_action) {{ - "table" => "another_table", - "data" => { - "a_column" => "a_value", - "another_column" => "another_value", - "a_third_column" => "another_value" + 'table' => 'another_table', + 'data' => { + 'a_column' => 'a_value', + 'another_column' => 'another_value', + 'a_third_column' => 'another_value' } }} let(:expected_query_for_another_action) { "INSERT INTO another_table (a_column, another_column, a_third_column)\nVALUES (?, ?, ?)" } - def generate_future_double() - future_double = double() + def generate_future_double + future_double = double expect(future_double).to(receive(:join)) expect(future_double).to(receive(:on_failure)) expect(future_double).to(receive(:on_complete)) return future_double end - it "prepares and executes the query" do + it 'prepares and executes the query' do doubles = setup_session_double(default_options) - expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action)).and_return("eureka") - expect(doubles[:session_double]).to(receive(:execute_async).with("eureka", :arguments => one_action["data"].values)).and_return(generate_future_double()) + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action)).and_return('eureka') + expect(doubles[:session_double]).to(receive(:execute_async).with('eureka', :arguments => one_action['data'].values)).and_return(generate_future_double) sut_instance = 
sut.new(default_options) sut_instance.submit([one_action]) end - it "caches the generated query" do + it 'caches the generated query' do doubles = setup_session_double(default_options) - expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return("eureka") + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return('eureka') 2.times { - expect(doubles[:session_double]).to(receive(:execute_async).with("eureka", :arguments => one_action["data"].values)).and_return(generate_future_double()) + expect(doubles[:session_double]).to(receive(:execute_async).with('eureka', :arguments => one_action['data'].values)).and_return(generate_future_double) } sut_instance = sut.new(default_options) sut_instance.submit([one_action, one_action]) end - it "does not confuse between a new query and cached queries" do + it 'does not confuse between a new query and cached queries' do doubles = setup_session_double(default_options) - expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return("eureka") - expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_another_action).once).and_return("great scott") - expect(doubles[:session_double]).to(receive(:execute_async).with("eureka", :arguments => one_action["data"].values)).and_return(generate_future_double()) - expect(doubles[:session_double]).to(receive(:execute_async).with("great scott", :arguments => another_action["data"].values)).and_return(generate_future_double()) + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action).once).and_return('eureka') + expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_another_action).once).and_return('great scott') + expect(doubles[:session_double]).to(receive(:execute_async).with('eureka', :arguments => one_action['data'].values)).and_return(generate_future_double) + 
expect(doubles[:session_double]).to(receive(:execute_async).with('great scott', :arguments => another_action['data'].values)).and_return(generate_future_double) sut_instance = sut.new(default_options) sut_instance.submit([one_action, another_action]) end - it "logs and skips failed batches" do + it 'logs and skips failed batches' do setup_session_double(default_options) sut_instance = sut.new(default_options) expect(sut_instance).to(receive(:get_query).and_raise(ArgumentError)) - expect(default_options["logger"]).to(receive(:error)) + expect(default_options['logger']).to(receive(:error)) expect { sut_instance.submit([one_action]) }.to_not raise_error end From 7c41ff5476a2a09e0e275194e4b7635f49cbcb21 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 11 Apr 2016 22:33:21 +0300 Subject: [PATCH 096/126] cleanup of backoff retry policy according to ruby mine --- .../outputs/cassandra/backoff_retry_policy.rb | 38 +++++----- .../unit/outputs/backoff_retry_policy_spec.rb | 76 +++++++++---------- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index 1c03b2e..1e1c029 100644 --- a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -1,33 +1,33 @@ # encoding: utf-8 # This is a version of the default retry policy (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb) with backoff retry configuration options -require "cassandra" +require 'cassandra' module Cassandra module Retry module Policies class Backoff - include Policy + include ::Cassandra::Retry::Policy def initialize(opts) - @logger = opts["logger"] - @backoff_type = opts["backoff_type"] - @backoff_size = opts["backoff_size"] - @retry_limit = opts["retry_limit"] + @logger = opts['logger'] + @backoff_type = opts['backoff_type'] + @backoff_size = opts['backoff_size'] + @retry_limit = 
opts['retry_limit'] end def read_timeout(statement, consistency, required, received, retrieved, retries) - return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, - :received => received, :retrieved => retrieved, :retries => retries }) + retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, + :received => received, :retrieved => retrieved, :retries => retries }) end def write_timeout(statement, consistency, type, required, received, retries) - return retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type, - :required => required, :received => received, :retries => retries }) + retry_with_backoff({ :statement => statement, :consistency => consistency, :type => type, + :required => required, :received => received, :retries => retries }) end def unavailable(statement, consistency, required, alive, retries) - return retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, - :alive => alive, :retries => retries }) + retry_with_backoff({ :statement => statement, :consistency => consistency, :required => required, + :alive => alive, :retries => retries }) end def retry_with_backoff(opts) @@ -39,7 +39,7 @@ def retry_with_backoff(opts) @logger.error('activating backoff wait', :opts => opts) backoff_wait_before_next_retry(opts[:retries]) - return try_again(opts[:consistency]) + try_again(opts[:consistency]) end def backoff_wait_before_next_retry(retries) @@ -48,14 +48,14 @@ def backoff_wait_before_next_retry(retries) end def calculate_backoff_wait_time(retries) - backoff_wait_time = 0 case @backoff_type - when "**" - backoff_wait_time = @backoff_size ** retries - when "*" - backoff_wait_time = @backoff_size * retries + when '**' + return @backoff_size ** retries + when '*' + return @backoff_size * retries + else + raise ArgumentError, "unknown backoff type #{@backoff_type}" end - return backoff_wait_time end 
end end diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 4734e6b..7840b56 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -1,30 +1,30 @@ # encoding: utf-8 -require_relative "../../cassandra_spec_helper" -require "logstash/outputs/cassandra/backoff_retry_policy" +require_relative '../../cassandra_spec_helper' +require 'logstash/outputs/cassandra/backoff_retry_policy' RSpec.describe ::Cassandra::Retry::Policies::Backoff do let(:sut) { ::Cassandra::Retry::Policies::Backoff } let(:linear_backoff) { - logger = double() + logger = double allow(logger).to(receive(:error)) { - "logger" => logger, - "backoff_type" => "*", - "backoff_size" => 5, - "retry_limit" => 10 + 'logger' => logger, + 'backoff_type' => '*', + 'backoff_size' => 5, + 'retry_limit' => 10 } } let(:exponential_backoff) { linear_backoff.merge({ - "backoff_type" => "**", - "backoff_size" => 2, - "retry_limit" => 10 + 'backoff_type' => '**', + 'backoff_size' => 2, + 'retry_limit' => 10 }) } - describe "#retry_with_backoff" do - describe "retry limit not reached" do - it "decides to try again with the same consistency level" do + describe '#retry_with_backoff' do + describe 'retry limit not reached' do + it 'decides to try again with the same consistency level' do sut_instance = sut.new(linear_backoff) decision = sut_instance.retry_with_backoff({ :retries => 0, :consistency => :one }) @@ -33,80 +33,80 @@ expect(decision.consistency).to(be(:one)) end - it "waits _before_ retrying" do + it 'waits _before_ retrying' do sut_instance = sut.new(linear_backoff) expect(Kernel).to(receive(:sleep)) sut_instance.retry_with_backoff({ :retries => 0 }) end - it "allows for exponential backoffs" do + it 'allows for exponential backoffs' do sut_instance = sut.new(exponential_backoff) - test_retry = exponential_backoff["retry_limit"] - 1 - 
expect(Kernel).to(receive(:sleep).with(exponential_backoff["backoff_size"] ** test_retry)) + test_retry = exponential_backoff['retry_limit'] - 1 + expect(Kernel).to(receive(:sleep).with(exponential_backoff['backoff_size'] ** test_retry)) sut_instance.retry_with_backoff({ :retries => test_retry }) { } end - it "allows for linear backoffs" do + it 'allows for linear backoffs' do sut_instance = sut.new(linear_backoff) - test_retry = exponential_backoff["retry_limit"] - 1 - expect(Kernel).to(receive(:sleep).with(linear_backoff["backoff_size"] * test_retry)) + test_retry = exponential_backoff['retry_limit'] - 1 + expect(Kernel).to(receive(:sleep).with(linear_backoff['backoff_size'] * test_retry)) sut_instance.retry_with_backoff({ :retries => test_retry }) { } end end - describe "retry limit reached" do - it "decides to reraise" do + describe 'retry limit reached' do + it 'decides to reraise' do sut_instance = sut.new(linear_backoff) - decision = sut_instance.retry_with_backoff({ :retries => linear_backoff["retry_limit"] + 1 }) + decision = sut_instance.retry_with_backoff({ :retries => linear_backoff['retry_limit'] + 1 }) expect(decision).to(be_an_instance_of(::Cassandra::Retry::Decisions::Reraise)) end - it "does not wait" do + it 'does not wait' do sut_instance = sut.new(linear_backoff) expect(Kernel).not_to(receive(:sleep)) - sut_instance.retry_with_backoff({ :retries => linear_backoff["retry_limit"] + 1 }) + sut_instance.retry_with_backoff({ :retries => linear_backoff['retry_limit'] + 1 }) end end end [ { - :method_name=> "read_timeout", - :expected_opts => { :statement => "statement", :consistency => :one, :required => 1, :received => 0, + :method_name=> 'read_timeout', + :expected_opts => { :statement => 'statement', :consistency => :one, :required => 1, :received => 0, :retrieved => false, :retries => 0 }, - :call_args => ["statement", :one, 1, 0, false, 0] + :call_args => ['statement', :one, 1, 0, false, 0] }, { - :method_name=> "write_timeout", - :expected_opts 
=> { :statement => "statement", :consistency => :one, :type => :prepared, + :method_name=> 'write_timeout', + :expected_opts => { :statement => 'statement', :consistency => :one, :type => :prepared, :required => 1, :received => 2, :retries => 5 }, - :call_args => ["statement", :one, :prepared, 1, 2, 5] + :call_args => ['statement', :one, :prepared, 1, 2, 5] }, { - :method_name=> "unavailable", - :expected_opts => { :statement => "statement", :consistency => :one, :required => 3, + :method_name=> 'unavailable', + :expected_opts => { :statement => 'statement', :consistency => :one, :required => 3, :alive => 2, :retries => 4}, - :call_args => ["statement", :one, 3, 2, 4] + :call_args => ['statement', :one, 3, 2, 4] } ].each { |use_case| - describe "#{use_case[:method_name]}" do - it "properly calls #retry_with_backoff" do + describe '#{use_case[:method_name]}' do + it 'properly calls #retry_with_backoff' do sut_instance = sut.new(linear_backoff) expect(sut_instance).to(receive(:retry_with_backoff).with(use_case[:expected_opts])) sut_instance.send(use_case[:method_name], *use_case[:call_args]) end - it "returns the decision it got" do + it 'returns the decision it got' do sut_instance = sut.new(linear_backoff) - expected_result = double() + expected_result = double expect(sut_instance).to(receive(:retry_with_backoff).and_return(expected_result)) result = sut_instance.send(use_case[:method_name], *use_case[:call_args]) From 07fa977f0cad1bacb49e3fc790fb5f8bbe3554fb Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 11 Apr 2016 22:45:56 +0300 Subject: [PATCH 097/126] organized the other files to match rubymine checks --- spec/cassandra_spec_helper.rb | 4 +- spec/integration/outputs/cassandra_spec.rb | 99 +++++++++---------- .../integration/outputs/integration_helper.rb | 29 +++--- 3 files changed, 64 insertions(+), 68 deletions(-) diff --git a/spec/cassandra_spec_helper.rb b/spec/cassandra_spec_helper.rb index 4fbc774..0889b28 100644 --- a/spec/cassandra_spec_helper.rb 
+++ b/spec/cassandra_spec_helper.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -require "logstash/devutils/rspec/spec_helper" -require "logstash/event" +require 'logstash/devutils/rspec/spec_helper' +require 'logstash/event' require 'simplecov' require 'simplecov-rcov' diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index 22476a0..c34b487 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -1,17 +1,17 @@ # encoding: utf-8 -require_relative "./integration_helper" -require "logstash/outputs/cassandra" +require_relative './integration_helper' +require 'logstash/outputs/cassandra' module Helper - def self.get_assert_timestamp_equallity() + def self.get_assert_timestamp_equallity Proc.new do |expect, row, type_to_test| - expect.call(row["value_column"].to_s).to(eq(Time.at(type_to_test[:value]).to_s)) + expect.call(row['value_column'].to_s).to(eq(Time.at(type_to_test[:value]).to_s)) end end - def self.get_assert_set_equallity() + def self.get_assert_set_equallity Proc.new do |expect, row, type_to_test| - set_from_cassandra = row["value_column"] + set_from_cassandra = row['value_column'] original_value = type_to_test[:value] expect.call(set_from_cassandra.size).to(eq(original_value.size)) set_from_cassandra.to_a.each { |item| @@ -21,31 +21,31 @@ def self.get_assert_set_equallity() end end -describe "client create actions", :docker => true do +describe 'client create actions', :docker => true do before(:each) do - get_session().execute("CREATE KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") + get_session.execute("CREATE KEYSPACE test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };") end after(:each) do - get_session().execute("DROP KEYSPACE test;") + get_session.execute('DROP KEYSPACE test;') end - def get_sut() + def get_sut options = { - "hosts" => [get_host_ip()], - "port" => get_port(), - 
"keyspace" => "test", - "table" => "%{[cassandra_table]}", - "username" => "cassandra", - "password" => "cassandra", - "filter_transform_event_key" => "cassandra_filter" + 'hosts' => [get_host_ip], + 'port' => get_port, + 'keyspace' => 'test', + 'table' => '%{[cassandra_table]}', + 'username' => 'cassandra', + 'password' => 'cassandra', + 'filter_transform_event_key' => 'cassandra_filter' } sut = LogStash::Outputs::CassandraOutput.new(options) return sut end def create_table(type_to_test) - get_session().execute(" + get_session.execute(" CREATE TABLE test.simple( idish_column text, value_column #{type_to_test[:type]}, @@ -55,65 +55,64 @@ def create_table(type_to_test) def build_event(type_to_test) options = { - "cassandra_table" => "simple", - "idish_field" => "some text", - "value_field" => type_to_test[:value], - "cassandra_filter" => [ - { "event_key" => "idish_field", "column_name" => "idish_column" }, - { "event_key" => "value_field", "column_name" => "value_column", "cassandra_type" => type_to_test[:type] } + 'cassandra_table' => 'simple', + 'idish_field' => 'some text', + 'value_field' => type_to_test[:value], + 'cassandra_filter' => [ + { 'event_key' => 'idish_field', 'column_name' => 'idish_column' }, + { 'event_key' => 'value_field', 'column_name' => 'value_column', 'cassandra_type' => type_to_test[:type] } ] } - event = LogStash::Event.new(options) - return event + LogStash::Event.new(options) end def assert_proper_insert(type_to_test) - result = get_session().execute("SELECT * FROM test.simple") + result = get_session.execute('SELECT * FROM test.simple') expect(result.size).to((eq(1))) result.each { |row| - expect(row["idish_column"]).to(eq("some text")) + expect(row['idish_column']).to(eq('some text')) if type_to_test.has_key?(:assert_override) expect_proc = Proc.new do |value| return expect(value) end type_to_test[:assert_override].call(expect_proc, row, type_to_test) else - expect(row["value_column"].to_s).to(eq(type_to_test[:value].to_s)) + 
expect(row['value_column'].to_s).to(eq(type_to_test[:value].to_s)) end } end [ - { type: "timestamp", value: 1457606758, assert_override: Helper::get_assert_timestamp_equallity() }, - { type: "inet", value: "192.168.99.100" }, - { type: "float", value: "10.050000190734863" }, - { type: "varchar", value: "some chars" }, - { type: "text", value: "some text" }, - { type: "blob", value: "a blob" }, - { type: "ascii", value: "some ascii" }, - { type: "bigint", value: "123456789" }, - { type: "int", value: "12345" }, - { type: "varint", value: "12345678" }, - { type: "boolean", value: "true" }, - { type: "decimal", value: "0.1015E2" }, - { type: "double", value: "200.54" }, - { type: "timeuuid", value: "d2177dd0-eaa2-11de-a572-001b779c76e3" }, - { type: "set", - value: ["d2177dd0-eaa2-11de-a572-001b779c76e3", "d2177dd0-eaa2-11de-a572-001b779c76e4", "d2177dd0-eaa2-11de-a572-001b779c76e5"], assert_override: Helper::get_assert_set_equallity() } + { type: 'timestamp', value: 1457606758, assert_override: Helper::get_assert_timestamp_equallity() }, + { type: 'inet', value: '192.168.99.100' }, + { type: 'float', value: '10.050000190734863' }, + { type: 'varchar', value: 'some chars' }, + { type: 'text', value: 'some text' }, + { type: 'blob', value: 'a blob' }, + { type: 'ascii', value: 'some ascii' }, + { type: 'bigint', value: '123456789' }, + { type: 'int', value: '12345' }, + { type: 'varint', value: '12345678' }, + { type: 'boolean', value: 'true' }, + { type: 'decimal', value: '0.1015E2' }, + { type: 'double', value: '200.54' }, + { type: 'timeuuid', value: 'd2177dd0-eaa2-11de-a572-001b779c76e3' }, + { type: 'set', + value: %w(d2177dd0-eaa2-11de-a572-001b779c76e3 d2177dd0-eaa2-11de-a572-001b779c76e4 d2177dd0-eaa2-11de-a572-001b779c76e5), assert_override: Helper::get_assert_set_equallity } ].each { |type_to_test| it "properly inserts data of type #{type_to_test[:type]}" do create_table(type_to_test) - sut = get_sut() - sut.register() + sut = get_sut + sut.register event = 
build_event(type_to_test) sut.receive(event) - sut.flush() + sut.flush assert_proper_insert(type_to_test) end } - it "properly works with counter columns" - it "properly adds multiple events to multiple tables in the same bulk" + it 'properly works with counter columns' + it 'properly adds multiple events to multiple tables in the same bulk' end diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb index 90a0da5..d865cdd 100644 --- a/spec/integration/outputs/integration_helper.rb +++ b/spec/integration/outputs/integration_helper.rb @@ -1,34 +1,31 @@ # encoding: utf-8 -require_relative "../../cassandra_spec_helper" -require "longshoreman" -require "cassandra" +require_relative '../../cassandra_spec_helper' +require 'longshoreman' +require 'cassandra' CONTAINER_NAME = "logstash-output-cassandra-#{rand(999).to_s}" -CONTAINER_IMAGE = "cassandra" -CONTAINER_TAG = "2.2" +CONTAINER_IMAGE = 'cassandra' +CONTAINER_TAG = '2.2' module CassandraHelper def get_host_ip - address = Longshoreman.new.get_host_ip - return address + Longshoreman.new.get_host_ip end def get_port container = Longshoreman::Container.new container.get(CONTAINER_NAME) - port = container.rport(9042) - return port + container.rport(9042) end def get_session cluster = ::Cassandra.cluster( - username: "cassandra", - password: "cassandra", - port: get_port(), - hosts: [get_host_ip()] + username: 'cassandra', + password: 'cassandra', + port: get_port, + hosts: [get_host_ip] ) - session = cluster.connect() - return session + cluster.connect end end @@ -52,7 +49,7 @@ def get_session }) connect_retry = 0 begin - get_session() + get_session rescue ::Cassandra::Errors::NoHostsAvailable # retry connecting for a minute connect_retry += 1 From f6c72925c8d2cd57015b4d69b9cf678f1ca2106e Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 11 Apr 2016 22:46:35 +0300 Subject: [PATCH 098/126] ignoring .sonar dir --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff 
--git a/.gitignore b/.gitignore index cbf4f18..93a3fcf 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ vendor .idea coverage tmp +.sonar From 1d5c3da2d9b20f305fcd3b075acb43309d1a84ee Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Mon, 11 Apr 2016 23:10:10 +0300 Subject: [PATCH 099/126] some changes due to metric_fu input --- lib/logstash/outputs/cassandra.rb | 3 +-- lib/logstash/outputs/cassandra/event_parser.rb | 9 +++++---- lib/logstash/outputs/cassandra/safe_submitter.rb | 5 +++-- spec/integration/outputs/cassandra_spec.rb | 4 ++-- spec/unit/outputs/backoff_retry_policy_spec.rb | 10 ++++++++-- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 6b8b500..03a1f80 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -119,10 +119,9 @@ def receive(event) @buffer << @event_parser.parse(event) end - # Receive an array of events and immediately attempt to index them (no buffering) def multi_receive(events) events.each_slice(@flush_size) do |slice| - @safe_submitter.submit(slice.map {|e| @event_parser.parse(e) }) + @safe_submitter.submit(slice.map {|event| @event_parser.parse(event) }) end end diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 372cea7..5ef586c 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -65,14 +65,15 @@ def add_event_value_from_filter_to_action(event, filter, action) end def add_event_data_using_configured_hints(event, action) - action['data'] = event.to_hash + action_data = event.to_hash # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly - action['data'].reject!{|key| %r{^@} =~ key} + action_data.reject!{|key| %r{^@} =~ key} @hints.each do |event_key, cassandra_type| - if action['data'].has_key?(event_key) - action['data'][event_key] = 
convert_value_to_cassandra_type_or_default_if_configured(action['data'][event_key], cassandra_type) + if action_data.has_key?(event_key) + action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action['data'][event_key], cassandra_type) end end + action['data'] = action_data end def convert_value_to_cassandra_type_or_default_if_configured(event_data, cassandra_type) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index e531e03..075eef4 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -58,9 +58,10 @@ def get_retry_policy(retry_policy) def get_query(action) @logger.debug('generating query for action', :action => action) + action_data = action['data'] query = -"INSERT INTO #{action['table']} (#{action['data'].keys.join(', ')}) -VALUES (#{('?' * action['data'].keys.count).split(//) * ', '})" +"INSERT INTO #{action['table']} (#{action_data.keys.join(', ')}) +VALUES (#{('?' 
* action_data.keys.count).split(//) * ', '})" unless @statement_cache.has_key?(query) @logger.debug('preparing new query', :query => query) @statement_cache[query] = @session.prepare(query) diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index c34b487..bb26cae 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -43,7 +43,7 @@ def get_sut sut = LogStash::Outputs::CassandraOutput.new(options) return sut end - + def create_table(type_to_test) get_session.execute(" CREATE TABLE test.simple( @@ -105,7 +105,7 @@ def assert_proper_insert(type_to_test) sut = get_sut sut.register event = build_event(type_to_test) - + sut.receive(event) sut.flush diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 7840b56..79a5ad4 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -45,7 +45,7 @@ test_retry = exponential_backoff['retry_limit'] - 1 expect(Kernel).to(receive(:sleep).with(exponential_backoff['backoff_size'] ** test_retry)) - sut_instance.retry_with_backoff({ :retries => test_retry }) { } + sut_instance.retry_with_backoff({ :retries => test_retry }) end it 'allows for linear backoffs' do @@ -53,7 +53,13 @@ test_retry = exponential_backoff['retry_limit'] - 1 expect(Kernel).to(receive(:sleep).with(linear_backoff['backoff_size'] * test_retry)) - sut_instance.retry_with_backoff({ :retries => test_retry }) { } + sut_instance.retry_with_backoff({ :retries => test_retry }) + end + + it 'fails for unknown backoff types' do + sut_instance = sut.new(linear_backoff.merge({ 'backoff_type' => '^' })) + + expect { sut_instance.retry_with_backoff({ :retries => 0}) }.to raise_error ArgumentError end end From a55448d3a233d85c29ef5ff061f6a35082bb48ec Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 12 Apr 2016 09:27:33 +0300 Subject: [PATCH 100/126] fixed 
incorrect call to action['data'] before populating it --- lib/logstash/outputs/cassandra/event_parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 5ef586c..412f616 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -70,7 +70,7 @@ def add_event_data_using_configured_hints(event, action) action_data.reject!{|key| %r{^@} =~ key} @hints.each do |event_key, cassandra_type| if action_data.has_key?(event_key) - action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action['data'][event_key], cassandra_type) + action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type) end end action['data'] = action_data From 85ff071f9daec405d1c28a42682aeb2ae08eaade Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 12 Apr 2016 14:35:08 +0300 Subject: [PATCH 101/126] rubymine issue hints / fixes --- lib/logstash/outputs/cassandra.rb | 54 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 03a1f80..494b835 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -1,15 +1,15 @@ # encoding: utf-8 -require "logstash/outputs/base" -require "logstash/namespace" -require "logstash/outputs/cassandra/buffer" -require "logstash/outputs/cassandra/event_parser" -require "logstash/outputs/cassandra/safe_submitter" +require 'logstash/outputs/base' +require 'logstash/namespace' +require 'logstash/outputs/cassandra/buffer' +require 'logstash/outputs/cassandra/event_parser' +require 'logstash/outputs/cassandra/safe_submitter' class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base milestone 1 - config_name "cassandra" + config_name 'cassandra' # List of Cassandra 
hostname(s) or IP-address(es) config :hosts, :validate => :array, :required => true @@ -23,7 +23,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # Cassandra consistency level. # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" # Default: "one" - config :consistency, :validate => [ "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one" ], :default => "one" + config :consistency, :validate => [ 'any', 'one', 'two', 'three', 'quorum', 'all', 'local_quorum', 'each_quorum', 'serial', 'local_serial', 'local_one' ], :default => 'one' # The keyspace to use config :keyspace, :validate => :string, :required => true @@ -73,10 +73,10 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # example: # using { "type" => "backoff" "backoff_type" => "**" "backoff_size" => 2 "retry_limit" => 10 } will perform 10 retries with the following wait times: 1, 2, 4, 8, 16, ... 1024 # NOTE: there is an underlying assumption that the insert query is idempotent !!! - config :retry_policy, :validate => :hash, :default => { "type" => "default" }, :required => true + config :retry_policy, :validate => :hash, :default => { 'type' => 'default' }, :required => true # The command execution timeout - config :request_timeout, :validate => :number, :default => 5 + config :request_timeout, :validate => :number, :default => 0.1 # Ignore bad values config :ignore_bad_values, :validate => :boolean, :default => false @@ -109,10 +109,10 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # near-real-time. 
config :idle_flush_time, :validate => :number, :default => 1 - def register() - setup_event_parser() - setup_safe_submitter() - setup_buffer_and_handler() + def register + setup_event_parser + setup_safe_submitter + setup_buffer_and_handler end def receive(event) @@ -125,33 +125,33 @@ def multi_receive(events) end end - def teardown() - close() + def teardown + close end - def close() - @buffer.stop() + def close + @buffer.stop end def flush - @buffer.flush() + @buffer.flush end private - def setup_event_parser() + def setup_event_parser @event_parser = ::LogStash::Outputs::Cassandra::EventParser.new( - "logger" => @logger, "table" => @table, - "filter_transform_event_key" => @filter_transform_event_key, "filter_transform" => @filter_transform, - "hints" => @hints, "ignore_bad_values" => @ignore_bad_values + 'logger' => @logger, 'table' => @table, + 'filter_transform_event_key' => @filter_transform_event_key, 'filter_transform' => @filter_transform, + 'hints' => @hints, 'ignore_bad_values' => @ignore_bad_values ) end - def setup_safe_submitter() + def setup_safe_submitter @safe_submitter = ::LogStash::Outputs::Cassandra::SafeSubmitter.new( - "logger" => @logger, "cassandra" => ::Cassandra, - "hosts" => @hosts, "port" => @port, "username" => @username, "password" => @password, - "consistency" => @consistency, "request_timeout" => @request_timeout, "retry_policy" => @retry_policy, - "keyspace" => @keyspace + 'logger' => @logger, 'cassandra' => ::Cassandra, + 'hosts' => @hosts, 'port' => @port, 'username' => @username, 'password' => @password, + 'consistency' => @consistency, 'request_timeout' => @request_timeout, 'retry_policy' => @retry_policy, + 'keyspace' => @keyspace ) end From bc7c8e2111b302327d42432100ac88f59f6038f9 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 12 Apr 2016 16:08:08 +0300 Subject: [PATCH 102/126] added option for infinite retries --- lib/logstash/outputs/cassandra/backoff_retry_policy.rb | 2 +- 
spec/unit/outputs/backoff_retry_policy_spec.rb | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index 1e1c029..0d0071d 100644 --- a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -31,7 +31,7 @@ def unavailable(statement, consistency, required, alive, retries) end def retry_with_backoff(opts) - if opts[:retries] > @retry_limit + if @retry_limit > -1 && opts[:retries] > @retry_limit @logger.error('backoff retries exhausted', :opts => opts) return reraise end diff --git a/spec/unit/outputs/backoff_retry_policy_spec.rb b/spec/unit/outputs/backoff_retry_policy_spec.rb index 79a5ad4..d1e6381 100644 --- a/spec/unit/outputs/backoff_retry_policy_spec.rb +++ b/spec/unit/outputs/backoff_retry_policy_spec.rb @@ -40,6 +40,13 @@ sut_instance.retry_with_backoff({ :retries => 0 }) end + it 'allows for an infinite amount of retries if configured with -1 as the retry limit' do + sut_instance = sut.new(linear_backoff.merge({ 'retry_limit' => -1 })) + expect(Kernel).to(receive(:sleep)) + + sut_instance.retry_with_backoff({ :retries => 1000000 }) + end + it 'allows for exponential backoffs' do sut_instance = sut.new(exponential_backoff) test_retry = exponential_backoff['retry_limit'] - 1 From ec93d405c851f04703094ea1a7cc05ab44a6e476 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 12 Apr 2016 18:16:31 +0300 Subject: [PATCH 103/126] added basic retry on future failures --- .../outputs/cassandra/safe_submitter.rb | 61 +++++++++++++------ spec/unit/outputs/safe_submitter_spec.rb | 33 +++++++++- 2 files changed, 74 insertions(+), 20 deletions(-) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 075eef4..b981ff2 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ 
b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -1,4 +1,5 @@ # encoding: utf-8 +require 'thread' require 'cassandra' require 'logstash/outputs/cassandra/backoff_retry_policy' @@ -11,19 +12,13 @@ def initialize(options) end def submit(actions) - begin - futures = actions.map do |action| - query = get_query(action) - execute_async(query, action['data'].values) - end - futures.each(&:join) - rescue Exception => e - @logger.error('Failed to send batch to cassandra', :actions => actions, :exception => e, :backtrace => e.backtrace) - end + queries = prepare_queries(actions) + execute_queries_with_retries(queries) end private def setup_cassandra_session(options) + @retry_policy = get_retry_policy(options['retry_policy']) cluster = options['cassandra'].cluster( username: options['username'], password: options['password'], @@ -32,7 +27,7 @@ def setup_cassandra_session(options) port: options['port'], consistency: options['consistency'].to_sym, timeout: options['request_timeout'], - retry_policy: get_retry_policy(options['retry_policy']), + retry_policy: @retry_policy, logger: options['logger'] ) @session = cluster.connect(options['keyspace']) @@ -56,6 +51,19 @@ def get_retry_policy(retry_policy) end end + def prepare_queries(actions) + remaining_queries = Queue.new + actions.each do |action| + begin + query = get_query(action) + remaining_queries << { :query => query, :arguments => action['data'].values } + rescue Exception => e + @logger.error('Failed to prepare query', :action => action, :exception => e, :backtrace => e.backtrace) + end + end + remaining_queries + end + def get_query(action) @logger.debug('generating query for action', :action => action) action_data = action['data'] @@ -69,15 +77,32 @@ def get_query(action) @statement_cache[query] end - def execute_async(query, arguments) - future = @session.execute_async(query, arguments: arguments) - future.on_failure { |error| - @logger.error('error executing insert', :query => query, :arguments => arguments, 
:error => error) - } - future.on_complete { |value, error| - unless error.nil? - @logger.error('error executing insert', :query => query, :arguments => arguments, :error => error) + def execute_queries_with_retries(queries) + while queries.length > 0 + execute_queries(queries) + end + end + + def execute_queries(queries) + futures = [] + while queries.length > 0 + query = queries.pop + begin + future = execute_async(query, queries) + futures << future + rescue Exception => e + @logger.error('Failed to send query', :query => query, :exception => e, :backtrace => e.backtrace) end + end + futures.each(&:join) + end + + def execute_async(query, queries) + future = @session.execute_async(query[:query], arguments: query[:arguments]) + future.on_failure { |error| + @logger.error('Failed to execute query', :query => query, :error => error) + # TODO: add configuration for this + queries << query } future end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 3f0674f..8eed1dd 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -106,7 +106,6 @@ def generate_future_double future_double = double expect(future_double).to(receive(:join)) expect(future_double).to(receive(:on_failure)) - expect(future_double).to(receive(:on_complete)) return future_double end @@ -141,7 +140,7 @@ def generate_future_double sut_instance.submit([one_action, another_action]) end - it 'logs and skips failed batches' do + it 'logs and skips failed query preps' do setup_session_double(default_options) sut_instance = sut.new(default_options) expect(sut_instance).to(receive(:get_query).and_raise(ArgumentError)) @@ -149,5 +148,35 @@ def generate_future_double expect { sut_instance.submit([one_action]) }.to_not raise_error end + + it 'logs and skips queries which failed during send' do + setup_session_double(default_options) + sut_instance = sut.new(default_options) + 
expect(sut_instance).to(receive(:get_query).and_return(double)) + expect(sut_instance).to(receive(:execute_async).and_raise(ArgumentError)) + expect(default_options['logger']).to(receive(:error)) + + expect { sut_instance.submit([one_action]) }.to_not raise_error + end + + it 'retries queries which failed to execute' do + doubles = setup_session_double(default_options) + expect(doubles[:session_double]).to(receive(:prepare).and_return('eureka')) + expect(doubles[:session_double]).to(receive(:prepare).and_return('great scott')) + expect(doubles[:session_double]).to(receive(:execute_async).with('eureka', :arguments => one_action['data'].values)).and_return(generate_future_double) + # setup a fail once execution + fail_on_join_future = Object.new + def fail_on_join_future.on_failure(&block) + @block = block + end + def fail_on_join_future.join + @block.call('oh boy...') + end + expect(doubles[:session_double]).to(receive(:execute_async).with('great scott', :arguments => another_action['data'].values)).and_return(fail_on_join_future) + expect(doubles[:session_double]).to(receive(:execute_async).with('great scott', :arguments => another_action['data'].values)).and_return(generate_future_double) + sut_instance = sut.new(default_options) + + sut_instance.submit([one_action, another_action]) + end end end From a28b672b26ebf9ee9ea2d66684c420fefa0bd383 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Tue, 12 Apr 2016 19:09:25 +0300 Subject: [PATCH 104/126] added future failure / timeout retries based on the backoff retry policy if it is setup --- .../outputs/cassandra/backoff_retry_policy.rb | 1 + .../outputs/cassandra/safe_submitter.rb | 21 +++++++++----- spec/unit/outputs/safe_submitter_spec.rb | 29 +++++++++++++++---- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index 0d0071d..cd716a4 100644 --- 
a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -42,6 +42,7 @@ def retry_with_backoff(opts) try_again(opts[:consistency]) end + private def backoff_wait_before_next_retry(retries) backoff_wait_time = calculate_backoff_wait_time(retries) Kernel::sleep(backoff_wait_time) diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index b981ff2..8e7d912 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -19,13 +19,14 @@ def submit(actions) private def setup_cassandra_session(options) @retry_policy = get_retry_policy(options['retry_policy']) + @consistency = options['consistency'].to_sym cluster = options['cassandra'].cluster( username: options['username'], password: options['password'], protocol_version: options['protocol_version'], hosts: options['hosts'], port: options['port'], - consistency: options['consistency'].to_sym, + consistency: @consistency, timeout: options['request_timeout'], retry_policy: @retry_policy, logger: options['logger'] @@ -78,17 +79,19 @@ def get_query(action) end def execute_queries_with_retries(queries) + retries = 0 while queries.length > 0 - execute_queries(queries) + execute_queries(queries, retries) + retries += 1 end end - def execute_queries(queries) + def execute_queries(queries, retries) futures = [] while queries.length > 0 query = queries.pop begin - future = execute_async(query, queries) + future = execute_async(query, retries, queries) futures << future rescue Exception => e @logger.error('Failed to send query', :query => query, :exception => e, :backtrace => e.backtrace) @@ -97,12 +100,16 @@ def execute_queries(queries) futures.each(&:join) end - def execute_async(query, queries) + def execute_async(query, retries, queries) future = @session.execute_async(query[:query], arguments: query[:arguments]) future.on_failure { |error| 
@logger.error('Failed to execute query', :query => query, :error => error) - # TODO: add configuration for this - queries << query + if @retry_policy.is_a?(::Cassandra::Retry::Policies::Backoff) + decision = @retry_policy.retry_with_backoff({ :retries => retries, :consistency => @consistency }) + if decision.is_a?(::Cassandra::Retry::Decisions::Retry) + queries << query + end + end } future end diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb index 8eed1dd..c804e38 100644 --- a/spec/unit/outputs/safe_submitter_spec.rb +++ b/spec/unit/outputs/safe_submitter_spec.rb @@ -57,7 +57,7 @@ def setup_session_double(options) :concrete_retry_policy => ::Cassandra::Retry::Policies::Backoff } ].each { |mapping| it "supports the #{mapping[:concrete_retry_policy]} retry policy by passing #{mapping[:setting]['type']} as the retry_policy" do - options = default_options.update({ 'retry_policy' => mapping[:setting], 'concrete_retry_policy' => mapping[:concrete_retry_policy] }) + options = default_options.merge({ 'retry_policy' => mapping[:setting], 'concrete_retry_policy' => mapping[:concrete_retry_policy] }) setup_session_double(options) sut.new(options) @@ -67,7 +67,7 @@ def setup_session_double(options) it 'properly initializes the backoff retry policy' do retry_policy_config = { 'type' => 'backoff', 'backoff_type' => '**', 'backoff_size' => 2, 'retry_limit' => 10 } expected_policy = double - options = default_options.update({ 'retry_policy' => retry_policy_config, 'concrete_retry_policy' => expected_policy }) + options = default_options.merge({ 'retry_policy' => retry_policy_config, 'concrete_retry_policy' => expected_policy }) expect(::Cassandra::Retry::Policies::Backoff).to(receive(:new).with({ 'backoff_type' => options['retry_policy']['backoff_type'], 'backoff_size' => options['retry_policy']['backoff_size'], 'retry_limit' => options['retry_policy']['retry_limit'], 'logger' => options['logger']}).and_return(expected_policy)) @@ 
-77,7 +77,7 @@ def setup_session_double(options) end it 'fails if the retry policy is unknown' do - options = default_options.update({ 'retry_policy' => 'bad policy' }) + options = default_options.merge({ 'retry_policy' => 'bad policy' }) expect { sut.new(options) }.to(raise_error(ArgumentError)) end @@ -159,8 +159,27 @@ def generate_future_double expect { sut_instance.submit([one_action]) }.to_not raise_error end - it 'retries queries which failed to execute' do + it 'does not retry queries which failed to execute in case the retry policy is not backoff' do doubles = setup_session_double(default_options) + expect(doubles[:session_double]).to(receive(:prepare).and_return('great scott')) + # setup a fail once execution + fail_on_join_future = Object.new + def fail_on_join_future.on_failure(&block) + @block = block + end + def fail_on_join_future.join + @block.call('oh boy...') + end + expect(doubles[:session_double]).to(receive(:execute_async).with('great scott', :arguments => another_action['data'].values).once).and_return(fail_on_join_future) + sut_instance = sut.new(default_options) + + sut_instance.submit([another_action]) + end + + it 'retries queries which failed to execute' do + options = default_options.merge({ 'retry_policy' => { 'type' => 'backoff', 'backoff_type' => '**', 'backoff_size' => 2, 'retry_limit' => 10 }, + 'concrete_retry_policy' => ::Cassandra::Retry::Policies::Backoff }) + doubles = setup_session_double(options) expect(doubles[:session_double]).to(receive(:prepare).and_return('eureka')) expect(doubles[:session_double]).to(receive(:prepare).and_return('great scott')) expect(doubles[:session_double]).to(receive(:execute_async).with('eureka', :arguments => one_action['data'].values)).and_return(generate_future_double) @@ -174,7 +193,7 @@ def fail_on_join_future.join end expect(doubles[:session_double]).to(receive(:execute_async).with('great scott', :arguments => another_action['data'].values)).and_return(fail_on_join_future) 
expect(doubles[:session_double]).to(receive(:execute_async).with('great scott', :arguments => another_action['data'].values)).and_return(generate_future_double) - sut_instance = sut.new(default_options) + sut_instance = sut.new(options) sut_instance.submit([one_action, another_action]) end From b993d0ff9c00fe1d25ef65382b0e5efec9c77b98 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 13 Apr 2016 09:37:28 +0300 Subject: [PATCH 105/126] switching default timeout to 1 second --- lib/logstash/outputs/cassandra.rb | 2 +- spec/integration/outputs/cassandra_spec.rb | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 494b835..04f4134 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -76,7 +76,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base config :retry_policy, :validate => :hash, :default => { 'type' => 'default' }, :required => true # The command execution timeout - config :request_timeout, :validate => :number, :default => 0.1 + config :request_timeout, :validate => :number, :default => 1 # Ignore bad values config :ignore_bad_values, :validate => :boolean, :default => false diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index bb26cae..b2ef5ee 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -112,7 +112,4 @@ def assert_proper_insert(type_to_test) assert_proper_insert(type_to_test) end } - - it 'properly works with counter columns' - it 'properly adds multiple events to multiple tables in the same bulk' end From 58d4359f8e6eeda96f55367e5e7297e2fab7c6ab Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 13 Apr 2016 09:50:49 +0300 Subject: [PATCH 106/126] removing trailling spaces --- spec/integration/outputs/cassandra_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb index b2ef5ee..c4de7aa 100644 --- a/spec/integration/outputs/cassandra_spec.rb +++ b/spec/integration/outputs/cassandra_spec.rb @@ -65,7 +65,7 @@ def build_event(type_to_test) } LogStash::Event.new(options) end - + def assert_proper_insert(type_to_test) result = get_session.execute('SELECT * FROM test.simple') expect(result.size).to((eq(1))) From 18742adfe9c591367a4101dd49ab1e4b8539010b Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Wed, 13 Apr 2016 10:20:24 +0300 Subject: [PATCH 107/126] added some simple comments and updated readme --- README.md | 140 +++++++++++------- lib/logstash/outputs/cassandra.rb | 1 + .../outputs/cassandra/backoff_retry_policy.rb | 3 +- .../outputs/cassandra/event_parser.rb | 1 + .../outputs/cassandra/safe_submitter.rb | 1 + 5 files changed, 94 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 651a738..d04cfc9 100644 --- a/README.md +++ b/README.md @@ -9,64 +9,94 @@ It is fully free and fully open source. The license is Apache 2.0, meaning you a

 output {
     cassandra {
-        # Credentials of a target Cassandra, keyspace and table
-        # where you want to stream data to.
-        username => "cassandra"
-        password => "cassandra"
-        hosts => ["127.0.0.1"]
-        keyspace => "logs"
-        table => "query_log"
+        # List of Cassandra hostname(s) or IP-address(es)
+        hosts => [ "cass-01", "cass-02" ]
+
+        # The port cassandra is listening to
+        port => 9042
+
+        # The protocol version to use with cassandra
+        protocol_version => 4
+
         # Cassandra consistency level.
-        # Options: "any", "one", "two", "three", "quorum", "all",
-        #    "local_quorum", "each_quorum", "serial", "local_serial",
-        #    "local_one"
+        # Options: "any", "one", "two", "three", "quorum", "all", "local_quorum", "each_quorum", "serial", "local_serial", "local_one"
         # Default: "one"
-        consistency => "all"
-        
-        # Where from the event hash to take a message
-        source => "payload"
-        
-        # if cassandra does not understand formats of data
-        # you feeds it with, just provide some hints here
+        consistency => 'any'
+
+        # The keyspace to use
+        keyspace => "a_ks"
+
+        # The table to use (event level processing (e.g. %{[key]}) is supported)
+        table => "%{[@metadata][cassandra_table]}"
+
+        # Username
+        username => "cassandra"
+
+        # Password
+        password => "cassandra"
+
+        # An optional hints hash which will be used in case filter_transform or filter_transform_event_key are not in use
+        # It is used to trigger a forced type casting to the cassandra driver types in
+        # the form of a hash from column name to type name in the following manner:
         hints => {
             id => "int"
             at => "timestamp"
             resellerId => "int"
             errno => "int"
             duration => "float"
-            ip => "inet"}
-            
-        # Sometimes it's usefull to ignore malformed messages
-        # (e.x. source contains nothing),
-        # in the case set ignore_bad_messages to True.
-        # By default it is False
-        ignore_bad_messages => true
-        
-        # Sometimes it's usefull to ignore problems with a convertation
-        # of a received value to Cassandra format and set some default
-        # value (inet: 0.0.0.0, float: 0.0, int: 0,
-        # uuid: 00000000-0000-0000-0000-000000000000,
-        # timestamp: 1970-01-01 00:00:00) in the case set
-        # ignore_bad_messages to True.
-        # By default it is False
-        ignore_bad_values => true
-        
-        # Datastax cassandra driver supports batch insert.
-        # You can define the batch size explicitely.
-        # By default it is 1.
-        batch_size => 100
-        
-        # Every batch_processor_thread_period sec. a special thread
-        # pushes all collected messages to Cassandra. By default it is 1 (sec.)
-        batch_processor_thread_period => 1
-        
-        # max max_retries times the plugin will push failed batches
-        # to Cassandra before give up. By defult it is 3.
-        max_retries => 3
-        
-        # retry_delay secs. between two sequential tries to push a failed batch
-        # to Cassandra. By default it is 3 (secs.)
-        retry_delay => 3
+            ip => "inet"
+        }
+
+        # The retry policy to use (the default is the default retry policy)
+        # the hash requires the name of the policy and the params it requires
+        # The available policy names are:
+        # * default => retry once if needed / possible
+        # * downgrading_consistency => retry once with a best guess lowered consistency
+        # * failthrough => fail immediately (i.e. no retries)
+        # * backoff => a version of the default retry policy but with configurable backoff retries
+        # The backoff options are as follows:
+        # * backoff_type => either * or ** for linear and exponential backoffs respectively
+        # * backoff_size => the left operand for the backoff type in seconds
+        # * retry_limit => the maximum amount of retries to allow per query
+        # example:
+        # using { "type" => "backoff" "backoff_type" => "**" "backoff_size" => 2 "retry_limit" => 10 } will perform 10 retries with the following wait times: 1, 2, 4, 8, 16, ... 1024
+        # NOTE: there is an underlying assumption that the insert query is idempotent !!!
+        # NOTE: when the backoff retry policy is used, it will also be used to handle pure client timeouts and not just ones coming from the coordinator
+        retry_policy => { "type" => "default" }
+
+        # The command execution timeout
+        request_timeout => 1
+
+        # Ignore bad values
+        ignore_bad_values => false
+
+        # In Logstashes >= 2.2 this setting defines the maximum sized bulk request Logstash will make
+        # You may want to increase this to be in line with your pipeline's batch size.
+        # If you specify a number larger than the batch size of your pipeline it will have no effect,
+        # save for the case where a filter increases the size of an inflight batch by outputting
+        # events.
+        #
+        # In Logstashes <= 2.1 this plugin uses its own internal buffer of events.
+        # This config option sets that size. In these older logstashes this size may
+        # have a significant impact on heap usage, whereas in 2.2+ it will never increase it.
+        # To make efficient bulk API calls, we will buffer a certain number of
+        # events before flushing that out to Cassandra. This setting
+        # controls how many events will be buffered before sending a batch
+        # of events. Increasing the `flush_size` has an effect on Logstash's heap size.
+        # Remember to also increase the heap size using `LS_HEAP_SIZE` if you are sending big commands
+        # or have increased the `flush_size` to a higher value.
+        flush_size => 500
+
+        # The amount of time since last flush before a flush is forced.
+        #
+        # This setting helps ensure slow event rates don't get stuck in Logstash.
+        # For example, if your `flush_size` is 100, and you have received 10 events,
+        # and it has been more than `idle_flush_time` seconds since the last flush,
+        # Logstash will flush those 10 events automatically.
+        #
+        # This helps keep both fast and slow log streams moving along in
+        # near-real-time.
+        idle_flush_time => 1
     }
 }
 
@@ -106,4 +136,12 @@ bin/logstash -e 'output {cassandra {}}' ``` ## TODO - +* Fix the authentication bug (no user;pass in cassandra plugin?!) +* Finish integration specs + * it "properly works with counter columns" + * it "properly adds multiple events to multiple tables in the same bulk" +* Improve retries to include (but probably only handle Errors::Timeout and Errors::NoHostsAvailable): + * \#get_query + * \#execute_async +* Upgrade / test with logstash 2.3 +* Upgrade / test with cassandra 3 diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb index 04f4134..8aacb24 100644 --- a/lib/logstash/outputs/cassandra.rb +++ b/lib/logstash/outputs/cassandra.rb @@ -73,6 +73,7 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base # example: # using { "type" => "backoff" "backoff_type" => "**" "backoff_size" => 2 "retry_limit" => 10 } will perform 10 retries with the following wait times: 1, 2, 4, 8, 16, ... 1024 # NOTE: there is an underlying assumption that the insert query is idempotent !!! 
+ # NOTE: when the backoff retry policy is used, it will also be used to handle pure client timeouts and not just ones coming from the coordinator config :retry_policy, :validate => :hash, :default => { 'type' => 'default' }, :required => true # The command execution timeout diff --git a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb index cd716a4..f836764 100644 --- a/lib/logstash/outputs/cassandra/backoff_retry_policy.rb +++ b/lib/logstash/outputs/cassandra/backoff_retry_policy.rb @@ -1,10 +1,11 @@ # encoding: utf-8 -# This is a version of the default retry policy (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb) with backoff retry configuration options require 'cassandra' module Cassandra module Retry module Policies + # This is a version of the default retry policy (https://github.com/datastax/ruby-driver/blob/v2.1.5/lib/cassandra/retry/policies/default.rb) + # with backoff retry configuration options class Backoff include ::Cassandra::Retry::Policy diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb index 412f616..16ef33c 100644 --- a/lib/logstash/outputs/cassandra/event_parser.rb +++ b/lib/logstash/outputs/cassandra/event_parser.rb @@ -3,6 +3,7 @@ require 'cassandra' module LogStash; module Outputs; module Cassandra + # Responsible for accepting events from the pipeline and returning actions for the SafeSubmitter class EventParser def initialize(options) @logger = options['logger'] diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb index 8e7d912..3a58560 100644 --- a/lib/logstash/outputs/cassandra/safe_submitter.rb +++ b/lib/logstash/outputs/cassandra/safe_submitter.rb @@ -4,6 +4,7 @@ require 'logstash/outputs/cassandra/backoff_retry_policy' module LogStash; module Outputs; module Cassandra + # Responsible for submitting parsed actions 
to cassandra (with or without a retry mechanism) class SafeSubmitter def initialize(options) @statement_cache = {} From 7401fc55542b0a0aa4dfabc2d3ae82482ed47772 Mon Sep 17 00:00:00 2001 From: Elad Amit Date: Fri, 15 Apr 2016 10:30:02 +0300 Subject: [PATCH 108/126] updates towards publishing --- CONTRIBUTORS | 23 ++++------------------- README.md | 4 +++- logstash-output-cassandra.gemspec | 22 +++++++++++----------- sonar-project.properties | 1 - 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 660db3e..4ca885a 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1,21 +1,6 @@ -The following is a list of people who have contributed ideas, code, bug -reports, or in general have helped logstash along its way. +The following is a list of people who have contributed (in chronological order) ideas, code, bug +reports, or in general have helped this plugin along its way. Contributors: -* Aaron Mildenstein (untergeek) -* Graham Bleach (bleach) -* John E. Vincent (lusis) -* Jordan Sissel (jordansissel) -* Kevin Amorin (kamorin) -* Kevin O'Connor (kjoconnor) -* Kurt Hurtado (kurtado) -* Mathias Gug (zimathias) -* Pete Fritchman (fetep) -* Pier-Hugues Pellerin (ph) -* Richard Pijnenburg (electrical) -* bitsofinfo (bitsofinfo) - -Note: If you've sent us patches, bug reports, or otherwise contributed to -Logstash, and you aren't on the list above and want to be, please let us know -and we'll make sure you're here. Contributions from folks like you are what make -open source awesome. +* Oleg Tokarev (otokarev) +* Elad Amit (eladamitpxi, amitelad7) diff --git a/README.md b/README.md index d04cfc9..0803001 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # Logstash Cassandra Output Plugin -This is a plugin for [Logstash](https://github.com/elasticsearch/logstash). +This is a plugin for [Logstash](https://github.com/elastic/logstash). It is fully free and fully open source. 
The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. +It was originally a fork of the [logstash-output-cassandra](https://github.com/otokarev/logstash-output-cassandra) plugin by [Oleg Tokarev](https://github.com/otokarev), which has gone unmaintained; this version is a major re-design of it. + ## Usage

diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index de62bd8..3b88c07 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -1,26 +1,26 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-output-cassandra'
-  s.version         = '0.2.0'
-  s.licenses        = ['Apache License (2.0)']
-  s.summary         = "Store events into Cassandra"
-  s.description     = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
-  s.authors         = ["Openprovider"]
-  s.email           = 'otokarev@openprovider.nl'
-  s.homepage        = "http://openprovider.nl"
-  s.require_paths = ["lib"]
+  s.version         = '0.9.0'
+  s.licenses        = [ 'Apache License (2.0)' ]
+  s.summary         = 'Store events into Cassandra'
+  s.description     = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
+  s.authors         = [ 'PerimeterX' ]
+  s.email           = [ 'elad@perimeterx.com' ]
+  s.homepage        = 'https://github.com/PerimeterX/logstash-output-cassandra'
+  s.require_paths   = [ 'lib' ]
 
   # Files
-  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
+  s.files = Dir[ 'lib/**/*', 'spec/**/*', 'vendor/**/*', '*.gemspec', '*.md', 'CONTRIBUTORS', 'Gemfile', 'LICENSE', 'NOTICE.TXT' ]
   # Tests
   s.test_files = s.files.grep(%r{^(test|spec|features)/})
 
   # Special flag to let us know this is actually a logstash plugin
-  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
+  s.metadata = { 'logstash_plugin' => 'true', 'logstash_group' => 'output' }
 
   # Gem dependencies
   s.add_runtime_dependency 'concurrent-ruby'
-  s.add_runtime_dependency "logstash-core", '>= 2.0.0', '< 3.0.0'
+  s.add_runtime_dependency 'logstash-core', '>= 2.0.0', '< 3.0.0'
   s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0'
   s.add_development_dependency 'cabin', ['~> 0.6']
   s.add_development_dependency 'longshoreman'
diff --git a/sonar-project.properties b/sonar-project.properties
index 6e9e7e6..5a6f5dc 100644
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -1,7 +1,6 @@
 sonar.projectKey=px:logstash-output-cassandra
 
 sonar.projectName=Logstash Cassandra Output
-sonar.projectVersion=1.0
 
 sonar.language=ruby
 sonar.sources=lib

From a243d3efff6369b6b08904aa630b59983cfdba63 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Fri, 15 Apr 2016 10:42:28 +0300
Subject: [PATCH 109/126] updates towards publishing

---
 logstash-output-cassandra.gemspec | 1 +
 1 file changed, 1 insertion(+)

diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index 3b88c07..ece5213 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -31,4 +31,5 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'unparser', '0.2.4'
   s.add_development_dependency 'metric_fu'
   s.add_development_dependency 'coveralls'
+  s.add_development_dependency 'gems'
 end

From ed85604dad0526aa9238ab6fcafaa90caf11a40d Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Sat, 23 Apr 2016 20:42:51 +0300
Subject: [PATCH 110/126] adding valentinul to the contributor list

---
 CONTRIBUTORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 4ca885a..a6f4374 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -4,3 +4,4 @@ reports, or in general have helped this plugin along its way.
 Contributors:
 * Oleg Tokarev (otokarev)
 * Elad Amit (eladamitpxi, amitelad7)
+* Valentin Fischer (valentinul)

From 591efa94e9f85549c71af81079d36a492ab6ea0d Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Sun, 24 Apr 2016 09:34:33 +0300
Subject: [PATCH 111/126] removing incorrect auth todo

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 0803001..f8b4ae8 100644
--- a/README.md
+++ b/README.md
@@ -138,7 +138,6 @@ bin/logstash -e 'output {cassandra {}}'
 ```
 
 ## TODO
-* Fix the authentication bug (no user;pass in cassandra plugin?!)
 * Finish integration specs
     * it "properly works with counter columns"
     * it "properly adds multiple events to multiple tables in the same bulk"

From 970d25b05170cb48b3a7c25b2298b6d71e222680 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Tue, 26 Apr 2016 11:34:27 +0300
Subject: [PATCH 112/126] updated / fixed the license file

---
 LICENSE | 207 +-------------------------------------------------------
 1 file changed, 1 insertion(+), 206 deletions(-)

diff --git a/LICENSE b/LICENSE
index 1d90ead..43976b7 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,5 +1,4 @@
-<<<<<<< HEAD
-Copyright (c) 2012-2015 Elasticsearch 
+Copyright (c) 2012–2016 Elasticsearch 
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,207 +11,3 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-=======
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
->>>>>>> 56f47bec0cc3d3230ae48792e7ac094823872298

From be82df6b36d7380ecedb600f2930de7c4c9619bf Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Tue, 26 Apr 2016 11:35:10 +0300
Subject: [PATCH 113/126] updated / fixed the license file

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 43976b7..90fcafc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2012–2016 Elasticsearch 
+Copyright (c) 2012–2016 PerimeterX 
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

From 7a6413c18f0fcad2fe8e7b5d10164f62e32a32cc Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Tue, 26 Apr 2016 11:35:24 +0300
Subject: [PATCH 114/126] updated / fixed the license file

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 90fcafc..78d2b14 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2012–2016 PerimeterX 
+Copyright (c) 2016 PerimeterX 
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

From 06156aceb5b0fad5b4498b3dbd16e2845d43d740 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Wed, 27 Apr 2016 16:37:30 +0300
Subject: [PATCH 115/126] updated / fixed the license file

---
 README.md | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f8b4ae8..e84e7be 100644
--- a/README.md
+++ b/README.md
@@ -110,12 +110,17 @@ Edit Logstash Gemfile and add the local plugin path, for example:
 ```
 gem "logstash-output-cassandra", :path => "/your/local/logstash-output-cassandra"
 ```
-
-Install plugin
+And install by executing:
 ```
 bin/plugin install --no-verify
 ```
-Run Logstash with the plugin
+
+Or install plugin from RubyGems:
+```
+bin/plugin install logstash-output-cassandra
+```
+
+And then run Logstash with the plugin:
 ```
 bin/logstash -e 'output {cassandra {}}'
 ```

From b24e3becc44b4cda1391a54750744bc138be765a Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Sun, 1 May 2016 16:15:55 +0300
Subject: [PATCH 116/126] fixing a typo in the filter transform test name

---
 spec/unit/outputs/event_parser_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb
index 78c7c9c..06e486b 100644
--- a/spec/unit/outputs/event_parser_spec.rb
+++ b/spec/unit/outputs/event_parser_spec.rb
@@ -40,7 +40,7 @@
   describe 'filter transforms' do
     describe 'from config' do
       describe 'malformed configurations' do
-        it 'fails if the transform has no event_data setting' do
+        it 'fails if the transform has no event_key setting' do
           expect { sut.new(default_opts.update({ 'filter_transform' => [{ 'column_name' => '' }] })) }.to raise_error(/item is incorrectly configured/)
         end
 

From b346933cf14f83cb9b7efbd4134dd815be3329e7 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Sun, 1 May 2016 21:58:50 +0300
Subject: [PATCH 117/126] added documentation for expansion_only

---
 lib/logstash/outputs/cassandra.rb | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/logstash/outputs/cassandra.rb b/lib/logstash/outputs/cassandra.rb
index 8aacb24..816dd65 100644
--- a/lib/logstash/outputs/cassandra.rb
+++ b/lib/logstash/outputs/cassandra.rb
@@ -40,6 +40,8 @@ class LogStash::Outputs::CassandraOutput < LogStash::Outputs::Base
   # An optional hash describing how / what to transform / filter from the original event
   # Each key is expected to be of the form { event_key => "..." column_name => "..." cassandra_type => "..." }
   # Event level processing (e.g. %{[key]}) is supported for all three
+  # In case you only want to do string expansion (e.g. in the case of adding event specific dates) you can add the expansion_only key with a value of true
+  # Example: using { event_key => "%{+yyyyMMddHH}" column_name => "date" expansion_only => true } will result in a date column with a string of the specified format
   config :filter_transform, :validate => :array, :default => []
 
   # An optional string which points to the event specific location from which to pull the filter_transform definition

From a3c4acdb84bbff93bcd516b6565f1f078822bc67 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Mon, 23 May 2016 08:56:44 +0300
Subject: [PATCH 118/126] changing version around for testing

---
 lib/logstash/outputs/cassandra/event_parser.rb | 2 --
 logstash-output-cassandra.gemspec              | 4 ++--
 spec/integration/outputs/cassandra_spec.rb     | 1 -
 spec/integration/outputs/integration_helper.rb | 2 +-
 spec/unit/outputs/event_parser_spec.rb         | 1 -
 5 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb
index 16ef33c..7b6e788 100644
--- a/lib/logstash/outputs/cassandra/event_parser.rb
+++ b/lib/logstash/outputs/cassandra/event_parser.rb
@@ -119,8 +119,6 @@ def convert_value_to_cassandra_type(event_data, cassandra_type)
           return ::Cassandra::Types::Inet.new(event_data)
         when 'float'
           return ::Cassandra::Types::Float.new(event_data)
-        when 'varchar'
-          return ::Cassandra::Types::Varchar.new(event_data)
         when 'text'
           return ::Cassandra::Types::Text.new(event_data)
         when 'blob'
diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index ece5213..c5414cf 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-output-cassandra'
-  s.version         = '0.9.0'
+  s.version         = '0.9.1'
   s.licenses        = [ 'Apache License (2.0)' ]
   s.summary         = 'Store events into Cassandra'
   s.description     = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
@@ -21,7 +21,7 @@ Gem::Specification.new do |s|
   # Gem dependencies
   s.add_runtime_dependency 'concurrent-ruby'
   s.add_runtime_dependency 'logstash-core', '>= 2.0.0', '< 3.0.0'
-  s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0'
+  s.add_runtime_dependency 'cassandra-driver', '3.0.0.rc.2'
   s.add_development_dependency 'cabin', ['~> 0.6']
   s.add_development_dependency 'longshoreman'
   s.add_development_dependency 'logstash-devutils'
diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb
index c4de7aa..a5ecc06 100644
--- a/spec/integration/outputs/cassandra_spec.rb
+++ b/spec/integration/outputs/cassandra_spec.rb
@@ -86,7 +86,6 @@ def assert_proper_insert(type_to_test)
       { type: 'timestamp', value: 1457606758, assert_override: Helper::get_assert_timestamp_equallity() },
       { type: 'inet', value: '192.168.99.100' },
       { type: 'float', value: '10.050000190734863' },
-      { type: 'varchar', value: 'some chars' },
       { type: 'text', value: 'some text' },
       { type: 'blob', value: 'a blob' },
       { type: 'ascii', value: 'some ascii' },
diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb
index d865cdd..0d5da46 100644
--- a/spec/integration/outputs/integration_helper.rb
+++ b/spec/integration/outputs/integration_helper.rb
@@ -5,7 +5,7 @@
 
 CONTAINER_NAME = "logstash-output-cassandra-#{rand(999).to_s}"
 CONTAINER_IMAGE = 'cassandra'
-CONTAINER_TAG = '2.2'
+CONTAINER_TAG = '3.5'
 
 module CassandraHelper
   def get_host_ip
diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb
index 06e486b..ae595b6 100644
--- a/spec/unit/outputs/event_parser_spec.rb
+++ b/spec/unit/outputs/event_parser_spec.rb
@@ -108,7 +108,6 @@
           { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => 1457606758, expected: Time.at(1457606758) },
           { :name => 'inet',      :type => ::Cassandra::Types::Inet,      :value => '0.0.0.0' },
           { :name => 'float',     :type => ::Cassandra::Types::Float,     :value => '10.15' },
-          { :name => 'varchar',   :type => ::Cassandra::Types::Varchar,   :value => 'a varchar' },
           { :name => 'text',      :type => ::Cassandra::Types::Text,      :value => 'some text' },
           { :name => 'blob',      :type => ::Cassandra::Types::Blob,      :value => '12345678' },
           { :name => 'ascii',     :type => ::Cassandra::Types::Ascii,     :value => 'some ascii' },

From 19c8a0dd3ba26ccbc4a3977194f0ffd13a7ccad9 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Mon, 23 May 2016 09:43:34 +0300
Subject: [PATCH 119/126] changed the event parser to return a nil action in
 case of a parsing failure and changed the submitter to skip nil actions

---
 .../outputs/cassandra/event_parser.rb         | 26 +++++++++++--------
 .../outputs/cassandra/safe_submitter.rb       |  6 +++--
 logstash-output-cassandra.gemspec             |  2 +-
 spec/unit/outputs/event_parser_spec.rb        | 26 ++++++++++++-------
 spec/unit/outputs/safe_submitter_spec.rb      |  9 +++++++
 5 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb
index 16ef33c..4d8dc13 100644
--- a/lib/logstash/outputs/cassandra/event_parser.rb
+++ b/lib/logstash/outputs/cassandra/event_parser.rb
@@ -17,18 +17,22 @@ def initialize(options)
 
     def parse(event)
       action = {}
-      action['table'] = event.sprintf(@table)
-      filter_transform = get_filter_transform(event)
-      if filter_transform
-        action['data'] = {}
-        filter_transform.each { |filter|
-          add_event_value_from_filter_to_action(event, filter, action)
-        }
-      else
-        add_event_data_using_configured_hints(event, action)
+      begin
+        action['table'] = event.sprintf(@table)
+        filter_transform = get_filter_transform(event)
+        if filter_transform
+          action['data'] = {}
+          filter_transform.each { |filter|
+            add_event_value_from_filter_to_action(event, filter, action)
+          }
+        else
+          add_event_data_using_configured_hints(event, action)
+        end
+        @logger.debug('event parsed to action', :action => action)
+      rescue Exception => e
+        @logger.error('failed parsing event', :event => event, :error => e)
+        action = nil
       end
-
-      @logger.debug('event parsed to action', :action => action)
       action
     end
 
diff --git a/lib/logstash/outputs/cassandra/safe_submitter.rb b/lib/logstash/outputs/cassandra/safe_submitter.rb
index 3a58560..2fac0a9 100644
--- a/lib/logstash/outputs/cassandra/safe_submitter.rb
+++ b/lib/logstash/outputs/cassandra/safe_submitter.rb
@@ -57,8 +57,10 @@ def prepare_queries(actions)
       remaining_queries = Queue.new
       actions.each do |action|
         begin
-          query = get_query(action)
-          remaining_queries << { :query => query, :arguments => action['data'].values }
+          if action
+            query = get_query(action)
+            remaining_queries << { :query => query, :arguments => action['data'].values }
+          end
         rescue Exception => e
           @logger.error('Failed to prepare query', :action => action, :exception => e, :backtrace => e.backtrace)
         end
diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index ece5213..8c69468 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-output-cassandra'
-  s.version         = '0.9.0'
+  s.version         = '0.9.1'
   s.licenses        = [ 'Apache License (2.0)' ]
   s.summary         = 'Store events into Cassandra'
   s.description     = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb
index 06e486b..a4527bb 100644
--- a/spec/unit/outputs/event_parser_spec.rb
+++ b/spec/unit/outputs/event_parser_spec.rb
@@ -170,9 +170,10 @@
           options = default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'what?!' }] })
           sut_instance = sut.new(options)
           sample_event['a_field'] = 'a_value'
-          expect(options['logger']).to(receive(:error))
+          expect(options['logger']).to(receive(:error)).at_least(:once)
 
-          expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/)
+          result = sut_instance.parse(sample_event)
+          expect(result).to be_nil
         end
       end
     end
@@ -238,21 +239,23 @@
     it 'fails for unknown hint types' do
       options = default_opts.update({ 'hints' => { 'a_field' => 'not_a_real_type' } })
       sut_instance = sut.new(options)
-      expect(options['logger']).to(receive(:error))
-
+      expect(options['logger']).to(receive(:error)).at_least(:once)
       sample_event['a_field'] = 'a value'
 
-      expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/)
+      result = sut_instance.parse(sample_event)
+
+      expect(result).to be_nil
     end
 
     it 'fails for unsuccessful hint conversion' do
       options = default_opts.update({ 'hints' => { 'a_field' => 'int' } })
-      expect(options['logger']).to(receive(:error))
-
+      expect(options['logger']).to(receive(:error)).at_least(:once)
       sut_instance = sut.new(options)
-
       sample_event['a_field'] = 'i am not an int!!!'
-      expect { sut_instance.parse(sample_event) }.to raise_error(/Cannot convert/)
+
+      result = sut_instance.parse(sample_event)
+
+      expect(result).to be_nil
     end
   end
 
@@ -297,8 +300,11 @@
       options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => 'map' } })
       sut_instance = sut.new(options)
       sample_event['a_field'] = 'i am not a set'
+      expect(options['logger']).to(receive(:error))
+
+      result = sut_instance.parse(sample_event)
 
-      expect { sut_instance.parse(sample_event) }.to raise_error ArgumentError
+      expect(result).to be_nil
     end
   end
 end
diff --git a/spec/unit/outputs/safe_submitter_spec.rb b/spec/unit/outputs/safe_submitter_spec.rb
index c804e38..e65d427 100644
--- a/spec/unit/outputs/safe_submitter_spec.rb
+++ b/spec/unit/outputs/safe_submitter_spec.rb
@@ -109,6 +109,15 @@ def generate_future_double
       return future_double
     end
 
+    it 'does nothing in case it got a nil action' do
+      doubles = setup_session_double(default_options)
+      expect(doubles[:session_double]).to_not(receive(:prepare))
+      expect(doubles[:session_double]).to_not(receive(:execute_async))
+      sut_instance = sut.new(default_options)
+
+      expect { sut_instance.submit([nil]) }.to_not raise_error
+    end
+
     it 'prepares and executes the query' do
       doubles = setup_session_double(default_options)
       expect(doubles[:session_double]).to(receive(:prepare).with(expected_query_for_one_action)).and_return('eureka')

From 922c7ea9b3068d48befebd7b4b4916284b074c5f Mon Sep 17 00:00:00 2001
From: Tansinee 
Date: Thu, 8 Sep 2016 18:17:12 +0700
Subject: [PATCH 120/126] Create a new hash of action_data instead of assign
 from event.to_hash

---
 lib/logstash/outputs/cassandra/event_parser.rb | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb
index 4d8dc13..c918055 100644
--- a/lib/logstash/outputs/cassandra/event_parser.rb
+++ b/lib/logstash/outputs/cassandra/event_parser.rb
@@ -70,9 +70,11 @@ def add_event_value_from_filter_to_action(event, filter, action)
     end
 
     def add_event_data_using_configured_hints(event, action)
-      action_data = event.to_hash
-      # Filter out @timestamp, @version, etc to be able to use elasticsearch input plugin directly
-      action_data.reject!{|key| %r{^@} =~ key}
+      action_data = {}
+      event.to_hash.each do |key, value|
+        action_data[key] = value unless %r{^@} =~ key
+      end
+
       @hints.each do |event_key, cassandra_type|
         if action_data.has_key?(event_key)
           action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type)

From d30007fc85fe77c63e29976501f78037a72fe04d Mon Sep 17 00:00:00 2001
From: Tansinee 
Date: Fri, 9 Sep 2016 09:25:39 +0700
Subject: [PATCH 121/126] Shorten re-assignment

---
 lib/logstash/outputs/cassandra/event_parser.rb | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb
index c918055..470f84e 100644
--- a/lib/logstash/outputs/cassandra/event_parser.rb
+++ b/lib/logstash/outputs/cassandra/event_parser.rb
@@ -70,11 +70,8 @@ def add_event_value_from_filter_to_action(event, filter, action)
     end
 
     def add_event_data_using_configured_hints(event, action)
-      action_data = {}
-      event.to_hash.each do |key, value|
-        action_data[key] = value unless %r{^@} =~ key
-      end
-
+      action_data = event.to_hash.reject { |key| %r{^@} =~ key }
+      
       @hints.each do |event_key, cassandra_type|
         if action_data.has_key?(event_key)
           action_data[event_key] = convert_value_to_cassandra_type_or_default_if_configured(action_data[event_key], cassandra_type)

From 317c953270745402235e6afe4b8d70f7fe827c2e Mon Sep 17 00:00:00 2001
From: Pitsanu Swangpheaw 
Date: Wed, 31 Aug 2016 11:31:24 +0700
Subject: [PATCH 122/126] make compatible with Cassandra 3.0

- remove varchar
- default time is localtime
---
 logstash-output-cassandra.gemspec              | 2 +-
 spec/integration/outputs/cassandra_spec.rb     | 1 -
 spec/integration/outputs/integration_helper.rb | 2 +-
 spec/unit/outputs/event_parser_spec.rb         | 3 +--
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index 8c69468..6a9bdc0 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -21,7 +21,7 @@ Gem::Specification.new do |s|
   # Gem dependencies
   s.add_runtime_dependency 'concurrent-ruby'
   s.add_runtime_dependency 'logstash-core', '>= 2.0.0', '< 3.0.0'
-  s.add_runtime_dependency 'cassandra-driver', '>= 2.0.0', '< 3.0.0'
+  s.add_runtime_dependency 'cassandra-driver', '>= 3.0.3'
   s.add_development_dependency 'cabin', ['~> 0.6']
   s.add_development_dependency 'longshoreman'
   s.add_development_dependency 'logstash-devutils'
diff --git a/spec/integration/outputs/cassandra_spec.rb b/spec/integration/outputs/cassandra_spec.rb
index c4de7aa..a5ecc06 100644
--- a/spec/integration/outputs/cassandra_spec.rb
+++ b/spec/integration/outputs/cassandra_spec.rb
@@ -86,7 +86,6 @@ def assert_proper_insert(type_to_test)
       { type: 'timestamp', value: 1457606758, assert_override: Helper::get_assert_timestamp_equallity() },
       { type: 'inet', value: '192.168.99.100' },
       { type: 'float', value: '10.050000190734863' },
-      { type: 'varchar', value: 'some chars' },
       { type: 'text', value: 'some text' },
       { type: 'blob', value: 'a blob' },
       { type: 'ascii', value: 'some ascii' },
diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb
index d865cdd..15be369 100644
--- a/spec/integration/outputs/integration_helper.rb
+++ b/spec/integration/outputs/integration_helper.rb
@@ -5,7 +5,7 @@
 
 CONTAINER_NAME = "logstash-output-cassandra-#{rand(999).to_s}"
 CONTAINER_IMAGE = 'cassandra'
-CONTAINER_TAG = '2.2'
+CONTAINER_TAG = '3.0'
 
 module CassandraHelper
   def get_host_ip
diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb
index a4527bb..3477458 100644
--- a/spec/unit/outputs/event_parser_spec.rb
+++ b/spec/unit/outputs/event_parser_spec.rb
@@ -108,7 +108,6 @@
           { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => 1457606758, expected: Time.at(1457606758) },
           { :name => 'inet',      :type => ::Cassandra::Types::Inet,      :value => '0.0.0.0' },
           { :name => 'float',     :type => ::Cassandra::Types::Float,     :value => '10.15' },
-          { :name => 'varchar',   :type => ::Cassandra::Types::Varchar,   :value => 'a varchar' },
           { :name => 'text',      :type => ::Cassandra::Types::Text,      :value => 'some text' },
           { :name => 'blob',      :type => ::Cassandra::Types::Blob,      :value => '12345678' },
           { :name => 'ascii',     :type => ::Cassandra::Types::Ascii,     :value => 'some ascii' },
@@ -261,7 +260,7 @@
 
   describe 'ignore_bad_values is turned on' do
     [
-        { :name => 'timestamp', :value => 'i dont have to_time',      :expected => Time::parse('1970-01-01 00:00:00 +0000') },
+        { :name => 'timestamp', :value => 'i dont have to_time',      :expected => Time::parse('1970-01-01 00:00:00 +0000').localtime },
         { :name => 'inet',      :value => 'i am not an inet address', :expected => '0.0.0.0' },
         { :name => 'float',     :value => 'i am not a float',         :expected => 0.0 },
         { :name => 'bigint',    :value => 'i am not a bigint',        :expected => 0 },

From 0a860bcaaf889a3008e3b971215809dd31ede413 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Mon, 19 Sep 2016 09:22:16 +0300
Subject: [PATCH 123/126] bumping version for release

---
 logstash-output-cassandra.gemspec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index 8c69468..37fd280 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-output-cassandra'
-  s.version         = '0.9.1'
+  s.version         = '0.9.2'
   s.licenses        = [ 'Apache License (2.0)' ]
   s.summary         = 'Store events into Cassandra'
   s.description     = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'

From 114cfabe7c0907dfb41959a89fad03eb53501747 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Mon, 19 Sep 2016 09:29:00 +0300
Subject: [PATCH 124/126] updated contributors with tansinee

---
 CONTRIBUTORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index a6f4374..74008f0 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -5,3 +5,4 @@ Contributors:
 * Oleg Tokarev (otokarev)
 * Elad Amit (eladamitpxi, amitelad7)
 * Valentin Fischer (valentinul)
+* tansinee

From df2d201abfcdac51f406bd3ff35898ac40d08799 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Mon, 19 Sep 2016 09:48:21 +0300
Subject: [PATCH 125/126] updated contributors, and bumping versions

---
 CONTRIBUTORS                                   | 1 +
 logstash-output-cassandra.gemspec              | 4 ++--
 spec/integration/outputs/integration_helper.rb | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index a6f4374..16d37b9 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -5,3 +5,4 @@ Contributors:
 * Oleg Tokarev (otokarev)
 * Elad Amit (eladamitpxi, amitelad7)
 * Valentin Fischer (valentinul)
+* Pitsanu Swangpheaw (roongr2k7)
diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index c5414cf..1828d62 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-output-cassandra'
-  s.version         = '0.9.1'
+  s.version         = '1.0.0'
   s.licenses        = [ 'Apache License (2.0)' ]
   s.summary         = 'Store events into Cassandra'
   s.description     = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
@@ -21,7 +21,7 @@ Gem::Specification.new do |s|
   # Gem dependencies
   s.add_runtime_dependency 'concurrent-ruby'
   s.add_runtime_dependency 'logstash-core', '>= 2.0.0', '< 3.0.0'
-  s.add_runtime_dependency 'cassandra-driver', '3.0.0.rc.2'
+  s.add_runtime_dependency 'cassandra-driver', '>= 3.0.0'
   s.add_development_dependency 'cabin', ['~> 0.6']
   s.add_development_dependency 'longshoreman'
   s.add_development_dependency 'logstash-devutils'
diff --git a/spec/integration/outputs/integration_helper.rb b/spec/integration/outputs/integration_helper.rb
index 0d5da46..5de7844 100644
--- a/spec/integration/outputs/integration_helper.rb
+++ b/spec/integration/outputs/integration_helper.rb
@@ -5,7 +5,7 @@
 
 CONTAINER_NAME = "logstash-output-cassandra-#{rand(999).to_s}"
 CONTAINER_IMAGE = 'cassandra'
-CONTAINER_TAG = '3.5'
+CONTAINER_TAG = '3'
 
 module CassandraHelper
   def get_host_ip

From e4309515542c72649d19c5b8dcc84337c39102d4 Mon Sep 17 00:00:00 2001
From: Elad Amit 
Date: Sun, 30 Oct 2016 15:58:17 +0200
Subject: [PATCH 126/126] updated with logstash 5 APIs

---
 .../outputs/cassandra/event_parser.rb         |  4 +-
 logstash-output-cassandra.gemspec             |  9 +--
 spec/unit/outputs/event_parser_spec.rb        | 60 +++++++++----------
 3 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/lib/logstash/outputs/cassandra/event_parser.rb b/lib/logstash/outputs/cassandra/event_parser.rb
index 8c9f148..8ba8d21 100644
--- a/lib/logstash/outputs/cassandra/event_parser.rb
+++ b/lib/logstash/outputs/cassandra/event_parser.rb
@@ -40,7 +40,7 @@ def parse(event)
     def get_filter_transform(event)
       filter_transform = nil
       if @filter_transform_event_key
-        filter_transform = event[@filter_transform_event_key]
+        filter_transform = event.get(@filter_transform_event_key)
         assert_filter_transform_structure(filter_transform)
       elsif @filter_transform.length > 0
         filter_transform = @filter_transform
@@ -59,7 +59,7 @@ def assert_filter_transform_structure(filter_transform)
     def add_event_value_from_filter_to_action(event, filter, action)
       event_data = event.sprintf(filter['event_key'])
       unless filter.fetch('expansion_only', false)
-        event_data = event[event_data]
+        event_data = event.get(event_data)
       end
       if filter.has_key?('cassandra_type')
         cassandra_type = event.sprintf(filter['cassandra_type'])
diff --git a/logstash-output-cassandra.gemspec b/logstash-output-cassandra.gemspec
index 1828d62..7c760df 100644
--- a/logstash-output-cassandra.gemspec
+++ b/logstash-output-cassandra.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name            = 'logstash-output-cassandra'
-  s.version         = '1.0.0'
+  s.version         = '5.0.0'
   s.licenses        = [ 'Apache License (2.0)' ]
   s.summary         = 'Store events into Cassandra'
   s.description     = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
@@ -20,16 +20,13 @@ Gem::Specification.new do |s|
 
   # Gem dependencies
   s.add_runtime_dependency 'concurrent-ruby'
-  s.add_runtime_dependency 'logstash-core', '>= 2.0.0', '< 3.0.0'
+  s.add_runtime_dependency 'logstash-core-plugin-api', '>= 1.60', '<= 2.99'
   s.add_runtime_dependency 'cassandra-driver', '>= 3.0.0'
-  s.add_development_dependency 'cabin', ['~> 0.6']
+  s.add_development_dependency 'cabin'
   s.add_development_dependency 'longshoreman'
   s.add_development_dependency 'logstash-devutils'
   s.add_development_dependency 'logstash-codec-plain'
   s.add_development_dependency 'simplecov'
   s.add_development_dependency 'simplecov-rcov'
-  s.add_development_dependency 'unparser', '0.2.4'
-  s.add_development_dependency 'metric_fu'
-  s.add_development_dependency 'coveralls'
   s.add_development_dependency 'gems'
 end
diff --git a/spec/unit/outputs/event_parser_spec.rb b/spec/unit/outputs/event_parser_spec.rb
index 3477458..d172785 100644
--- a/spec/unit/outputs/event_parser_spec.rb
+++ b/spec/unit/outputs/event_parser_spec.rb
@@ -29,7 +29,7 @@
 
     it 'allows for string expansion in table names' do
       sut_instance = sut.new(default_opts.update({ 'table' => '%{[a_field]}' }))
-      sample_event['a_field'] = 'a_value'
+      sample_event.set('a_field', 'a_value')
 
       action = sut_instance.parse(sample_event)
 
@@ -52,7 +52,7 @@
       describe 'properly configured' do
         it 'maps the event key to the column' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }] }))
-          sample_event['a_field'] = 'a_value'
+          sample_event.set('a_field', 'a_value')
 
           action = sut_instance.parse(sample_event)
 
@@ -61,8 +61,8 @@
 
         it 'works with multiple filter transforms' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }, { 'event_key' => 'another_field', 'column_name' => 'a_different_column' }] }))
-          sample_event['a_field'] = 'a_value'
-          sample_event['another_field'] = 'a_second_value'
+          sample_event.set('a_field', 'a_value')
+          sample_event.set('another_field', 'a_second_value')
 
           action = sut_instance.parse(sample_event)
 
@@ -72,8 +72,8 @@
 
         it 'allows for string expansion in event keys' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => '%{[pointer_to_another_field]}', 'column_name' => 'a_column' }] }))
-          sample_event['pointer_to_another_field'] = 'another_field'
-          sample_event['another_field'] = 'a_value'
+          sample_event.set('pointer_to_another_field', 'another_field')
+          sample_event.set('another_field', 'a_value')
 
           action = sut_instance.parse(sample_event)
 
@@ -91,8 +91,8 @@
 
         it 'allows for string expansion in column names' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => '%{[pointer_to_another_field]}' }] }))
-          sample_event['a_field'] = 'a_value'
-          sample_event['pointer_to_another_field'] = 'a_different_column'
+          sample_event.set('a_field', 'a_value')
+          sample_event.set('pointer_to_another_field', 'a_different_column')
 
           action = sut_instance.parse(sample_event)
 
@@ -103,7 +103,7 @@
 
       describe 'cassandra type mapping' do
         [
-          { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => Time::parse('1979-07-27 00:00:00 +0300') },
+          { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => Time::parse('1979-07-27 00:00:00 +0000'), expected: Time::parse('1979-07-27 00:00:00 +0000').utc},
           { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => '1982-05-04 00:00:00 +0300', expected: Time::parse('1982-05-04 00:00:00 +0300') },
           { :name => 'timestamp', :type => ::Cassandra::Types::Timestamp, :value => 1457606758, expected: Time.at(1457606758) },
           { :name => 'inet',      :type => ::Cassandra::Types::Inet,      :value => '0.0.0.0' },
@@ -123,7 +123,7 @@
           # NOTE: this is not the best test there is, but it is the best / simplest I could think of :/
           it "properly maps #{mapping[:name]} to #{mapping[:type]}" do
             sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => mapping[:name] }] }))
-            sample_event['a_field'] = mapping[:value]
+            sample_event.set('a_field', mapping[:value])
 
             action = sut_instance.parse(sample_event)
 
@@ -135,7 +135,7 @@
         it 'properly maps sets to their specific set types' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'set' }] }))
           original_value = [ 1, 2, 3 ]
-          sample_event['a_field'] = original_value
+          sample_event.set('a_field', original_value)
 
           action = sut_instance.parse(sample_event)
 
@@ -145,7 +145,7 @@
         it 'properly maps sets to their specific set types for type which also require actual conversion' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'set' }] }))
           original_value = %w(00000000-0000-0000-0000-000000000000 00000000-0000-0000-0000-000000000001 00000000-0000-0000-0000-000000000002)
-          sample_event['a_field'] = original_value
+          sample_event.set('a_field', original_value)
 
           action = sut_instance.parse(sample_event)
 
@@ -157,8 +157,8 @@
 
         it 'allows for string expansion in cassandra types' do
           sut_instance = sut.new(default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => '%{[pointer_to_a_field]}' }] }))
-          sample_event['a_field'] = '123'
-          sample_event['pointer_to_a_field'] = 'int'
+          sample_event.set('a_field', '123')
+          sample_event.set('pointer_to_a_field', 'int')
 
           action = sut_instance.parse(sample_event)
 
@@ -168,7 +168,7 @@
         it 'fails in case of an unknown type' do
           options = default_opts.update({ 'filter_transform' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column', 'cassandra_type' => 'what?!' }] })
           sut_instance = sut.new(options)
-          sample_event['a_field'] = 'a_value'
+          sample_event.set('a_field', 'a_value')
           expect(options['logger']).to(receive(:error)).at_least(:once)
 
           result = sut_instance.parse(sample_event)
@@ -180,8 +180,8 @@
     describe 'from event' do
       it 'obtains the filter transform from the event if defined' do
         sut_instance = sut.new(default_opts.update({ 'filter_transform_event_key' => 'an_event_filter' }))
-        sample_event['a_field'] = 'a_value'
-        sample_event['an_event_filter'] = [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }]
+        sample_event.set('a_field', 'a_value')
+        sample_event.set('an_event_filter', [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }])
 
         action = sut_instance.parse(sample_event)
 
@@ -190,8 +190,8 @@
 
       it 'obtains the filter transform from the event even when it is in the metadata' do
         sut_instance = sut.new(default_opts.update({ 'filter_transform_event_key' => '[@metadata][the_filter]' }))
-        sample_event['a_field'] = 'a_value'
-        sample_event['@metadata'] = { 'the_filter' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }] }
+        sample_event.set('a_field', 'a_value')
+        sample_event.set('@metadata', { 'the_filter' => [{ 'event_key' => 'a_field', 'column_name' => 'a_column' }] })
 
         action = sut_instance.parse(sample_event)
 
@@ -203,8 +203,8 @@
   describe 'hints' do
     it 'removes fields starting with @' do
       sut_instance = sut.new(default_opts.update({ 'hints' => {} }))
-      sample_event['leave'] = 'a_value'
-      sample_event['@remove'] = 'another_value'
+      sample_event.set('leave', 'a_value')
+      sample_event.set('@remove', 'another_value')
 
       action = sut_instance.parse(sample_event)
 
@@ -215,18 +215,18 @@
     it 'does not attempt to change items with no hints' do
       sut_instance = sut.new(default_opts.update({ 'hints' => {} }))
       expected_value = [ 1, 2, 3 ]
-      sample_event['no_hint_here'] = expected_value
+      sample_event.set('no_hint_here', expected_value)
 
       action = sut_instance.parse(sample_event)
 
-      expect(action['data']['no_hint_here']).to(equal(expected_value))
+      expect(action['data']['no_hint_here']).to(match_array(expected_value))
     end
 
     it 'converts items with hints' do
       sut_instance = sut.new(default_opts.update({ 'hints' => { 'a_set' => 'set', 'an_int' => 'int' } }))
       original_set = [ 1, 2, 3 ]
-      sample_event['a_set'] = original_set
-      sample_event['an_int'] = '123'
+      sample_event.set('a_set', original_set)
+      sample_event.set('an_int', '123')
 
       action = sut_instance.parse(sample_event)
 
@@ -239,7 +239,7 @@
       options = default_opts.update({ 'hints' => { 'a_field' => 'not_a_real_type' } })
       sut_instance = sut.new(options)
       expect(options['logger']).to(receive(:error)).at_least(:once)
-      sample_event['a_field'] = 'a value'
+      sample_event.set('a_field', 'a value')
 
       result = sut_instance.parse(sample_event)
 
@@ -250,7 +250,7 @@
       options = default_opts.update({ 'hints' => { 'a_field' => 'int' } })
       expect(options['logger']).to(receive(:error)).at_least(:once)
       sut_instance = sut.new(options)
-      sample_event['a_field'] = 'i am not an int!!!'
+      sample_event.set('a_field', 'i am not an int!!!')
 
       result = sut_instance.parse(sample_event)
 
@@ -275,7 +275,7 @@
         options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => mapping[:name] } })
         expect(options['logger']).to(receive(:warn))
         sut_instance = sut.new(options)
-        sample_event['a_field'] = mapping[:value]
+        sample_event.set('a_field', mapping[:value])
 
         action = sut_instance.parse(sample_event)
 
@@ -287,7 +287,7 @@
       options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => 'set' } })
       expect(options['logger']).to(receive(:warn))
       sut_instance = sut.new(options)
-      sample_event['a_field'] = 'i am not a set'
+      sample_event.set('a_field', 'i am not a set')
 
       action = sut_instance.parse(sample_event)
 
@@ -298,7 +298,7 @@
     it 'raises an ArgumentError in case we try to default a type we dont know' do
       options = default_opts.update({ 'ignore_bad_values' => true, 'hints' => { 'a_field' => 'map' } })
       sut_instance = sut.new(options)
-      sample_event['a_field'] = 'i am not a set'
+      sample_event.set('a_field', 'i am not a set')
       expect(options['logger']).to(receive(:error))
 
       result = sut_instance.parse(sample_event)