From 630a8b0c90b287d0f43d1c2de10681c38eb2fd4f Mon Sep 17 00:00:00 2001 From: Makyen Date: Sat, 1 Apr 2023 11:41:15 -0700 Subject: [PATCH] So, this is one of those non-magic places in Ruby --- app/models/spam_wave.rb | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/app/models/spam_wave.rb b/app/models/spam_wave.rb index 9b070980..b5729f7e 100644 --- a/app/models/spam_wave.rb +++ b/app/models/spam_wave.rb @@ -65,11 +65,14 @@ def post_matches?(post, site_ids = nil) # We only care about everything matching. # Returning here saves testing the longer strings if a shorter one doesn't match. post_text = post.send(f.to_sym) - post_text = '' if post_text.nil? - # UTF-8 -> UTF-16 -> UTF-8 idea and code from [answer to: "ruby 1.9: invalid byte sequence in UTF-8"](https://stackoverflow.com/a/8873922) - # by [RubenLaguna](https://stackoverflow.com/users/90580/rubenlaguna), which is under a CC BY-SA 3.0 license. - post_text = post_text.encode!('UTF-16', 'UTF-8', invalid: :replace, replace: '') - post_text = post_text.encode!('UTF-8', 'UTF-16') + if post_text.nil? + post_text = '' + else + # UTF-8 -> UTF-16 -> UTF-8 idea and code from [answer to: "ruby 1.9: invalid byte sequence in UTF-8"](https://stackoverflow.com/a/8873922) + # by [RubenLaguna](https://stackoverflow.com/users/90580/rubenlaguna), which is under a CC BY-SA 3.0 license. + post_text.encode!('UTF-16', 'UTF-8', invalid: :replace, replace: '') + post_text.encode!('UTF-8', 'UTF-16') + end Rails.logger.debug "[spam-wave] id: #{id}: #{name}:: post #{f}: encoding: #{post_text.encoding}" return false unless regex.match?(post_text) Rails.logger.debug "[spam-wave] id: #{id}: #{name}:: post_matches?: #{f}: MATCHES: post id: #{post.id}: #{post.title}"