Skip to content

Commit

Permalink
Handle force_encoding issue, according to #73
Browse files Browse the repository at this point in the history
  • Loading branch information
xijo committed Oct 2, 2019
1 parent d18fd18 commit 40e0229
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 8 deletions.
16 changes: 9 additions & 7 deletions lib/reverse_markdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,17 @@
module ReverseMarkdown

def self.convert(input, options = {})
root = case input
when String then Nokogiri::HTML(input).root
when Nokogiri::XML::Document then input.root
when Nokogiri::XML::Node then input
end
config.with(options) do
input = cleaner.force_encoding(input.to_s)

root or return ''
root = case input
when String then Nokogiri::HTML(input).root
when Nokogiri::XML::Document then input.root
when Nokogiri::XML::Node then input
end

root or return ''

config.with(options) do
result = ReverseMarkdown::Converters.lookup(root.name).convert(root)
cleaner.tidy(result)
end
Expand Down
5 changes: 5 additions & 0 deletions lib/reverse_markdown/cleaner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ def clean_punctuation_characters(string)
string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, "\\1".strip + "\\2")
end

# Removes byte sequences that are not valid UTF-8 from +string+ when the
# +force_encoding+ config option is enabled.
#
# Valid multi-byte UTF-8 characters are preserved; only malformed bytes are
# dropped. The input is never mutated.
#
# @param string [String] raw input, possibly containing invalid bytes
# @return [String] +string+ itself when the option is off, otherwise a new
#   UTF-8 encoded string with invalid sequences removed
def force_encoding(string)
  return string unless ReverseMarkdown.config.force_encoding

  # Transcoding from 'binary' would treat every byte >= 0x80 as undefined and
  # strip legitimate non-ASCII text too. Reinterpreting a copy as UTF-8 and
  # scrubbing discards only the sequences that are actually malformed.
  string.dup.force_encoding(Encoding::UTF_8).scrub('')
end

private

def preserve_border_whitespaces(string, options = {}, &block)
Expand Down
7 changes: 6 additions & 1 deletion lib/reverse_markdown/config.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
module ReverseMarkdown
class Config
attr_accessor :unknown_tags, :github_flavored, :tag_border
attr_accessor :unknown_tags, :github_flavored, :tag_border, :force_encoding

def initialize
@unknown_tags = :pass_through
@github_flavored = false
@force_encoding = false
@em_delimiter = '_'.freeze
@strong_delimiter = '**'.freeze
@inline_options = {}
Expand All @@ -29,5 +30,9 @@ def github_flavored
# Effective tag border: the inline (per-call) option when it is truthy,
# otherwise the configured instance value.
def tag_border
  inline_value = @inline_options[:tag_border]
  return inline_value if inline_value

  @tag_border
end

# Effective +force_encoding+ setting.
#
# An inline (per-call) option takes precedence over the configured value,
# including an explicit +false+, so a single call can opt out while the
# setting is enabled on the config object. A missing or nil inline value
# falls back to the configured one.
#
# @return [Boolean, Object] the inline value when it is not nil, otherwise
#   the configured value
def force_encoding
  inline_value = @inline_options[:force_encoding]
  # `||` would discard an explicit inline `false`; only nil means "not given".
  inline_value.nil? ? @force_encoding : inline_value
end
end
end
10 changes: 10 additions & 0 deletions spec/lib/reverse_markdown_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,15 @@
end
expect(ReverseMarkdown.config.github_flavored).to eq true
end

describe 'force_encoding option' do
  # "\255" is an octal escape for the single byte 0xAD, which is not a
  # valid UTF-8 sequence on its own.
  let(:invalid_input) { "hi \255" }

  it 'raises invalid byte sequence in UTF-8 exception' do
    conversion = -> { ReverseMarkdown.convert(invalid_input) }
    expect(&conversion).to raise_error(ArgumentError)
  end

  it 'handles invalid byte sequence if option is set' do
    markdown = ReverseMarkdown.convert(invalid_input, force_encoding: true)
    expect(markdown).to eq "hi\n\n"
  end
end
end
end

0 comments on commit 40e0229

Please sign in to comment.