diff --git a/lib/truncate_html/html_string.rb b/lib/truncate_html/html_string.rb index 76d82e9..1b31fdf 100644 --- a/lib/truncate_html/html_string.rb +++ b/lib/truncate_html/html_string.rb @@ -3,7 +3,8 @@ module TruncateHtml class HtmlString < String UNPAIRED_TAGS = %w(br hr img).freeze - REGEX = /(?:.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?]+|\s+|[[:punct:]]/.freeze + REGEX = /(?:.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?]+|\s+|[[:punct:]]/.freeze + HTMLTAGS = /]*>([\s\S]*?)<\/script>|<("[^"]*"|'[^']*'|[^'">])*>/.freeze def initialize(original_html) super(original_html) @@ -11,13 +12,7 @@ def initialize(original_html) def html_tokens scan(REGEX).map do |token| - HtmlString.new( - token.gsub( - /\n/,' ' #replace newline characters with a whitespace - ).gsub( - /\s+/, ' ' #clean out extra consecutive whitespace - ) - ) + HtmlString.new(token).replace_newline.clean_whitespaces end end @@ -37,5 +32,17 @@ def matching_close_tag gsub(/<(\w+)\s?.*>/, '').strip end + def clean_html + gsub(HTMLTAGS, '').replace_newline.clean_whitespaces + end + + def replace_newline + gsub(/\n/, ' ') + end + + def clean_whitespaces + gsub(/\s+/, ' ') + end + end end diff --git a/lib/truncate_html/html_truncator.rb b/lib/truncate_html/html_truncator.rb index 52b707f..d4edf4a 100644 --- a/lib/truncate_html/html_truncator.rb +++ b/lib/truncate_html/html_truncator.rb @@ -3,16 +3,17 @@ class HtmlTruncator def initialize(original_html, options = {}) @original_html = original_html - length = options[:length] || TruncateHtml.configuration.length + @length = options[:length] || TruncateHtml.configuration.length @omission = options[:omission] || TruncateHtml.configuration.omission @word_boundary = (options.has_key?(:word_boundary) ? options[:word_boundary] : TruncateHtml.configuration.word_boundary) @break_token = options[:break_token] || TruncateHtml.configuration.break_token || nil - @chars_remaining = length - @omission.length + @chars_remaining = @length - @omission.length @open_tags, @closing_tags, @truncated_html = [], [], [''] end def truncate return @omission if @chars_remaining < 0 + return @original_html if return_html? @original_html.html_tokens.each do |token| if @chars_remaining <= 0 || truncate_token?(token) close_open_tags @@ -93,5 +94,9 @@ def remove_latest_open_tag(close_tag) def truncate_token?(token) @break_token and token == @break_token end + + def return_html? + @original_html.clean_html.length <= @length && !@original_html.html_tokens.include?(@break_token) + end end end diff --git a/spec/truncate_html/html_string_spec.rb b/spec/truncate_html/html_string_spec.rb index 064f87b..cdbc7d8 100644 --- a/spec/truncate_html/html_string_spec.rb +++ b/spec/truncate_html/html_string_spec.rb @@ -79,4 +79,37 @@ def html_string(original_string) html_string('foo').should_not be_html_comment end end + + describe '#replace_newline' do + it 'returns the string with whitespaces instead of newlines' do + html = 'This is a string. +With newlines. +Dont want them.' + expected = 'This is a string. With newlines. Dont want them.' + + html_string(html).replace_newline.should == expected + end + end + + describe '#clean_whitespace' do + it 'returns the string with only single whitespaces' do + html = 'This is a string. With double white spaces.' + expected = 'This is a string. With double white spaces.' + html_string(html).clean_whitespaces.should == expected + end + end + + describe '#clean_html' do + it 'returns the html string without any html tags' do + html = 'This is bold.
This will show
' + expected = 'This is bold. This will show' + html_string(html).clean_html.should == expected + end + + it 'returns the html string without any comments' do + html = 'This is bold. And this will show' + expected = 'This is bold. And this will show' + html_string(html).clean_html.should == expected + end + end end diff --git a/spec/truncate_html/html_truncator_spec.rb b/spec/truncate_html/html_truncator_spec.rb index 0da14d6..faf3f82 100644 --- a/spec/truncate_html/html_truncator_spec.rb +++ b/spec/truncate_html/html_truncator_spec.rb @@ -15,12 +15,12 @@ def truncate(html, opts = {}) it 'retains the tags within the text' do html = 'some text CAPS some text' - truncate(html, :length => 25, :word_boundary => false).should == 'some text CAPS some te...' + truncate(html, :length => 19, :word_boundary => false).should == 'some text CAPS s...' end context 'and a custom omission value is passed' do it 'retains the omission text' do - truncate("testtest", :length => 10, :omission => '..', :word_boundary => false).should == 'testtest..' + truncate("testtest", :length => 7, :omission => '..', :word_boundary => false).should == 'testt..' end it 'handles multibyte characters' do @@ -204,4 +204,34 @@ def truncate(html, opts = {}) '

hello and ...

' end end + + context 'when the clean string length is the same than the length param' do + it 'does not truncate the string' do + html = 'exact string length' + truncate(html, length: 19).should == html + end + + it 'does not truncate the string even if it contains html tags' do + html = 'exact string length' + truncate(html, length: 19).should == html + end + + it 'does not truncate the string even if it contains html comments' do + html = 'exact string length' + truncate(html, length: 19).should == html + end + + context 'when the break_token is set' do + it 'truncates before the break_token if included in the string' do + html = 'exact string length' + expected = 'exact string' + truncate(html, length: 19, break_token: 'length').should == expected + end + + it 'does not truncate before if break token is not in the string' do + html = 'exact string length' + truncate(html, length: 19, break_token: 'nothere').should == html + end + end + end end