Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Echo/fix omission with break token #1

Merged
merged 2 commits into from
Apr 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions lib/truncate_html/html_truncator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def truncate
return @omission if @chars_remaining < 0

@original_html.html_tokens.each do |token|
if @chars_remaining <= 0 || truncate_token?(token)
if @chars_remaining <= 0
close_open_tags
break
else
Expand Down Expand Up @@ -49,18 +49,21 @@ def build_output
end

def process_token(token)
append_to_result(token)
if token.html_tag?
append_to_result(token) if !truncate_token?(token)
if truncate_token?(token)
@chars_remaining = 0
elsif token.html_tag?
if token.open_tag?
@open_tags << token
else
remove_latest_open_tag(token)
end
elsif !token.html_comment?
@chars_remaining -= (@word_boundary ? token.length : token[0, @chars_remaining].length)
if @chars_remaining <= 0
@truncated_html[-1] = @truncated_html[-1].rstrip + @omission
end
end

if @chars_remaining <= 0
@truncated_html[-1] = @truncated_html[-1].rstrip + @omission
end
end

Expand Down
56 changes: 31 additions & 25 deletions spec/truncate_html/html_truncator_spec.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# Encoding: UTF-8
require File.join(File.dirname(__FILE__), '..', 'spec_helper')

describe TruncateHtml::HtmlTruncator do

def truncate(html, opts = {})
html_string = TruncateHtml::HtmlString.new(html)
TruncateHtml::HtmlTruncator.new(html_string, opts).truncate
Expand All @@ -24,13 +22,13 @@ def truncate(html, opts = {})
context 'and a custom omission value is passed' do
it 'retains the omission text' do
expect(
truncate("testtest", length: 10, omission: '..', word_boundary: false)
truncate('testtest', length: 10, omission: '..', word_boundary: false)
).to eq 'testtest..'
end

it 'handles multibyte characters' do
expect(
truncate("prüfenprüfen", length: 8, omission: '..', word_boundary: false)
truncate('prüfenprüfen', length: 8, omission: '..', word_boundary: false)
).to eq 'prüfen..'
end
end
Expand All @@ -54,25 +52,25 @@ def truncate(html, opts = {})
it 'is respectful of closing tags' do
expect(truncate('<p>hmmm this <em>should</em> be okay. I think...</p>',
length: 28, omission: '', word_boundary: /\S[\.\?\!]/)).
to eq "<p>hmmm this <em>should</em> be okay.</p>"
to eq '<p>hmmm this <em>should</em> be okay.</p>'
end
end

it "includes the omission text's length in the returned truncated html" do
expect(truncate('a b c', length: 4, omission: '...')).to eq 'a...'
end

it "includes omission even on the edge (issue #18)" do
it 'includes omission even on the edge (issue #18)' do
opts = { word_boundary: false, length: 12 }
expect(truncate('One two three', opts)).to eq 'One two t...'
end

it "never returns a string longer than :length" do
expect(truncate("test this shit", length: 10)).to eq 'test...'
it 'never returns a string longer than :length' do
expect(truncate('test this shit', length: 10)).to eq 'test...'
end

it 'supports omissions longer than the maximum length' do
expect{ truncate('', length: 1, omission: '...') }.to_not raise_error
expect { truncate('', length: 1, omission: '...') }.to_not raise_error
end

it 'returns the omission when the specified length is smaller than the omission' do
Expand Down Expand Up @@ -110,24 +108,23 @@ def truncate(html, opts = {})
end

it 'handles multibyte characters and leaves them in the result' do
html = '<p>Look at our multibyte characters ā ž <a href = "awesomeful.net">this</a> link for randomness ā ž</p>'
html = '<p>Look at our multibyte characters ā ž <a href = "awesomeful.net">this</a> link for randomness ā ž</p>'
expect(truncate(html, length: html.length)).to eq html
end

#unusual, but just covering my ass
# unusual, but just covering my ass
it 'recognizes the multiline html properly' do
html = <<-END_HTML
<div id="foo"
class="bar">
This is ugly html.
</div>
html = <<~END_HTML
<div id="foo"
class="bar">
This is ugly html.
</div>
END_HTML
expect(truncate(html, length: 12)).to eq ' <div id="foo" class="bar"> This is...</div>'
end

%w(br hr img).each do |unpaired_tag|
%w[br hr img].each do |unpaired_tag|
context "when the html contains a #{unpaired_tag} tag" do

context "and the #{unpaired_tag} does not have the closing slash" do
it "does not close the #{unpaired_tag} tag" do
html = "<div>Some before. <#{unpaired_tag}>and some after</div>"
Expand All @@ -145,7 +142,6 @@ def truncate(html, opts = {})
expect(truncate(html_caps, length: 19)).to eq "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
end
end

end
end

Expand All @@ -159,8 +155,8 @@ def truncate(html, opts = {})
“我现在使用的是中文的拼音。”<br>
测试一下具体的truncate</em>html功能。</p>"

expect(truncate(html, omission: "", length: 50)).
to include "<p>“我现在使用的是中文的拼音。”<br>"
expect(truncate(html, omission: '', length: 50)).
to include '<p>“我现在使用的是中文的拼音。”<br>'
end

context 'when the break_token option is set as <!-- truncate -->' do
Expand All @@ -177,7 +173,7 @@ def truncate(html, opts = {})
it 'truncates before the length param if the break_token is before the token at "length"' do
expect(truncate('This is line one. <!-- truncate --> This is line two.',
length: 30, break_token: '<!-- truncate -->')).
to eq 'This is line one.'
to eq 'This is line one....'
end
end

Expand All @@ -195,7 +191,7 @@ def truncate(html, opts = {})
it 'truncates before the length param if the break_token is before the token at "length"' do
expect(truncate('This is line one. <!-- break --> This is line two.',
length: 30, break_token: '<!-- break -->')).
to eq 'This is line one.'
to eq 'This is line one....'
end
end

Expand All @@ -213,7 +209,7 @@ def truncate(html, opts = {})
it 'truncates before the length param if the break_token is before the token at "length"' do
expect(truncate('This is line one. <break /> This is line two.',
length: 30, break_token: '<break />')).
to eq 'This is line one.'
to eq 'This is line one....'
end
end

Expand All @@ -231,7 +227,7 @@ def truncate(html, opts = {})
it 'truncates before the length param if the break_token is before the token at "length"' do
expect(truncate('This is line one. foobar This is line two.',
length: 30, break_token: 'foobar')).
to eq 'This is line one.'
to eq 'This is line one....'
end
end

Expand All @@ -242,4 +238,14 @@ def truncate(html, opts = {})
to eq '<h1>hello <!-- stuff --> and <!-- la -->...</h1>'
end
end

context 'when the break_token and a custom omission options are used' do
it 'includes the custom omission after the truncation' do
expect(truncate('This is the time to truncate this. Do it properly!',
length: 50,
break_token: 'truncate',
omission: ' <a href="path">MORE</a>')).
to eq 'This is the time to <a href="path">MORE</a>'
end
end
end