Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rewrite rule for Ghost cards #3077

Merged
merged 3 commits into from
Jan 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions internal/reader/rewrite/rewrite_functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,55 @@ func removeTables(entryContent string) string {
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}

// fixGhostCards rewrites Ghost bookmark cards into plain links.
//
// Every "figure.kg-card" element that contains both a ".kg-bookmark-title"
// text and an "a.kg-bookmark-container" href is converted; cards lacking
// either one are left untouched. The link label is the title, followed by
// " - author" when a ".kg-bookmark-author" is present and the title does not
// already end with it.
//
// A card whose next element sibling is also a card starts a <ul>, and the
// following adjacent cards are appended to it as <li> items. A card that is
// not part of such a run is replaced by a bare <a> element.
//
// The inner HTML of <body> is returned with surrounding whitespace trimmed.
// If entryContent cannot be parsed it is returned unchanged.
func fixGhostCards(entryContent string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if err != nil {
		return entryContent
	}

	const cardSelector = "figure.kg-card"

	// currentList is the <ul> collecting the run of adjacent cards that is
	// currently being processed, or nil when no run is open.
	var currentList *goquery.Selection

	doc.Find(cardSelector).Each(func(_ int, card *goquery.Selection) {
		title := card.Find(".kg-bookmark-title").First().Text()
		author := card.Find(".kg-bookmark-author").First().Text()
		href := card.Find("a.kg-bookmark-container").First().AttrOr("href", "")

		if href == "" || title == "" {
			// This card cannot be converted and stays where it is. It also
			// terminates any open run: without this reset, later cards would
			// be appended to a list located before this element and the
			// document order would change.
			currentList = nil
			return
		}

		label := title
		if author != "" && !strings.HasSuffix(title, author) {
			label = title + " - " + author
		}

		// Evaluated before the card is removed from the tree.
		nextIsCard := card.Next().Is(cardSelector)

		// The next element is also a card: open a list right before this one.
		if currentList == nil && nextIsCard {
			currentList = card.BeforeHtml("<ul></ul>").Prev()
		}

		// Insert an empty anchor first and fill it in through SetAttr and
		// SetText, so that text extracted from the feed is never re-parsed
		// as markup.
		var anchor *goquery.Selection
		if currentList != nil {
			currentList.AppendHtml("<li><a></a></li>")
			anchor = currentList.Children().Last().Children().First()
		} else {
			anchor = card.BeforeHtml("<a></a>").Prev()
		}
		anchor.SetAttr("href", href).SetText(label)
		card.Remove()

		// The run ends with the last card that has no card right after it.
		if !nextIsCard {
			currentList = nil
		}
	})

	// Html only fails when serialization fails; the best-effort result is
	// returned in that case.
	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
	return strings.TrimSpace(output)
}
2 changes: 2 additions & 0 deletions internal/reader/rewrite/rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
entry.Content = removeTables(entry.Content)
case "remove_clickbait":
entry.Title = titlelize(entry.Title)
case "fix_ghost_cards":
entry.Content = fixGhostCards(entry.Content)
}
}

Expand Down
228 changes: 228 additions & 0 deletions internal/reader/rewrite/rewriter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,3 +703,231 @@ func TestAddImageTitle(t *testing.T) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCard checks that a bookmark card carrying a title and an author
// is rewritten by the fix_ghost_cards rule into one "title - author" link.
func TestFixGhostCard(t *testing.T) {
	want := &model.Entry{
		Title:   `A title`,
		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
	}

	got := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}

// TestFixGhostCardNoCard checks that content without any card comes out of
// the fix_ghost_cards rule unchanged.
func TestFixGhostCardNoCard(t *testing.T) {
	want := &model.Entry{
		Title:   `A title`,
		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
	}

	got := &model.Entry{
		Title:   `A title`,
		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}

// TestFixGhostCardInvalidCard checks that a card without the bookmark title
// and container elements is left exactly as it was.
func TestFixGhostCardInvalidCard(t *testing.T) {
	want := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a href="https://example.org/article">This card does not have the required fields</a>
</figure>`,
	}

	got := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a href="https://example.org/article">This card does not have the required fields</a>
</figure>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}

// TestFixGhostCardMissingAuthor checks that a card without an author element
// produces a link labelled with the title only.
func TestFixGhostCardMissingAuthor(t *testing.T) {
	want := &model.Entry{
		Title:   `A title`,
		Content: `<a href="https://example.org/article">Example Article</a>`,
	}

	got := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}

// TestFixGhostCardDuplicatedAuthor checks that the author is not appended a
// second time when the card title already ends with it.
func TestFixGhostCardDuplicatedAuthor(t *testing.T) {
	want := &model.Entry{
		Title:   `A title`,
		Content: `<a href="https://example.org/article">Example Article - Example</a>`,
	}

	got := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}

// TestFixGhostCardMultiple checks that two directly adjacent cards are merged
// into a single unordered list holding one link per card.
func TestFixGhostCardMultiple(t *testing.T) {
	want := &model.Entry{
		Title:   `A title`,
		Content: `<ul><li><a href="https://example.org/article1">Example Article 1 - Example</a></li><li><a href="https://example.org/article2">Example Article 2 - Example</a></li></ul>`,
	}

	got := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article1">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 1 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>
<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article2">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 2 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}

// TestFixGhostCardMultipleSplit checks that two cards separated by a
// paragraph become two standalone links, with the paragraph kept between
// them and no list created.
func TestFixGhostCardMultipleSplit(t *testing.T) {
	want := &model.Entry{
		Title: `A title`,
		Content: `<a href="https://example.org/article1">Example Article 1 - Example</a>
<p>This separates the two cards</p>
<a href="https://example.org/article2">Example Article 2 - Example</a>`,
	}

	got := &model.Entry{
		Title: `A title`,
		Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article1">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 1 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>
<p>This separates the two cards</p>
<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article2">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 2 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
	}

	Rewriter("https://example.org/article", got, `fix_ghost_cards`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}
Loading