Skip to content

Commit

Permalink
feat: add fix_ghost_cards rewrite rule
Browse files Browse the repository at this point in the history
  • Loading branch information
jake-walker authored Jan 12, 2025
1 parent 1e54a07 commit 6cbe8c3
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 0 deletions.
52 changes: 52 additions & 0 deletions internal/reader/rewrite/rewrite_functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,55 @@ func removeTables(entryContent string) string {
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}

// ghostBookmarkLink reads the link target and the display text out of a Ghost
// bookmark card.
//
// The text is the card title, followed by " - " and the author unless the
// author is empty or the title already ends with it. ok is false when the
// selection has no "a.kg-bookmark-container" href or no ".kg-bookmark-title"
// text; this is also the case for an empty selection.
func ghostBookmarkLink(card *goquery.Selection) (href, text string, ok bool) {
	title := card.Find(".kg-bookmark-title").First().Text()
	target := card.Find("a.kg-bookmark-container").First().AttrOr("href", "")
	if target == "" || title == "" {
		return "", "", false
	}

	author := card.Find(".kg-bookmark-author").First().Text()
	if author == "" || strings.HasSuffix(title, author) {
		return target, title, true
	}
	return target, title + " - " + author, true
}

// fixGhostCards replaces Ghost bookmark cards ("figure.kg-card" elements that
// carry a bookmark link and title) with plain links.
//
// A card standing alone becomes a single <a> element. A run of directly
// adjacent bookmark cards becomes one <ul> with an <li><a> entry per card.
// Cards without a link or a title are left untouched and terminate any run.
//
// It returns the inner HTML of the resulting <body> with surrounding
// whitespace trimmed, or entryContent unchanged when the document cannot be
// parsed or rendered.
func fixGhostCards(entryContent string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if err != nil {
		return entryContent
	}

	const cardSelector = "figure.kg-card"

	// currentList is the <ul> collecting the run of adjacent cards currently
	// being processed, or nil when no run is open.
	var currentList *goquery.Selection

	doc.Find(cardSelector).Each(func(_ int, s *goquery.Selection) {
		href, text, ok := ghostBookmarkLink(s)
		if !ok {
			// Not a convertible bookmark: leave it as is. It also breaks any
			// open run, otherwise later cards would be appended to a list
			// located elsewhere in the document.
			currentList = nil
			return
		}

		// Look at the sibling before s is detached from the tree. Only a
		// sibling that will itself be converted counts as part of the run.
		next := s.Next()
		nextIsBookmark := false
		if next.Is(cardSelector) {
			_, _, nextIsBookmark = ghostBookmarkLink(next)
		}

		// if the next element is also a bookmark card, start a list
		if nextIsBookmark && currentList == nil {
			currentList = s.BeforeHtml("<ul></ul>").Prev()
		}

		// Create the empty anchor first and fill it through SetAttr/SetText so
		// that the extracted (already entity-decoded) values are escaped
		// instead of being re-parsed as markup.
		var anchor *goquery.Selection
		if currentList != nil {
			currentList.AppendHtml("<li><a></a></li>")
			anchor = currentList.Children().Last().Children().First()
		} else {
			anchor = s.BeforeHtml("<a></a>").Prev()
		}
		anchor.SetAttr("href", href).SetText(text)
		s.Remove()

		// the run ends with this card when no bookmark card follows it
		if !nextIsBookmark {
			currentList = nil
		}
	})

	output, err := doc.FindMatcher(goquery.Single("body")).Html()
	if err != nil {
		return entryContent
	}
	return strings.TrimSpace(output)
}
2 changes: 2 additions & 0 deletions internal/reader/rewrite/rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
entry.Content = removeTables(entry.Content)
case "remove_clickbait":
entry.Title = titlelize(entry.Title)
case "fix_ghost_cards":
entry.Content = fixGhostCards(entry.Content)
}
}

Expand Down
228 changes: 228 additions & 0 deletions internal/reader/rewrite/rewriter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,3 +703,231 @@ func TestAddImageTitle(t *testing.T) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCard checks that the fix_ghost_cards rule turns a single
// bookmark card that has a link, a title and an author into one anchor whose
// text is "<title> - <author>".
func TestFixGhostCard(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
}

// Only the link survives; description, icon, publisher and thumbnail are dropped.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCardNoCard checks that content containing no Ghost card at all
// comes out of the fix_ghost_cards rule unchanged.
func TestFixGhostCardNoCard(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`,
}

// Identical to the input: the rule must be a no-op here.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCardInvalidCard checks that a kg-card figure lacking the
// bookmark markup (its anchor has no kg-bookmark-container class and there is
// no kg-bookmark-title element) is left untouched by the fix_ghost_cards rule.
func TestFixGhostCardInvalidCard(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a href="https://example.org/article">This card does not have the required fields</a>
</figure>`,
}

// The expected content is compared as an exact string, so the whitespace
// inside the raw literal has to match the input.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a href="https://example.org/article">This card does not have the required fields</a>
</figure>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCardMissingAuthor checks that a bookmark card without any
// kg-bookmark-author element is converted to an anchor labelled with the
// title only.
func TestFixGhostCardMissingAuthor(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
}

// No " - <author>" suffix is expected because the card carries no author.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/article">Example Article</a>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCardDuplicatedAuthor checks that the author is not appended a
// second time when the card title already ends with the author name.
func TestFixGhostCardDuplicatedAuthor(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
}

// The title "Example Article - Example" already ends with the author
// "Example", so the link text is the title as is.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCardMultiple checks that two directly adjacent bookmark cards
// are merged into a single <ul> holding one <li> link per card.
func TestFixGhostCardMultiple(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article1">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 1 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>
<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article2">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 2 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
}

// Both cards end up as items of the same list, in document order.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<ul><li><a href="https://example.org/article1">Example Article 1 - Example</a></li><li><a href="https://example.org/article2">Example Article 2 - Example</a></li></ul>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

// TestFixGhostCardMultipleSplit checks that two bookmark cards separated by
// another element (a paragraph) are each replaced by a standalone anchor
// instead of being grouped into a list.
func TestFixGhostCardMultipleSplit(t *testing.T) {
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article1">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 1 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>
<p>This separates the two cards</p>
<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article2">
<div class="kg-bookmark-content">
<div class="kg-bookmark-title">Example Article 2 - Example</div>
<div class="kg-bookmark-description">Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...</div>
<div class="kg-bookmark-metadata">
<img class="kg-bookmark-icon" src="https://example.org/favicon.ico" alt="">
<span class="kg-bookmark-author">Example</span>
<span class="kg-bookmark-publisher">Test Author</span>
</div>
</div>
<div class="kg-bookmark-thumbnail">
<img src="https://example.org/article-image.jpg" alt="" onerror="this.style.display = 'none'">
</div>
</a>
</figure>`,
}

// The paragraph and the whitespace around it are expected to be preserved
// verbatim between the two links; the comparison is an exact string match.
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/article1">Example Article 1 - Example</a>
<p>This separates the two cards</p>
<a href="https://example.org/article2">Example Article 2 - Example</a>`,
}
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`)

if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

0 comments on commit 6cbe8c3

Please sign in to comment.