From 78f6e48515c7920b44356efa8b36ac034ee848f9 Mon Sep 17 00:00:00 2001 From: Jake Walker Date: Sat, 11 Jan 2025 20:11:02 +0000 Subject: [PATCH 1/3] feat: ghost card rewrite rule --- internal/reader/rewrite/rewrite_functions.go | 26 ++++++++++++++++++++ internal/reader/rewrite/rewriter.go | 2 ++ 2 files changed, 28 insertions(+) diff --git a/internal/reader/rewrite/rewrite_functions.go b/internal/reader/rewrite/rewrite_functions.go index 1b48eb9b388..b8f4a5cc857 100644 --- a/internal/reader/rewrite/rewrite_functions.go +++ b/internal/reader/rewrite/rewrite_functions.go @@ -455,3 +455,29 @@ func removeTables(entryContent string) string { output, _ := doc.FindMatcher(goquery.Single("body")).Html() return output } + +func fixGhostCards(entryContent string) string { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent)) + if err != nil { + return entryContent + } + + doc.Find("figure.kg-card").Each(func(i int, s *goquery.Selection) { + title := s.Find(".kg-bookmark-title").First().Text() + author := s.Find(".kg-bookmark-author").First().Text() + href := s.Find("a.kg-bookmark-container").First().AttrOr("href", "") + + if href == "" { + return + } + + if author == "" || strings.HasSuffix(title, author) { + s.SetHtml(fmt.Sprintf("%s", href, title)) + } else { + s.SetHtml(fmt.Sprintf("%s - %s", href, title, author)) + } + }) + + output, _ := doc.FindMatcher(goquery.Single("body")).Html() + return output +} diff --git a/internal/reader/rewrite/rewriter.go b/internal/reader/rewrite/rewriter.go index e2c26b6c6f9..35395ac9c8a 100644 --- a/internal/reader/rewrite/rewriter.go +++ b/internal/reader/rewrite/rewriter.go @@ -92,6 +92,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) { entry.Content = removeTables(entry.Content) case "remove_clickbait": entry.Title = titlelize(entry.Title) + case "fix_ghost_cards": + entry.Content = fixGhostCards(entry.Content) } } From 992f29eedba76556d4712bfc8a7ddf8508c98e6a Mon Sep 17 00:00:00 2001 From: 
Jake Walker Date: Sun, 12 Jan 2025 12:08:49 +0000 Subject: [PATCH 2/3] test(rewriter): add tests for ghost card rule --- internal/reader/rewrite/rewrite_functions.go | 4 +- internal/reader/rewrite/rewriter_test.go | 129 +++++++++++++++++++ 2 files changed, 131 insertions(+), 2 deletions(-) diff --git a/internal/reader/rewrite/rewrite_functions.go b/internal/reader/rewrite/rewrite_functions.go index b8f4a5cc857..878167c1444 100644 --- a/internal/reader/rewrite/rewrite_functions.go +++ b/internal/reader/rewrite/rewrite_functions.go @@ -472,9 +472,9 @@ func fixGhostCards(entryContent string) string { } if author == "" || strings.HasSuffix(title, author) { - s.SetHtml(fmt.Sprintf("%s", href, title)) + s.ReplaceWithHtml(fmt.Sprintf("%s", href, title)) } else { - s.SetHtml(fmt.Sprintf("%s - %s", href, title, author)) + s.ReplaceWithHtml(fmt.Sprintf("%s - %s", href, title, author)) } }) diff --git a/internal/reader/rewrite/rewriter_test.go b/internal/reader/rewrite/rewriter_test.go index 93123dbb78e..151dc0aefa7 100644 --- a/internal/reader/rewrite/rewriter_test.go +++ b/internal/reader/rewrite/rewriter_test.go @@ -703,3 +703,132 @@ func TestAddImageTitle(t *testing.T) { t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } + +func TestFixGhostCard(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
+ +
+
Example Article
+
Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
+ +
+
+ +
+
+
`, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardNoCard(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardInvalidCard(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
+ This card does not have the required fields +
`, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
+ This card does not have the required fields +
`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardMissingAuthor(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
+ +
+
Example Article
+
Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
+
+
+ +
+
+
`, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardDuplicatedAuthor(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
+ +
+
Example Article - Example
+
Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
+ +
+
+ +
+
+
`, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} From 0d6cb6422edc864a08155fc2e25c5d18fb496c6e Mon Sep 17 00:00:00 2001 From: Jake Walker Date: Sun, 12 Jan 2025 22:19:34 +0000 Subject: [PATCH 3/3] fix(rewriter): multiple ghost card formatting --- internal/reader/rewrite/rewrite_functions.go | 36 ++++++- internal/reader/rewrite/rewriter_test.go | 99 ++++++++++++++++++++ 2 files changed, 130 insertions(+), 5 deletions(-) diff --git a/internal/reader/rewrite/rewrite_functions.go b/internal/reader/rewrite/rewrite_functions.go index 878167c1444..e128a22a416 100644 --- a/internal/reader/rewrite/rewrite_functions.go +++ b/internal/reader/rewrite/rewrite_functions.go @@ -462,22 +462,48 @@ func fixGhostCards(entryContent string) string { return entryContent } - doc.Find("figure.kg-card").Each(func(i int, s *goquery.Selection) { + const cardSelector = "figure.kg-card" + var currentList *goquery.Selection + + doc.Find(cardSelector).Each(func(i int, s *goquery.Selection) { title := s.Find(".kg-bookmark-title").First().Text() author := s.Find(".kg-bookmark-author").First().Text() href := s.Find("a.kg-bookmark-container").First().AttrOr("href", "") - if href == "" { + // if there is no link or title, skip processing + if href == "" || title == "" { return } + link := "" if author == "" || strings.HasSuffix(title, author) { - s.ReplaceWithHtml(fmt.Sprintf("%s", href, title)) + link = fmt.Sprintf("%s", href, title) + } else { + link = fmt.Sprintf("%s - %s", href, title, author) + } + + next := s.Next() + + // if the next element is also a card, start a list + if next.Is(cardSelector) && currentList == nil { + currentList = s.BeforeHtml("
    ").Prev() + } + + if currentList != nil { + // add this card to the list, then delete it + currentList.AppendHtml("
  • " + link + "
  • ") + s.Remove() } else { - s.ReplaceWithHtml(fmt.Sprintf("%s - %s", href, title, author)) + // replace single card + s.ReplaceWithHtml(link) + } + + // if the next element is not a card, end the current list so a later run of cards starts a new one + if !next.Is(cardSelector) && currentList != nil { + currentList = nil } }) output, _ := doc.FindMatcher(goquery.Single("body")).Html() - return output + return strings.TrimSpace(output) } diff --git a/internal/reader/rewrite/rewriter_test.go b/internal/reader/rewrite/rewriter_test.go index 151dc0aefa7..52ea5c01d26 100644 --- a/internal/reader/rewrite/rewriter_test.go +++ b/internal/reader/rewrite/rewriter_test.go @@ -832,3 +832,102 @@ func TestFixGhostCardDuplicatedAuthor(t *testing.T) { t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } + +func TestFixGhostCardMultiple(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article 1 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    +
    + +
    +
    Example Article 2 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: ``, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardMultipleSplit(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article 1 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    +

    This separates the two cards

    +
    + +
    +
    Example Article 2 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article 1 - Example +

    This separates the two cards

    + Example Article 2 - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +}