Skip to content

Commit

Permalink
[1.228.*] Pre-release merge (#708)
Browse files Browse the repository at this point in the history
  • Loading branch information
tramline-github[bot] authored Aug 9, 2024
2 parents 3b53e70 + d941f0f commit a5389a2
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -858,14 +858,6 @@ class RssRepository(
)
}

/**
 * Updates the pinned position of a feed.
 *
 * Switches to [ioDispatcher] and forwards both arguments, unchanged and in the same order, to
 * `feedQueries.updatedPinnedPosition`.
 *
 * @param pinnedPosition position value handed to the query.
 * @param id identifier handed to the query — presumably the feed's ID; confirm against the query
 *   definition.
 */
suspend fun updatedFeedPinnedPosition(pinnedPosition: Double, id: String) {
withContext(ioDispatcher) { feedQueries.updatedPinnedPosition(pinnedPosition, id) }
}

/**
 * Updates the pinned position of a feed group.
 *
 * Switches to [ioDispatcher] and forwards both arguments, unchanged and in the same order, to
 * `feedGroupQueries.updatedPinnedPosition`.
 *
 * @param pinnedPosition position value handed to the query.
 * @param id identifier handed to the query — presumably the feed group's ID; confirm against the
 *   query definition.
 */
suspend fun updatedFeedGroupPinnedPosition(pinnedPosition: Double, id: String) {
withContext(ioDispatcher) { feedGroupQueries.updatedPinnedPosition(pinnedPosition, id) }
}

suspend fun updatedSourcePinnedPosition(sources: List<Source>) {
withContext(ioDispatcher) {
transactionRunner.invoke {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,8 @@ internal object AtomContentParser : ContentParser() {
rawContent = parser.nextText().trimIndent()

val htmlContent = HtmlContentParser.parse(htmlContent = rawContent)
if (image.isNullOrBlank() && htmlContent != null) {
image = htmlContent.imageUrl
}

content = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim()
image = htmlContent?.leadImage ?: image
content = htmlContent?.content?.ifBlank { null } ?: rawContent.trim()
}
TAG_PUBLISHED,
TAG_UPDATED -> {
Expand All @@ -150,7 +147,7 @@ internal object AtomContentParser : ContentParser() {
return PostPayload(
link = FeedParser.cleanText(link)!!,
title = FeedParser.cleanText(title).orEmpty().decodeHTMLString(),
description = FeedParser.cleanTextCompact(content).orEmpty().decodeHTMLString(),
description = content.orEmpty().decodeHTMLString(),
rawContent = rawContent,
imageUrl = FeedParser.safeUrl(hostLink, image),
date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,6 @@ class FeedParser(private val dispatchersProvider: DispatchersProvider) {

/**
 * Removes everything matched by `htmlTag`, then everything matched by `blankLine`, from [text]
 * and trims the result.
 *
 * @param text raw text to clean; may be `null`.
 * @return the cleaned and trimmed text, or `null` when [text] is `null`.
 */
fun cleanText(text: String?): String? {
  if (text == null) return null

  val withoutTags = text.replace(htmlTag, "")
  val withoutBlankLines = withoutTags.replace(blankLine, "")
  return withoutBlankLines.trim()
}

/**
 * Cleans [text] with [cleanText] and keeps at most the first 300 characters of the result.
 *
 * @param text raw text to clean; may be `null`.
 * @return the cleaned text capped at 300 characters, or `null` when [cleanText] returns `null`.
 */
fun cleanTextCompact(text: String?): String? {
  val cleaned = cleanText(text) ?: return null
  return cleaned.take(300)
}

/**
 * Builds the icon.horse URL for the given [host].
 *
 * @param host host name appended verbatim to the `https://icon.horse/icon/` prefix.
 * @return the resulting icon URL.
 */
fun feedIcon(host: String): String = "https://icon.horse/icon/$host"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,36 +17,41 @@ package dev.sasikanth.rss.reader.core.network.parser

import co.touchlab.crashkios.bugsnag.BugsnagKotlin
import com.fleeksoft.ksoup.Ksoup
import com.fleeksoft.ksoup.safety.Safelist
import io.ktor.utils.io.charsets.MalformedInputException

internal object HtmlContentParser {

private val allowedContentTags = setOf("p", "span", "em", "u", "b", "i", "strong")
private const val TAG_BODY = "body"
private const val TAG_IMG = "img"
private const val TAG_FIGCAPTION = "figcaption"
private const val ATTR_SRC = "src"

fun parse(htmlContent: String): HtmlContent? {
private val allowedContentTags =
Safelist().addTags(TAG_FIGCAPTION, TAG_IMG).addAttributes(TAG_IMG, ATTR_SRC)
// Matches a URL that ends in ".gif", optionally followed by a query string, ignoring case.
// The previous pattern was a JavaScript regex literal ("/.../i") pasted into a Kotlin string:
// the surrounding slashes, the trailing "i" flag and the escaped "$" were all matched as literal
// characters, so no real image URL ever matched and GIFs were never filtered out.
private val gifRegex by lazy { Regex("\\.gif(\\?.*)?\$", RegexOption.IGNORE_CASE) }

fun parse(htmlContent: String): Result? {
if (htmlContent.isBlank()) return null

return try {
val document = Ksoup.parse(htmlContent)

val imageUrl =
document
.getElementsByTag("img")
.firstOrNull { it.hasAttr("src") && !it.attr("src").endsWith(".gif") }
?.attr("src")

val contentStringBuilder = StringBuilder()
document.getAllElements().forEach { element ->
if (allowedContentTags.contains(element.tagName())) {
contentStringBuilder.append(element.text().cleanWhitespaces())
}
val cleanedHtml = Ksoup.clean(htmlContent, allowedContentTags)
val document = Ksoup.parse(cleanedHtml)
val body = document.getElementsByTag(TAG_BODY).first() ?: return null
val elements = body.children()

if (element.tagName() == "p" || element.tagName() == "br") {
contentStringBuilder.appendLine()
val leadImage =
elements.firstNotNullOfOrNull {
val imageUrl = it.attr(ATTR_SRC)
if (it.tagName() == TAG_IMG && !gifRegex.containsMatchIn(imageUrl)) {
imageUrl.removeSurrounding("\"")
} else {
null
}
}
}
val content = body.ownText()

HtmlContent(imageUrl = imageUrl, content = contentStringBuilder.toString())
Result(leadImage = leadImage, content = content)
} catch (e: Exception) {
null
} catch (e: MalformedInputException) {
Expand All @@ -55,18 +60,5 @@ internal object HtmlContentParser {
}
}

/**
 * Trims the receiver but keeps a single space on each side where the original string started or
 * ended with whitespace.
 *
 * A blank (or empty) receiver yields an empty string.
 */
private fun String.cleanWhitespaces(): String {
  val core = trim()
  if (core.isBlank()) return core

  // `core` is non-blank here, so the receiver is non-empty and first()/last() are safe.
  val prefix = if (first().isWhitespace()) " " else ""
  val suffix = if (last().isWhitespace()) " " else ""
  return prefix + core + suffix
}

/**
 * Outcome of parsing an HTML snippet.
 *
 * @property imageUrl `src` of the first `img` element that has a `src` not ending in `.gif`, or
 *   `null` when there is none.
 * @property content text collected from the allowed content tags of the document.
 */
data class HtmlContent(
  val imageUrl: String?,
  val content: String,
)
/**
 * Outcome of parsing an HTML snippet.
 *
 * @property leadImage `src` of the first `img` child of the cleaned body whose URL is not matched
 *   by the GIF filter, or `null` when there is none.
 * @property content the cleaned body's own text.
 */
data class Result(
  val leadImage: String?,
  val content: String,
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,8 @@ internal object RDFContentParser : ContentParser() {
rawContent = parser.nextText().trimIndent()

val htmlContent = HtmlContentParser.parse(htmlContent = rawContent)
if (image.isNullOrBlank() && htmlContent != null) {
image = htmlContent.imageUrl
}

description = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim()
image = htmlContent?.leadImage ?: image
description = htmlContent?.content?.ifBlank { null } ?: rawContent.trim()
}
name == TAG_PUB_DATE || name == TAG_DC_DATE -> {
date = parser.nextText()
Expand All @@ -149,7 +146,7 @@ internal object RDFContentParser : ContentParser() {
return PostPayload(
link = FeedParser.cleanText(link)!!,
title = FeedParser.cleanText(title).orEmpty().decodeHTMLString(),
description = FeedParser.cleanTextCompact(description).orEmpty().decodeHTMLString(),
description = description.orEmpty().decodeHTMLString(),
rawContent = rawContent,
imageUrl = FeedParser.safeUrl(hostLink, image),
date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,8 @@ internal object RSSContentParser : ContentParser() {
rawContent = parser.nextText().trimIndent()

val htmlContent = HtmlContentParser.parse(htmlContent = rawContent)
if (image.isNullOrBlank() && htmlContent != null) {
image = htmlContent.imageUrl
}

description = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim()
image = htmlContent?.leadImage ?: image
description = htmlContent?.content?.ifBlank { null } ?: rawContent.trim()
}
name == TAG_PUB_DATE -> {
date = parser.nextText()
Expand All @@ -158,7 +155,7 @@ internal object RSSContentParser : ContentParser() {
return PostPayload(
link = FeedParser.cleanText(link)!!,
title = FeedParser.cleanText(title).orEmpty().decodeHTMLString(),
description = FeedParser.cleanTextCompact(description).orEmpty().decodeHTMLString(),
description = description.orEmpty().decodeHTMLString(),
rawContent = rawContent,
imageUrl = FeedParser.safeUrl(hostLink, image),
date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import me.tatarka.inject.annotations.Inject

@Inject
@AppScope
class PostSourceFetcher(
class FullArticleFetcher(
private val httpClient: HttpClient,
private val dispatchersProvider: DispatchersProvider
) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright 2024 Sasikanth Miriyampalli
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package dev.sasikanth.rss.reader.core.network.parser

import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertNull

/** Tests for [HtmlContentParser.parse]: lead-image extraction and text-content extraction. */
class HtmlContentParserTest {

  /** An HTML snippet with a leading image and paragraphs yields the image URL and the text. */
  @Test
  fun parsingLeadImageAndContentFromHtmlShouldWorkCorrectly() {
    // when
    val parsed = HtmlContentParser.parse(TEST_HTML)

    // then
    assertEquals(EXPECTED_LEAD_IMAGE, parsed?.leadImage)
    assertEquals(EXPECTED_CONTENT, parsed?.content)
  }

  /** Plain text without any markup has no lead image and is returned unchanged as content. */
  @Test
  fun parsingContentFromTextShouldWorkCorrectly() {
    // given
    val plainText = "This is a normal text"

    // when
    val parsed = HtmlContentParser.parse(plainText)

    // then
    assertNull(parsed?.leadImage)
    assertEquals(plainText, parsed?.content)
  }

  companion object {
    private const val EXPECTED_LEAD_IMAGE =
      "https://cdn.vox-cdn.com/thumbor/LJt9a0BM9fnTyZtP68Ba1Mr1YDY=/150x0:1770x1080/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/73510530/ss_c5781b8f9a8181e6c989869b86d0b455ccca344a.0.jpg"

    private const val EXPECTED_CONTENT =
      "If you haven’t played Doom or Doom II for a while — or ever — a new re-release that Bethesda surprise-dropped (sorta) on Thursday might be the perfect excuse to jump in to the classic games. The re-release, which combines both games into one package called Doom + Doom II and is a free update for anyone who already owns Doom (1993) or Doom II, offers a long list of great new features — including a brand new single-player episode and online, cross-platform deathmatch multiplayer. With Doom + Doom II, you’ll have access to both of those two games as well as extra single-player content like John Romero’s Sigil episode released in 2019 and Legacy of Rust, which is a new Doom episode created by “individuals from id Software, Nightdive Studios... Continue reading…"

    // The raw string is kept flush-left on purpose: every character between the triple quotes,
    // including leading whitespace, is part of the parsed input.
    private const val TEST_HTML =
      """
<figure>
<img alt="A screenshot from DOOM + DOOM II." src="https://cdn.vox-cdn.com/thumbor/LJt9a0BM9fnTyZtP68Ba1Mr1YDY=/150x0:1770x1080/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/73510530/ss_c5781b8f9a8181e6c989869b86d0b455ccca344a.0.jpg"/>
<figcaption>Image: Bethesda</figcaption>
</figure>
<p id="2Z0e9a">If you haven’t played <em>Doom</em> or <em>Doom II</em> for a while — or ever — a new re-release that Bethesda <a href="https://slayersclub.bethesda.net/en-US/article/doom-doomii-release-notes?linkId=100000279162898">surprise-dropped</a> (<a href="https://x.com/Wario64/status/1821578978462699748">sorta</a>) on Thursday might be the perfect excuse to jump in to the classic games. The re-release, which combines both games into one package called <em>Doom + Doom II</em> and is a free update for anyone who already owns <em>Doom (1993)</em> or <em>Doom II</em>, offers a long list of great new features — including a brand new single-player episode and online, cross-platform deathmatch multiplayer.</p>
<p id="Pm12nB">With <em>Doom + Doom II, </em>you’ll have access to both of those two games as well as extra single-player content like John Romero’s <em>Sigil</em> episode <a href="https://romero.com/sigil">released in 2019</a> and <em>Legacy of Rust</em>, which is a new <em>Doom</em> episode created by “individuals from id Software, Nightdive Studios...</p>
<p><a href="https://www.theverge.com/2024/8/8/24216379/doom-doom-ii-definitive-re-release">Continue reading&hellip;</a> </p>
"""
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import android.content.Context
import android.content.pm.ApplicationInfo.FLAG_DEBUGGABLE
import android.os.Build
import dev.sasikanth.rss.reader.app.AppInfo
import dev.sasikanth.rss.reader.core.network.post.PostSourceFetcher
import dev.sasikanth.rss.reader.core.network.post.FullArticleFetcher
import dev.sasikanth.rss.reader.data.repository.RssRepository
import dev.sasikanth.rss.reader.data.repository.SettingsRepository
import dev.sasikanth.rss.reader.di.scopes.AppScope
Expand All @@ -35,7 +35,7 @@ abstract class ApplicationComponent(@get:Provides val context: Context) :

abstract val settingsRepository: SettingsRepository

abstract val postSourceFetcher: PostSourceFetcher
abstract val fullArticleFetcher: FullArticleFetcher

@Provides
@AppScope
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.arkivanov.essenty.instancekeeper.InstanceKeeper
import com.arkivanov.essenty.instancekeeper.getOrCreate
import com.arkivanov.essenty.lifecycle.doOnCreate
import com.arkivanov.essenty.lifecycle.doOnDestroy
import dev.sasikanth.rss.reader.core.network.post.PostSourceFetcher
import dev.sasikanth.rss.reader.core.network.post.FullArticleFetcher
import dev.sasikanth.rss.reader.data.repository.RssRepository
import dev.sasikanth.rss.reader.reader.ReaderState.PostMode.Idle
import dev.sasikanth.rss.reader.reader.ReaderState.PostMode.InProgress
Expand Down Expand Up @@ -51,7 +51,7 @@ internal typealias ReaderPresenterFactory =
class ReaderPresenter(
dispatchersProvider: DispatchersProvider,
private val rssRepository: RssRepository,
private val postSourceFetcher: PostSourceFetcher,
private val fullArticleFetcher: FullArticleFetcher,
@Assisted private val postId: String,
@Assisted componentContext: ComponentContext,
@Assisted private val goBack: () -> Unit
Expand All @@ -63,7 +63,7 @@ class ReaderPresenter(
dispatchersProvider = dispatchersProvider,
rssRepository = rssRepository,
postId = postId,
postSourceFetcher = postSourceFetcher
fullArticleFetcher = fullArticleFetcher
)
}

Expand All @@ -89,7 +89,7 @@ class ReaderPresenter(
private val dispatchersProvider: DispatchersProvider,
private val rssRepository: RssRepository,
private val postId: String,
private val postSourceFetcher: PostSourceFetcher,
private val fullArticleFetcher: FullArticleFetcher,
) : InstanceKeeper.Instance {

private val coroutineScope = CoroutineScope(SupervisorJob() + dispatchersProvider.main)
Expand Down Expand Up @@ -175,7 +175,7 @@ class ReaderPresenter(
val postLink = _state.value.link
if (!postLink.isNullOrBlank()) {
_state.update { it.copy(postMode = InProgress) }
val content = postSourceFetcher.fetch(postLink)
val content = fullArticleFetcher.fetch(postLink)

if (content.isSuccess) {
_state.update { it.copy(content = content.getOrThrow()) }
Expand Down

0 comments on commit a5389a2

Please sign in to comment.