diff --git a/core/data/src/commonMain/kotlin/dev/sasikanth/rss/reader/data/repository/RssRepository.kt b/core/data/src/commonMain/kotlin/dev/sasikanth/rss/reader/data/repository/RssRepository.kt index 8821c9596..ade735176 100644 --- a/core/data/src/commonMain/kotlin/dev/sasikanth/rss/reader/data/repository/RssRepository.kt +++ b/core/data/src/commonMain/kotlin/dev/sasikanth/rss/reader/data/repository/RssRepository.kt @@ -858,14 +858,6 @@ class RssRepository( ) } - suspend fun updatedFeedPinnedPosition(pinnedPosition: Double, id: String) { - withContext(ioDispatcher) { feedQueries.updatedPinnedPosition(pinnedPosition, id) } - } - - suspend fun updatedFeedGroupPinnedPosition(pinnedPosition: Double, id: String) { - withContext(ioDispatcher) { feedGroupQueries.updatedPinnedPosition(pinnedPosition, id) } - } - suspend fun updatedSourcePinnedPosition(sources: List) { withContext(ioDispatcher) { transactionRunner.invoke { diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt index 37eebfb2b..5aa35f295 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt @@ -123,11 +123,8 @@ internal object AtomContentParser : ContentParser() { rawContent = parser.nextText().trimIndent() val htmlContent = HtmlContentParser.parse(htmlContent = rawContent) - if (image.isNullOrBlank() && htmlContent != null) { - image = htmlContent.imageUrl - } - - content = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim() + image = htmlContent?.leadImage ?: image + content = htmlContent?.content?.ifBlank { null } ?: rawContent.trim() } TAG_PUBLISHED, TAG_UPDATED -> { @@ -150,7 +147,7 @@ internal object AtomContentParser : ContentParser() { return PostPayload( 
link = FeedParser.cleanText(link)!!, title = FeedParser.cleanText(title).orEmpty().decodeHTMLString(), - description = FeedParser.cleanTextCompact(content).orEmpty().decodeHTMLString(), + description = content.orEmpty().decodeHTMLString(), rawContent = rawContent, imageUrl = FeedParser.safeUrl(hostLink, image), date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(), diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt index ffda49fc6..1a4e1e1a1 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/FeedParser.kt @@ -108,8 +108,6 @@ class FeedParser(private val dispatchersProvider: DispatchersProvider) { fun cleanText(text: String?) = text?.replace(htmlTag, "")?.replace(blankLine, "")?.trim() - fun cleanTextCompact(text: String?) 
= cleanText(text)?.take(300) - fun feedIcon(host: String): String { return "https://icon.horse/icon/$host" } diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParser.kt index 3d12066a3..f7a13398a 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParser.kt @@ -17,36 +17,41 @@ package dev.sasikanth.rss.reader.core.network.parser import co.touchlab.crashkios.bugsnag.BugsnagKotlin import com.fleeksoft.ksoup.Ksoup +import com.fleeksoft.ksoup.safety.Safelist import io.ktor.utils.io.charsets.MalformedInputException internal object HtmlContentParser { - private val allowedContentTags = setOf("p", "span", "em", "u", "b", "i", "strong") + private const val TAG_BODY = "body" + private const val TAG_IMG = "img" + private const val TAG_FIGCAPTION = "figcaption" + private const val ATTR_SRC = "src" - fun parse(htmlContent: String): HtmlContent? { + private val allowedContentTags = + Safelist().addTags(TAG_FIGCAPTION, TAG_IMG).addAttributes(TAG_IMG, ATTR_SRC) + /* Case-insensitive match for a URL ending in .gif, optionally followed by a query string. */ private val gifRegex by lazy { Regex("\\.gif(\\?.*)?\$", RegexOption.IGNORE_CASE) } + + fun parse(htmlContent: String): Result? 
{ if (htmlContent.isBlank()) return null return try { - val document = Ksoup.parse(htmlContent) - - val imageUrl = - document - .getElementsByTag("img") - .firstOrNull { it.hasAttr("src") && !it.attr("src").endsWith(".gif") } - ?.attr("src") - - val contentStringBuilder = StringBuilder() - document.getAllElements().forEach { element -> - if (allowedContentTags.contains(element.tagName())) { - contentStringBuilder.append(element.text().cleanWhitespaces()) - } + val cleanedHtml = Ksoup.clean(htmlContent, allowedContentTags) + val document = Ksoup.parse(cleanedHtml) + val body = document.getElementsByTag(TAG_BODY).first() ?: return null + val elements = body.children() - if (element.tagName() == "p" || element.tagName() == "br") { - contentStringBuilder.appendLine() + val leadImage = + elements.firstNotNullOfOrNull { + val imageUrl = it.attr(ATTR_SRC) + if (it.tagName() == TAG_IMG && !gifRegex.containsMatchIn(imageUrl)) { + imageUrl.removeSurrounding("\"") + } else { + null + } } - } + val content = body.ownText() - HtmlContent(imageUrl = imageUrl, content = contentStringBuilder.toString()) + Result(leadImage = leadImage, content = content) } catch (e: Exception) { null } catch (e: MalformedInputException) { @@ -55,18 +60,5 @@ internal object HtmlContentParser { } } - private fun String.cleanWhitespaces(): String { - var formattedText = this.trim() - if (formattedText.isNotBlank()) { - if (this[0].isWhitespace()) { - formattedText = " $formattedText" - } - if (this.last().isWhitespace()) { - formattedText += " " - } - } - return formattedText - } - - data class HtmlContent(val imageUrl: String?, val content: String) + data class Result(val leadImage: String?, val content: String) } diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RDFContentParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RDFContentParser.kt index 055ac3d49..cf736ea47 100644 --- 
a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RDFContentParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RDFContentParser.kt @@ -127,11 +127,8 @@ internal object RDFContentParser : ContentParser() { rawContent = parser.nextText().trimIndent() val htmlContent = HtmlContentParser.parse(htmlContent = rawContent) - if (image.isNullOrBlank() && htmlContent != null) { - image = htmlContent.imageUrl - } - - description = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim() + image = htmlContent?.leadImage ?: image + description = htmlContent?.content?.ifBlank { null } ?: rawContent.trim() } name == TAG_PUB_DATE || name == TAG_DC_DATE -> { date = parser.nextText() @@ -149,7 +146,7 @@ internal object RDFContentParser : ContentParser() { return PostPayload( link = FeedParser.cleanText(link)!!, title = FeedParser.cleanText(title).orEmpty().decodeHTMLString(), - description = FeedParser.cleanTextCompact(description).orEmpty().decodeHTMLString(), + description = description.orEmpty().decodeHTMLString(), rawContent = rawContent, imageUrl = FeedParser.safeUrl(hostLink, image), date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(), diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RSSContentParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RSSContentParser.kt index c13230cd0..fcff82ee1 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RSSContentParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RSSContentParser.kt @@ -127,11 +127,8 @@ internal object RSSContentParser : ContentParser() { rawContent = parser.nextText().trimIndent() val htmlContent = HtmlContentParser.parse(htmlContent = rawContent) - if (image.isNullOrBlank() && htmlContent != null) { - image = htmlContent.imageUrl - } - - 
description = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim() + image = htmlContent?.leadImage ?: image + description = htmlContent?.content?.ifBlank { null } ?: rawContent.trim() } name == TAG_PUB_DATE -> { date = parser.nextText() @@ -158,7 +155,7 @@ internal object RSSContentParser : ContentParser() { return PostPayload( link = FeedParser.cleanText(link)!!, title = FeedParser.cleanText(title).orEmpty().decodeHTMLString(), - description = FeedParser.cleanTextCompact(description).orEmpty().decodeHTMLString(), + description = description.orEmpty().decodeHTMLString(), rawContent = rawContent, imageUrl = FeedParser.safeUrl(hostLink, image), date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(), diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/post/PostSourceFetcher.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/post/FullArticleFetcher.kt similarity index 98% rename from core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/post/PostSourceFetcher.kt rename to core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/post/FullArticleFetcher.kt index b9a6cb1dc..0b825213c 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/post/PostSourceFetcher.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/post/FullArticleFetcher.kt @@ -32,7 +32,7 @@ import me.tatarka.inject.annotations.Inject @Inject @AppScope -class PostSourceFetcher( +class FullArticleFetcher( private val httpClient: HttpClient, private val dispatchersProvider: DispatchersProvider ) { diff --git a/core/network/src/commonTest/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParserTest.kt b/core/network/src/commonTest/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParserTest.kt new file mode 100644 index 000000000..97c253745 --- /dev/null +++ 
b/core/network/src/commonTest/kotlin/dev/sasikanth/rss/reader/core/network/parser/HtmlContentParserTest.kt @@ -0,0 +1,63 @@ +/* + * Copyright 2024 Sasikanth Miriyampalli + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dev.sasikanth.rss.reader.core.network.parser + +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNull + +class HtmlContentParserTest { + + companion object { + private const val TEST_HTML = + """ +
+ A screenshot from DOOM + DOOM II. +
Image: Bethesda
+
+

If you haven’t played Doom or Doom II for a while — or ever — a new re-release that Bethesda surprise-dropped (sorta) on Thursday might be the perfect excuse to jump in to the classic games. The re-release, which combines both games into one package called Doom + Doom II and is a free update for anyone who already owns Doom (1993) or Doom II, offers a long list of great new features — including a brand new single-player episode and online, cross-platform deathmatch multiplayer.

+

With Doom + Doom II, you’ll have access to both of those two games as well as extra single-player content like John Romero’s Sigil episode released in 2019 and Legacy of Rust, which is a new Doom episode created by “individuals from id Software, Nightdive Studios...

+

Continue reading…

+ """ + } + + @Test + fun parsingLeadImageAndContentFromHtmlShouldWorkCorrectly() { + // when + val result = HtmlContentParser.parse(TEST_HTML) + + // then + assertEquals( + "https://cdn.vox-cdn.com/thumbor/LJt9a0BM9fnTyZtP68Ba1Mr1YDY=/150x0:1770x1080/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/73510530/ss_c5781b8f9a8181e6c989869b86d0b455ccca344a.0.jpg", + result?.leadImage, + ) + assertEquals( + "If you haven’t played Doom or Doom II for a while — or ever — a new re-release that Bethesda surprise-dropped (sorta) on Thursday might be the perfect excuse to jump in to the classic games. The re-release, which combines both games into one package called Doom + Doom II and is a free update for anyone who already owns Doom (1993) or Doom II, offers a long list of great new features — including a brand new single-player episode and online, cross-platform deathmatch multiplayer. With Doom + Doom II, you’ll have access to both of those two games as well as extra single-player content like John Romero’s Sigil episode released in 2019 and Legacy of Rust, which is a new Doom episode created by “individuals from id Software, Nightdive Studios... 
Continue reading…", + result?.content, + ) + } + + @Test + fun parsingContentFromTextShouldWorkCorrectly() { + // when + val result = HtmlContentParser.parse("This is a normal text") + + // then + assertNull(result?.leadImage) + assertEquals("This is a normal text", result?.content) + } +} diff --git a/shared/src/androidMain/kotlin/dev/sasikanth/rss/reader/di/ApplicationComponent.kt b/shared/src/androidMain/kotlin/dev/sasikanth/rss/reader/di/ApplicationComponent.kt index 50f9de20f..28a665ac6 100644 --- a/shared/src/androidMain/kotlin/dev/sasikanth/rss/reader/di/ApplicationComponent.kt +++ b/shared/src/androidMain/kotlin/dev/sasikanth/rss/reader/di/ApplicationComponent.kt @@ -19,7 +19,7 @@ import android.content.Context import android.content.pm.ApplicationInfo.FLAG_DEBUGGABLE import android.os.Build import dev.sasikanth.rss.reader.app.AppInfo -import dev.sasikanth.rss.reader.core.network.post.PostSourceFetcher +import dev.sasikanth.rss.reader.core.network.post.FullArticleFetcher import dev.sasikanth.rss.reader.data.repository.RssRepository import dev.sasikanth.rss.reader.data.repository.SettingsRepository import dev.sasikanth.rss.reader.di.scopes.AppScope @@ -35,7 +35,7 @@ abstract class ApplicationComponent(@get:Provides val context: Context) : abstract val settingsRepository: SettingsRepository - abstract val postSourceFetcher: PostSourceFetcher + abstract val fullArticleFetcher: FullArticleFetcher @Provides @AppScope diff --git a/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/reader/ReaderPresenter.kt b/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/reader/ReaderPresenter.kt index eced7b6c6..46706dcd6 100644 --- a/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/reader/ReaderPresenter.kt +++ b/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/reader/ReaderPresenter.kt @@ -21,7 +21,7 @@ import com.arkivanov.essenty.instancekeeper.InstanceKeeper import com.arkivanov.essenty.instancekeeper.getOrCreate import 
com.arkivanov.essenty.lifecycle.doOnCreate import com.arkivanov.essenty.lifecycle.doOnDestroy -import dev.sasikanth.rss.reader.core.network.post.PostSourceFetcher +import dev.sasikanth.rss.reader.core.network.post.FullArticleFetcher import dev.sasikanth.rss.reader.data.repository.RssRepository import dev.sasikanth.rss.reader.reader.ReaderState.PostMode.Idle import dev.sasikanth.rss.reader.reader.ReaderState.PostMode.InProgress @@ -51,7 +51,7 @@ internal typealias ReaderPresenterFactory = class ReaderPresenter( dispatchersProvider: DispatchersProvider, private val rssRepository: RssRepository, - private val postSourceFetcher: PostSourceFetcher, + private val fullArticleFetcher: FullArticleFetcher, @Assisted private val postId: String, @Assisted componentContext: ComponentContext, @Assisted private val goBack: () -> Unit @@ -63,7 +63,7 @@ class ReaderPresenter( dispatchersProvider = dispatchersProvider, rssRepository = rssRepository, postId = postId, - postSourceFetcher = postSourceFetcher + fullArticleFetcher = fullArticleFetcher ) } @@ -89,7 +89,7 @@ class ReaderPresenter( private val dispatchersProvider: DispatchersProvider, private val rssRepository: RssRepository, private val postId: String, - private val postSourceFetcher: PostSourceFetcher, + private val fullArticleFetcher: FullArticleFetcher, ) : InstanceKeeper.Instance { private val coroutineScope = CoroutineScope(SupervisorJob() + dispatchersProvider.main) @@ -175,7 +175,7 @@ class ReaderPresenter( val postLink = _state.value.link if (!postLink.isNullOrBlank()) { _state.update { it.copy(postMode = InProgress) } - val content = postSourceFetcher.fetch(postLink) + val content = fullArticleFetcher.fetch(postLink) if (content.isSuccess) { _state.update { it.copy(content = content.getOrThrow()) }