Skip to content

Commit

Permalink
[1.37.*] Pre-release merge (#228)
Browse files Browse the repository at this point in the history
  • Loading branch information
tramline-github[bot] authored Jan 17, 2024
2 parents 1963c8d + 0733032 commit 928ca9b
Show file tree
Hide file tree
Showing 26 changed files with 2,271 additions and 168 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
*/
package dev.sasikanth.rss.reader.core.network.fetcher

import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlHandler
import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlParser
import com.fleeksoft.ksoup.Ksoup
import dev.sasikanth.rss.reader.core.network.parser.FeedParser
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.ATOM_MEDIA_TYPE
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.ATTR_HREF
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.ATTR_TYPE
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.RSS_MEDIA_TYPE
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.TAG_LINK
import io.ktor.client.HttpClient
import io.ktor.client.request.get
import io.ktor.client.statement.HttpResponse
Expand All @@ -28,8 +32,6 @@ import io.ktor.http.URLBuilder
import io.ktor.http.URLProtocol
import io.ktor.http.Url
import io.ktor.http.contentType
import kotlin.coroutines.resume
import kotlin.coroutines.suspendCoroutine
import me.tatarka.inject.annotations.Inject

@Inject
Expand Down Expand Up @@ -131,40 +133,18 @@ class FeedFetcher(private val httpClient: HttpClient, private val feedParser: Fe
}
}

private suspend fun fetchFeedLinkFromHtmlIfExists(
htmlContent: String,
originalUrl: String
): String? {
return suspendCoroutine { continuation ->
var link: String? = null
KsoupHtmlParser(
handler =
object : KsoupHtmlHandler {
override fun onOpenTag(
name: String,
attributes: Map<String, String>,
isImplied: Boolean
) {
if (
link.isNullOrBlank() &&
name == "link" &&
(attributes["type"] == FeedParser.RSS_MEDIA_TYPE ||
attributes["type"] == FeedParser.ATOM_MEDIA_TYPE)
) {
link = attributes["href"]
}
}

override fun onEnd() {
val host = URLBuilder(originalUrl).build().host
val rootUrl = "https://$host"
val feedUrl = FeedParser.safeUrl(rootUrl, link)
private fun fetchFeedLinkFromHtmlIfExists(htmlContent: String, originalUrl: String): String? {
val document = Ksoup.parse(htmlContent)
val linkElement =
document.getElementsByTag(TAG_LINK).firstOrNull {
val linkType = it.attr(ATTR_TYPE)
linkType == RSS_MEDIA_TYPE || linkType == ATOM_MEDIA_TYPE
}
?: return null
val link = linkElement.attr(ATTR_HREF)
val host = URLBuilder(originalUrl).build().host
val rootUrl = "https://$host"

continuation.resume(feedUrl)
}
}
)
.parseComplete(htmlContent)
}
return FeedParser.safeUrl(rootUrl, link)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

package dev.sasikanth.rss.reader.core.network.parser

import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlOptions
import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlParser
import dev.sasikanth.rss.reader.core.model.remote.FeedPayload
import dev.sasikanth.rss.reader.core.model.remote.PostPayload
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.ATTR_HREF
Expand Down Expand Up @@ -80,8 +78,8 @@ internal object AtomContentParser : ContentParser() {
val iconUrl = FeedParser.feedIcon(host)

return FeedPayload(
name = FeedParser.cleanText(title ?: link, decodeUrlEncoding = true)!!,
description = FeedParser.cleanText(description, decodeUrlEncoding = true).orEmpty(),
name = FeedParser.cleanText(title ?: link)!!,
description = FeedParser.cleanText(description).orEmpty(),
icon = iconUrl,
homepageLink = link,
link = feedUrl,
Expand Down Expand Up @@ -115,15 +113,13 @@ internal object AtomContentParser : ContentParser() {
}
TAG_CONTENT -> {
rawContent = readTagText(tagName, parser).trimIndent()
KsoupHtmlParser(
handler =
HtmlContentParser {
if (image.isNullOrBlank()) image = it.imageUrl
content = it.content.ifBlank { rawContent.trim() }
},
options = KsoupHtmlOptions(decodeEntities = false)
)
.parseComplete(rawContent)

val htmlContent = HtmlContentParser.parse(htmlContent = rawContent)
if (image.isNullOrBlank() && htmlContent != null) {
image = htmlContent.imageUrl
}

content = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim()
}
TAG_PUBLISHED,
TAG_UPDATED -> {
Expand All @@ -144,9 +140,9 @@ internal object AtomContentParser : ContentParser() {
}

return PostPayload(
title = FeedParser.cleanText(title, decodeUrlEncoding = true).orEmpty(),
title = FeedParser.cleanText(title).orEmpty(),
link = FeedParser.cleanText(link)!!,
description = FeedParser.cleanTextCompact(content, decodeUrlEncoding = true).orEmpty(),
description = FeedParser.cleanTextCompact(content).orEmpty(),
rawContent = rawContent,
imageUrl = FeedParser.safeUrl(hostLink, image),
date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import dev.sasikanth.rss.reader.core.model.remote.FeedPayload
import dev.sasikanth.rss.reader.di.scopes.AppScope
import dev.sasikanth.rss.reader.exceptions.XmlParsingError
import dev.sasikanth.rss.reader.util.DispatchersProvider
import dev.sasikanth.rss.reader.util.decodeUrlEncodedString
import io.github.aakira.napier.LogLevel
import io.github.aakira.napier.log
import io.ktor.http.URLBuilder
Expand Down Expand Up @@ -101,18 +100,9 @@ class FeedParser(private val dispatchersProvider: DispatchersProvider) {
internal const val ATTR_VALUE_ALTERNATE = "alternate"
internal const val ATTR_VALUE_IMAGE = "image/jpeg"

fun cleanText(text: String?, decodeUrlEncoding: Boolean = false): String? {
var sanitizedString = text?.replace(htmlTag, "")?.replace(blankLine, "")?.trim()
fun cleanText(text: String?) = text?.replace(htmlTag, "")?.replace(blankLine, "")?.trim()

if (decodeUrlEncoding) {
sanitizedString = sanitizedString?.decodeUrlEncodedString()
}

return sanitizedString
}

fun cleanTextCompact(text: String?, decodeUrlEncoding: Boolean = false) =
cleanText(text, decodeUrlEncoding)?.take(300)
fun cleanTextCompact(text: String?) = cleanText(text)?.take(300)

fun feedIcon(host: String): String {
return "https://icon.horse/icon/$host"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,45 +15,40 @@
*/
package dev.sasikanth.rss.reader.core.network.parser

import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlHandler
import com.fleeksoft.ksoup.Ksoup

internal class HtmlContentParser(private val onEnd: (HtmlContent) -> Unit) : KsoupHtmlHandler {
internal object HtmlContentParser {

private val contentStringBuilder = StringBuilder()
private var imageUrl: String? = null
private val allowedContentTags = setOf("p", "span", "em", "u", "b", "i", "strong")

private var currentTagsStack: ArrayDeque<String> = ArrayDeque()
fun parse(htmlContent: String): HtmlContent? {
if (htmlContent.isBlank()) return null

private val allowedContentTags = setOf("p", "a", "span", "em", "u", "b", "i", "strong")

override fun onText(text: String) {
val tag = currentTagsStack.firstOrNull()
if (tag in allowedContentTags || tag.isNullOrBlank()) {
contentStringBuilder.append(text.cleanWhitespaces())
}
}
val document =
try {
Ksoup.parse(htmlContent)
} catch (e: Exception) {
return null
}

override fun onOpenTag(name: String, attributes: Map<String, String>, isImplied: Boolean) {
currentTagsStack.addFirst(name)
val imageUrl =
document
.getElementsByTag("img")
.firstOrNull { it.hasAttr("src") && !it.attr("src").endsWith(".gif") }
?.attr("src")

if (name == "p" || name == "br") {
contentStringBuilder.appendLine()
}
val contentStringBuilder = StringBuilder()
document.getAllElements().forEach { element ->
if (allowedContentTags.contains(element.tagName())) {
contentStringBuilder.append(element.text().cleanWhitespaces())
}

val srcAttr = attributes["src"].orEmpty()
if (
name == "img" && imageUrl.isNullOrBlank() && srcAttr.isNotBlank() && !srcAttr.endsWith(".gif")
) {
this.imageUrl = srcAttr
if (element.tagName() == "p" || element.tagName() == "br") {
contentStringBuilder.appendLine()
}
}
}

override fun onCloseTag(name: String, isImplied: Boolean) {
currentTagsStack.removeFirst()
}

override fun onEnd() {
onEnd(HtmlContent(imageUrl = imageUrl, content = contentStringBuilder.toString()))
return HtmlContent(imageUrl = imageUrl, content = contentStringBuilder.toString())
}

private fun String.cleanWhitespaces(): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

package dev.sasikanth.rss.reader.core.network.parser

import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlOptions
import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlParser
import dev.sasikanth.rss.reader.core.model.remote.FeedPayload
import dev.sasikanth.rss.reader.core.model.remote.PostPayload
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.ATTR_TYPE
Expand Down Expand Up @@ -79,8 +77,8 @@ internal object RssContentParser : ContentParser() {
val iconUrl = FeedParser.feedIcon(host)

return FeedPayload(
name = FeedParser.cleanText(title ?: link, decodeUrlEncoding = true)!!,
description = FeedParser.cleanText(description, decodeUrlEncoding = true).orEmpty(),
name = FeedParser.cleanText(title ?: link)!!,
description = FeedParser.cleanText(description).orEmpty(),
icon = iconUrl,
homepageLink = link,
link = feedUrl,
Expand Down Expand Up @@ -114,8 +112,14 @@ internal object RssContentParser : ContentParser() {
link = readAttrText(ATTR_URL, parser)
}
name == TAG_DESCRIPTION || name == TAG_CONTENT_ENCODED -> {
description = readTagText(name, parser)
rawContent = description.trimIndent()
rawContent = readTagText(name, parser).trimIndent()

val htmlContent = HtmlContentParser.parse(htmlContent = rawContent)
if (image.isNullOrBlank() && htmlContent != null) {
image = htmlContent.imageUrl
}

description = htmlContent?.content?.ifBlank { rawContent.trim() } ?: rawContent.trim()
}
name == TAG_PUB_DATE -> {
date = readTagText(name, parser)
Expand All @@ -135,24 +139,14 @@ internal object RssContentParser : ContentParser() {

val postPubDateInMillis = date?.let { dateString -> dateString.dateStringToEpochMillis() }

KsoupHtmlParser(
handler =
HtmlContentParser {
if (image.isNullOrBlank()) image = it.imageUrl
description = it.content.ifBlank { description?.trim() }
},
options = KsoupHtmlOptions(decodeEntities = false)
)
.parseComplete(description.orEmpty())

if (title.isNullOrBlank() && description.isNullOrBlank()) {
return null
}

return PostPayload(
title = FeedParser.cleanText(title, decodeUrlEncoding = true).orEmpty(),
title = FeedParser.cleanText(title).orEmpty(),
link = FeedParser.cleanText(link)!!,
description = FeedParser.cleanTextCompact(description, decodeUrlEncoding = true).orEmpty(),
description = FeedParser.cleanTextCompact(description).orEmpty(),
rawContent = rawContent,
imageUrl = FeedParser.safeUrl(hostLink, image),
date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(),
Expand Down
8 changes: 5 additions & 3 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ ktfmt = "0.44"
kotlininject = "0.6.3"
ksp = "1.9.21-1.0.16"
material_color_utilities = "1.0.0-alpha01"
ksoup = "0.3.1"
ksoup = "0.1.2"
sentry = "0.3.0"
sentry_android = "4.1.1"
sentry_android = "4.2.0"
buildKonfig = "0.15.1"
sqliteAndroid = "3.43.0"
windowSizeClass = "0.3.2"
Expand All @@ -45,6 +45,7 @@ paging = "3.3.0-alpha02-0.4.0"
stately = "2.0.6"
xmlutil = "0.86.3"
ktxml = "0.2.3"
uri = "0.0.16"

[libraries]
compose_runtime = { module = "org.jetbrains.compose.runtime:runtime", version.ref = "compose" }
Expand Down Expand Up @@ -89,7 +90,7 @@ coil_compose = { module = "io.coil-kt:coil-compose", version.ref = "coil" }
kotlininject-compiler = { module = 'me.tatarka.inject:kotlin-inject-compiler-ksp', version.ref = 'kotlininject' }
kotlininject-runtime = { module = 'me.tatarka.inject:kotlin-inject-runtime', version.ref = 'kotlininject' }
material_color_utilities = { module = "dev.sasikanth:material-color-utilities", version.ref = "material_color_utilities" }
ksoup = { module = "com.mohamedrejeb.ksoup:ksoup-html", version.ref = "ksoup" }
ksoup = { module = "com.fleeksoft.ksoup:ksoup", version.ref = "ksoup" }
sentry = { module = "io.sentry:sentry-kotlin-multiplatform", version.ref = "sentry" }
sqliteAndroid = { module = "com.github.requery:sqlite-android", version.ref = "sqliteAndroid" }
windowSizeClass = { module = "dev.chrisbanes.material3:material3-window-size-class-multiplatform", version.ref = "windowSizeClass" }
Expand All @@ -104,6 +105,7 @@ stately-iso-collections = { module = "co.touchlab:stately-iso-collections", vers
xmlutil-core = { module = "io.github.pdvrieze.xmlutil:core", version.ref = "xmlutil" }
xmlutil-serialization = { module = "io.github.pdvrieze.xmlutil:serialization", version.ref = "xmlutil" }
ktxml = { module = "org.kobjects.ktxml:core", version.ref = "ktxml" }
uri = { module = "com.eygraber:uri-kmp", version.ref = "uri" }

[plugins]
android_application = { id = "com.android.application", version.ref = "android_gradle_plugin" }
Expand Down
2 changes: 2 additions & 0 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,5 @@ include(":core:network")
include(":resources:strings")

include(":resources:icons")

include(":thirdparty:readability")
Loading

0 comments on commit 928ca9b

Please sign in to comment.