Skip to content

Commit

Permalink
Try to fetch feed link from HTML when XML parse error occurs
Browse files Browse the repository at this point in the history
There are scenarios where XML parsers fail to properly identify that the given document starts with an HTML tag and instead fail immediately with an XML parse error. So, when that happens, we try one final time to parse the content as HTML using Ksoup and fetch the feed URL from it.
  • Loading branch information
msasikanth committed Sep 16, 2023
1 parent 4d5ee10 commit c51c858
Showing 1 changed file with 29 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package dev.sasikanth.rss.reader.network

import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlHandler
import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlParser
import dev.sasikanth.rss.reader.utils.XmlParsingError
import io.ktor.client.HttpClient
import io.ktor.client.request.get
import io.ktor.client.statement.HttpResponse
Expand Down Expand Up @@ -98,21 +99,33 @@ class FeedFetcher(private val httpClient: HttpClient, private val feedParser: Fe
return try {
val feedPayload = feedParser.parse(xmlContent = responseContent, feedUrl = url)
FeedFetchResult.Success(feedPayload)
} catch (e: HtmlContentException) {
val newUrl = fetchFeedLinkFromHtmlIfExists(responseContent)
val host = URLBuilder(url).build().host
val rootUrl = "https://$host"
val feedUrl = FeedParser.safeUrl(rootUrl, newUrl)

if (!feedUrl.isNullOrBlank()) {
fetch(feedUrl)
} else {
throw UnsupportedOperationException()
} catch (e: Exception) {
when (e) {
// There are situations where XML parsers fail to identify that the content
// is an HTML document and throw an XML parsing error instead, so try one
// last time to fetch the feed link from the content as HTML, just to be safe.
is HtmlContentException,
is XmlParsingError -> {
val feedUrl = fetchFeedLinkFromHtmlIfExists(responseContent, url)
if (!feedUrl.isNullOrBlank()) {
fetch(feedUrl)
} else {
if (e is XmlParsingError) {
throw e
} else {
throw UnsupportedOperationException()
}
}
}
else -> throw e
}
}
}

private suspend fun fetchFeedLinkFromHtmlIfExists(htmlContent: String): String? {
private suspend fun fetchFeedLinkFromHtmlIfExists(
htmlContent: String,
originalUrl: String
): String? {
return suspendCoroutine { continuation ->
var link: String? = null
KsoupHtmlParser(
Expand All @@ -134,7 +147,11 @@ class FeedFetcher(private val httpClient: HttpClient, private val feedParser: Fe
}

override fun onEnd() {
continuation.resume(link)
val host = URLBuilder(originalUrl).build().host
val rootUrl = "https://$host"
val feedUrl = FeedParser.safeUrl(rootUrl, link)

continuation.resume(feedUrl)
}
}
)
Expand Down

0 comments on commit c51c858

Please sign in to comment.