Skip to content

Commit

Permalink
Use ktxml for parsing feed XML (#212)
Browse files Browse the repository at this point in the history
* Add ktxml dependency

* Add common RSS and Atom content parsers

* Add common feed parser

For the time being, the feed parser interface is converted to an open class until the platform-specific feed parsers are removed. After that, we can change it to a regular class.

* Stop providing platform-specific feed parsers

* Remove platform-specific feed parsers

* Remove open modifier for `FeedParser`

* Convert content parsers to Kotlin objects

* Rename `xmlContent` parameter to `feedContent`
  • Loading branch information
msasikanth authored Jan 8, 2024
1 parent f388291 commit 9a25c14
Show file tree
Hide file tree
Showing 17 changed files with 110 additions and 716 deletions.
1 change: 1 addition & 0 deletions core/network/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ kotlin {
// TODO: Extract logging abstraction into separate module
implementation(libs.napier)
implementation(libs.sentry)
implementation(libs.ktxml)
}
commonTest.dependencies { implementation(libs.kotlin.test) }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,13 @@

package dev.sasikanth.rss.reader.core.network.di

import dev.sasikanth.rss.reader.core.network.parser.AndroidFeedParser
import dev.sasikanth.rss.reader.core.network.parser.FeedParser
import dev.sasikanth.rss.reader.di.scopes.AppScope
import io.ktor.client.HttpClient
import io.ktor.client.engine.okhttp.OkHttp
import me.tatarka.inject.annotations.Provides

actual interface NetworkComponent {

val AndroidFeedParser.bind: FeedParser
@Provides @AppScope get() = this

@Provides
@AppScope
fun providesHttpClient(): HttpClient {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class FeedFetcher(private val httpClient: HttpClient, private val feedParser: Fe
throw UnsupportedOperationException()
}
} else {
val feedPayload = feedParser.parse(xmlContent = responseContent, feedUrl = url)
val feedPayload = feedParser.parse(feedContent = responseContent, feedUrl = url)
FeedFetchResult.Success(feedPayload)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package dev.sasikanth.rss.reader.core.network.parser

import android.net.Uri
import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlOptions
import com.mohamedrejeb.ksoup.html.parser.KsoupHtmlParser
import dev.sasikanth.rss.reader.core.model.remote.FeedPayload
Expand All @@ -32,23 +31,24 @@ import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.TAG_PUB
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.TAG_SUBTITLE
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.TAG_TITLE
import dev.sasikanth.rss.reader.core.network.parser.FeedParser.Companion.TAG_UPDATED
import io.ktor.http.Url
import kotlinx.datetime.Clock
import org.xmlpull.v1.XmlPullParser
import org.kobjects.ktxml.api.EventType
import org.kobjects.ktxml.api.XmlPullParser

internal class AndroidAtomParser(private val parser: XmlPullParser, private val feedUrl: String) :
Parser() {
internal object AtomContentParser : ContentParser() {

override fun parse(): FeedPayload {
parser.require(XmlPullParser.START_TAG, namespace, TAG_ATOM_FEED)
override fun parse(feedUrl: String, parser: XmlPullParser): FeedPayload {
parser.require(EventType.START_TAG, parser.namespace, TAG_ATOM_FEED)

val posts = mutableListOf<PostPayload?>()

var title: String? = null
var description: String? = null
var link: String? = null

while (parser.next() != XmlPullParser.END_TAG) {
if (parser.eventType != XmlPullParser.START_TAG) continue
while (parser.next() != EventType.END_TAG) {
if (parser.eventType != EventType.START_TAG) continue
when (val name = parser.name) {
TAG_TITLE -> {
title = readTagText(name, parser)
Expand All @@ -70,30 +70,36 @@ internal class AndroidAtomParser(private val parser: XmlPullParser, private val
}
}

val domain = Uri.parse(link).host!!
val iconUrl = FeedParser.feedIcon(domain)
val domain = Url(link!!)
val host =
if (domain.host != "localhost") {
domain.host
} else {
throw NullPointerException("Unable to get host domain")
}
val iconUrl = FeedParser.feedIcon(host)

return FeedPayload(
name = FeedParser.cleanText(title ?: link, decodeUrlEncoding = true)!!,
description = FeedParser.cleanText(description, decodeUrlEncoding = true).orEmpty(),
icon = iconUrl,
homepageLink = link!!,
homepageLink = link,
link = feedUrl,
posts = posts.filterNotNull()
)
}

private fun readAtomEntry(parser: XmlPullParser, hostLink: String): PostPayload? {
parser.require(XmlPullParser.START_TAG, null, "entry")
parser.require(EventType.START_TAG, null, "entry")

var title: String? = null
var link: String? = null
var content: String? = null
var date: String? = null
var image: String? = null

while (parser.next() != XmlPullParser.END_TAG) {
if (parser.eventType != XmlPullParser.START_TAG) continue
while (parser.next() != EventType.END_TAG) {
if (parser.eventType != EventType.START_TAG) continue

when (val tagName = parser.name) {
TAG_TITLE -> {
Expand Down Expand Up @@ -148,13 +154,13 @@ internal class AndroidAtomParser(private val parser: XmlPullParser, private val

private fun readAtomLink(tagName: String, parser: XmlPullParser): String? {
var link: String? = null
parser.require(XmlPullParser.START_TAG, namespace, tagName)
val relType = parser.getAttributeValue(namespace, ATTR_REL)
parser.require(EventType.START_TAG, parser.namespace, tagName)
val relType = parser.getAttributeValue(parser.namespace, ATTR_REL)
if (relType == ATTR_VALUE_ALTERNATE || relType.isNullOrBlank()) {
link = parser.getAttributeValue(namespace, ATTR_HREF)
link = parser.getAttributeValue(parser.namespace, ATTR_HREF)
}
parser.nextTag()
parser.require(XmlPullParser.END_TAG, namespace, tagName)
parser.require(EventType.END_TAG, parser.namespace, tagName)
return link
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2023 Sasikanth Miriyampalli
* Copyright 2024 Sasikanth Miriyampalli
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,43 +17,45 @@
package dev.sasikanth.rss.reader.core.network.parser

import dev.sasikanth.rss.reader.core.model.remote.FeedPayload
import org.xmlpull.v1.XmlPullParser
import org.kobjects.ktxml.api.EventType
import org.kobjects.ktxml.api.XmlPullParser

abstract class Parser {
abstract class ContentParser {

val namespace: String? = null

abstract fun parse(): FeedPayload
abstract fun parse(feedUrl: String, parser: XmlPullParser): FeedPayload

fun readAttrText(attrName: String, parser: XmlPullParser): String? {
val url = parser.getAttributeValue(namespace, attrName)
val url = parser.getAttributeValue(parser.namespace, attrName)
skip(parser)
return url
}

fun readTagText(tagName: String, parser: XmlPullParser): String {
parser.require(XmlPullParser.START_TAG, namespace, tagName)
parser.require(EventType.START_TAG, parser.namespace, tagName)
val title = readText(parser)
parser.require(XmlPullParser.END_TAG, namespace, tagName)
parser.require(EventType.END_TAG, parser.namespace, tagName)
return title
}

private fun readText(parser: XmlPullParser): String {
var result = ""
if (parser.next() == XmlPullParser.TEXT) {
if (parser.next() == EventType.TEXT) {
result = parser.text
parser.nextTag()
}
return result
}

fun skip(parser: XmlPullParser) {
parser.require(XmlPullParser.START_TAG, namespace, null)
parser.require(EventType.START_TAG, parser.namespace, null)
var depth = 1
while (depth != 0) {
when (parser.next()) {
XmlPullParser.END_TAG -> depth--
XmlPullParser.START_TAG -> depth++
EventType.END_TAG -> depth--
EventType.START_TAG -> depth++
else -> {
// no-op
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,49 @@
package dev.sasikanth.rss.reader.core.network.parser

import dev.sasikanth.rss.reader.core.model.remote.FeedPayload
import dev.sasikanth.rss.reader.di.scopes.AppScope
import dev.sasikanth.rss.reader.exceptions.XmlParsingError
import dev.sasikanth.rss.reader.util.DispatchersProvider
import dev.sasikanth.rss.reader.util.decodeUrlEncodedString
import io.github.aakira.napier.LogLevel
import io.github.aakira.napier.log
import io.ktor.http.URLBuilder
import io.ktor.http.URLProtocol
import io.ktor.http.set

interface FeedParser {
import kotlinx.coroutines.withContext
import me.tatarka.inject.annotations.Inject
import org.kobjects.ktxml.api.XmlPullParserException
import org.kobjects.ktxml.mini.MiniXmlPullParser

@Inject
@AppScope
class FeedParser(private val dispatchersProvider: DispatchersProvider) {

suspend fun parse(feedContent: String, feedUrl: String): FeedPayload {
return try {
withContext(dispatchersProvider.io) {
// MiniXmlPullParser currently fails to parse XML that starts with an
// XML declaration (<?xml ... ?>), so we strip the declaration before
// parsing until the upstream issue is resolved.
// https://github.com/kobjects/ktxml/issues/5
val xmlDeclarationPattern = Regex("<\\?xml .*\\?>")
val parser =
MiniXmlPullParser(source = xmlDeclarationPattern.replaceFirst(feedContent, "").iterator())

parser.nextTag()

return@withContext when (parser.name) {
RSS_TAG -> RssContentParser.parse(feedUrl, parser)
ATOM_TAG -> AtomContentParser.parse(feedUrl, parser)
HTML_TAG -> throw HtmlContentException()
else -> throw UnsupportedOperationException("Unknown feed type: ${parser.name}")
}
}
} catch (e: XmlPullParserException) {
log(LogLevel.ERROR, throwable = e) { "Failed to parse the XML" }
throw XmlParsingError(e.stackTraceToString())
}
}

companion object {
const val RSS_TAG = "rss"
Expand Down Expand Up @@ -103,8 +140,6 @@ interface FeedParser {
return pattern.containsMatchIn(url)
}
}

suspend fun parse(xmlContent: String, feedUrl: String): FeedPayload
}

internal class HtmlContentException : Exception()
Loading

0 comments on commit 9a25c14

Please sign in to comment.