From e93aadc87fcc57da8090835af91843e32cf13cfd Mon Sep 17 00:00:00 2001 From: Sasikanth Miriyampalli Date: Wed, 10 Jan 2024 09:05:59 +0530 Subject: [PATCH] Save raw content of posts in db (#215) * Add column in post table for raw post content * Add raw content param to `PostPayload` * Save raw content when parsing post contents --- .../reader/core/model/remote/PostPayload.kt | 1 + .../core/network/parser/AtomContentParser.kt | 6 ++-- .../core/network/parser/RssContentParser.kt | 5 ++- .../rss/reader/repository/RssRepository.kt | 1 + .../src/commonMain/sqldelight/databases/9.db | Bin 0 -> 86016 bytes .../dev/sasikanth/rss/reader/database/Post.sq | 7 ++-- .../commonMain/sqldelight/migrations/8.sqm | 1 + .../sasikanth/rss/reader/FeedParserTest.kt | 34 ++++++++++++++++++ 8 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 shared/src/commonMain/sqldelight/databases/9.db create mode 100644 shared/src/commonMain/sqldelight/migrations/8.sqm diff --git a/core/model/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/model/remote/PostPayload.kt b/core/model/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/model/remote/PostPayload.kt index ae9e49530..7ce960ffc 100644 --- a/core/model/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/model/remote/PostPayload.kt +++ b/core/model/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/model/remote/PostPayload.kt @@ -19,6 +19,7 @@ data class PostPayload( val title: String, val link: String, val description: String, + val rawContent: String?, val imageUrl: String?, val date: Long, val commentsLink: String? diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt index b99130988..b2f406ff6 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/AtomContentParser.kt @@ -95,6 +95,7 @@ internal object AtomContentParser : ContentParser() { var title: String? = null var link: String? = null var content: String? = null + var rawContent: String? = null var date: String? = null var image: String? = null @@ -113,7 +114,7 @@ internal object AtomContentParser : ContentParser() { } } TAG_CONTENT -> { - val rawContent = readTagText(tagName, parser) + rawContent = readTagText(tagName, parser).trimIndent() KsoupHtmlParser( handler = HtmlContentParser { @@ -144,8 +145,9 @@ internal object AtomContentParser : ContentParser() { return PostPayload( title = FeedParser.cleanText(title, decodeUrlEncoding = true).orEmpty(), - description = FeedParser.cleanTextCompact(content, decodeUrlEncoding = true).orEmpty(), link = FeedParser.cleanText(link)!!, + description = FeedParser.cleanTextCompact(content, decodeUrlEncoding = true).orEmpty(), + rawContent = rawContent, imageUrl = FeedParser.safeUrl(hostLink, image), date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(), commentsLink = null diff --git a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RssContentParser.kt b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RssContentParser.kt index a709bfdf0..421bf4dde 100644 --- a/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RssContentParser.kt +++ b/core/network/src/commonMain/kotlin/dev/sasikanth/rss/reader/core/network/parser/RssContentParser.kt @@ -94,6 +94,7 @@ internal object RssContentParser : ContentParser() { var title: String? = null var link: String? = null var description: String? = null + var rawContent: String? = null var date: String? = null var image: String? = null var commentsLink: String? = null @@ -114,6 +115,7 @@ internal object RssContentParser : ContentParser() { } name == TAG_DESCRIPTION || name == TAG_CONTENT_ENCODED -> { description = readTagText(name, parser) + rawContent = description.trimIndent() } name == TAG_PUB_DATE -> { date = readTagText(name, parser) @@ -149,8 +151,9 @@ internal object RssContentParser : ContentParser() { return PostPayload( title = FeedParser.cleanText(title, decodeUrlEncoding = true).orEmpty(), - description = FeedParser.cleanTextCompact(description, decodeUrlEncoding = true).orEmpty(), link = FeedParser.cleanText(link)!!, + description = FeedParser.cleanTextCompact(description, decodeUrlEncoding = true).orEmpty(), + rawContent = rawContent, imageUrl = FeedParser.safeUrl(hostLink, image), date = postPubDateInMillis ?: Clock.System.now().toEpochMilliseconds(), commentsLink = commentsLink?.trim() diff --git a/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/repository/RssRepository.kt b/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/repository/RssRepository.kt index 6ebb8700b..fe04908db 100644 --- a/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/repository/RssRepository.kt +++ b/shared/src/commonMain/kotlin/dev/sasikanth/rss/reader/repository/RssRepository.kt @@ -95,6 +95,7 @@ class RssRepository( link = post.link, commnetsLink = post.commentsLink, feedLink = feedPayload.link, + rawContent = post.rawContent ) } } diff --git a/shared/src/commonMain/sqldelight/databases/9.db b/shared/src/commonMain/sqldelight/databases/9.db new file mode 100644 index 0000000000000000000000000000000000000000..e964145b2e31f826a41aad051040bb332b246159 GIT binary patch literal 86016 zcmeI5O>Y~=8OKSza4E{N?AR=us0Ic$QjrlYn>7TtFocAnRx%TsbopZ0$bndsYnc$= zOs-?+1t=)PIpp3C&|7jza|wDXdd#7xev)2_Ab>BOXJ&_-<%*P&G(aK$2}|0Y_viQg z=b2gWST6lB*Kln1sMBj%4!aRJ7YGIe?=cn#@XJ6T%D>d-Eq*blZurljc0K9xTwuNT zmw76@{EEsfFTXnb&f=#_GYembzc&tzo1rh}znc%veL4GTHa3$AeighK_-6TsNzDZU zgSm@FY;7$#c-OHWHtk2YU9a|St5+ZKZNp`oGy^}6JU>}$d^z1}}?Pbfi3zae}7WT5+-3vzS>eb-jf^QUc%dzy1 z=sq%e9F)BpZ`4^fUp9BmqCS8md)VyM4%k+%u$36hzhT6#TnP_K<6UaKx&7SB7R!}% z&b3?1XO&Vmzsq*YrHy!m@lV@o*-3HLY_tzpC7;c2oA=FandWp3>~`Z5d-Jl>YwY)| z*5ycIP&;p|n9=azGfLaHd)-doQ7(Ao^sDKevRSNV^Ch!braWbPF}q9lO)DM|bvX*E z%L;i>II?B#X7j_uBFpaP3q_NF74+8PPQz*1NmjS}wO*s^G&=1hBdZeZUOHDXODx{D zpRK#4=+aY?5=7}+5i`F%u+B*<`%|r~+}-B-Iq7DMtaI~w^3~-Ymb@f zChimqcXSsr&tN;vy6}h??B3Z>Y$F=%H`;akxqu>MO~zD*X$!8_gotIsW2MwM;*_2v zaSSp`($hCx3fgq3j_aB7#`HX`xKGbic&eVY*5YDr9-&~FKBm|A&PXd#Q>~ElIr@V1uW;Uyhm;x*WA$r^qoI3B}f; zK}$$aR2eCeZo?iWjujvgQc|4iBo3b}7%MlT;g@gg)3DZQJ9gWtKJD@q!k!diyrsqh z%Z>~dV-c~@vi9vtugQ~n!%5b&t-6{OWN==DmWQ3rLCfkL*mbtK$x>3Ye4X(5TNV6C z!70hqXk3Y?TRAut&BQosomPtj>C>cjGxQ|a5Ewek1UuO9N|ElD2v`5n{Wxh;hwa5{rx#7c& zrrwMim%j}PQQlO~4wVmKM9$YVU;KB{m0X!!r`~m>vPqS#tP!zDZG_eMyp8m%XBin9 zhgQ)xgYOB&mR%W=;L6MKKJK*aE)Urv z?)e(5ab%8NPdh&L)bR1n>o(eLy}GDn@lAWvw%V2Ms9nx!ZW<@DyPBf!^M_A@M(on1 z;L8ocN$ErVobm8dsqB{*`SCQ=VEEjfu0pbtf|K8ultaPl=$r{o)R#8l;vYFNB8h7= zp;+e9BoQmSsws6fE1$5J7MAFOp7ikn0T2KI5C8!X009sH0T2KI5C8!XIJE>KWEr0S zPpyAp7zls>2!H?xfB*=900@8p2!H?x5COdZhYmmh1V8`;KmY_l00ck)1V8`;K;ZNf z!2AEx?_*d90w4eaAOHd&00JNY0w4eaAOHe*{|^m-00@8p2!H?xfB*=900@8p2!O!p zCxG|=r{Bl05ClK~1V8`;KmY_l00ck)1V8`;@ca)AfB*=900@8p2!H?xfB*=900@A< z=_i2a|I_bdSO@|j00JNY0w4eaAOHd&00JNY0`%|yXO{mRSboL7;R6C700JNY0w4ea zAOHd&00JNY0w8c22t*>Yfwx>kW`nbVGkE`h8r%vSKmY_l00ck)1V8`;KmY_l00cl_ zIsyFt|LJ^?3j!bj0w4eaAOHd&00JNY0w4ear-8uI@(+Op<1lcxwEW%D+Szv&KV6zx z_&WT(acJBOeKG&td~oi|*;li%nN;wr;LX4{fj{sH^m!UJVmEFC2j?B@VbgwO+x2SS zwtBV4)mo?R*lp*Cq%)bKnJ$~GoZiZrZ1so|tMNviW%Ffo*DSKT#q6DQ@d5k5e2`?d z>-;N~7|dNXVry%`!MnbG8};XUi~M$Ra(_H`HQu-P8}%e}>|QI$y8G=;QtwmGP9)e~ zwtTx#DYIf>FT35nV8pIo4Gu2&Mp3sMOCJW^M<$PhvL_q;u-T~{u&rESD>0aV!-!qE z5+0PsyVQDf`?;4bmMiI;%UR23l~Ojp%XZ48jd+CdPuptQNpaO|v=3M%pUrQZ_swmY z=5!A1cH<8u7Bk8tLdGxS*&LBC9_zj zJY{<^yGu@;Ry-o=a{p6ZR>+IOku7sKn;#|?S#~#HD4L{_g5Fx(X*f+g$?A5$)@yW~ zMyH)*WL1LQOXn(PiN)LYvvs!=U3yAVf+&3}V&=C8);Vcqf2x(0yW3pPC$>@+cde8+ zV^*rfp_LjgRx-;>44-W>%@oZHH$zX^yKQnvGQN{UmR#Nm?#W93FP{PJym8oG;U z^=X%{5cZ@9<1IB7SaxKv7>kIFmbGtJdQG0p8&0yGZPnGRAcONFv^?x|4q8_4z^=2+ zO_q|HTqG3QkF;M&n9E-O9nKXeP#4>$F;YSJ9_Q>t^Ult|2nSkxhKCQ?#}b zHde0j(5I%0h$v6n${M>qiMU?I$9q)$IHK3xD4;7()t)l$7w;R!%33u1c~#ensyKPn zqY^)3%p(=31-i(J=G|O6<1WUkmm^nVuWRMXoMfts&9`A*chUsO)9yq}l`L1fydwWg zXD7IeOMGq0l=an+Vv4r@B}Lw-bCE3srGk$vp$~*|{DWDSahJxm-WU!?tWdHES9M}h zqf#C}N?}qm86X1Qi@A9t7L5j99*7+sDUQBEYJm}(WJ;l}nq2qG=KV6u7x-5tm!mzJ zW-0S!Dyv0~D9sHY?ymJ_)VTa@P>AxT>RV)LbVSbAG++F8(v@79U8jONQrV=+R@R7E zq;~&meBMTS*0YR!z8G3X+YG)Zq{oEOvd)#c!#6(p4Bz*Wbu!k{v~;IjCSA&;x6Mdm zH57{7iUtEBl=q)B8;)JIo;n?IUnTz46n!@apUnz?z4&mr1E-efJpK}Wabv||=l2NL zjhE)JPJG;H*SmVeXyPkG@?5W}7o!4!&+j@0T%i^2%rfsz=-BG)o)7&&p zWOp@1-{%jX1dZ6GOTm{Lf|JsR`Z?p_rBc~1FY@DQsKM~LJ6(ljCj}?JFDZwD)zLW< zoTx8t!o@#wVnhpKk z@AD=vhz|&W00{iQ66jqrVyRScusnRJq7J9n|`eSU!Eh@^{?dvz;KJ{HvfM4k!yct!U6=25oN87#eD zZayVlnB1>hVsgLo3>JS=_d9+L>n;5pmfjIRS~Yh2#^gSK?m4VC^>bKyw{);-{Qmsp zah&`dR_xdE9M-dO=vr@XKi+d#TE6cbmOPcjU)gwG>CxL5_btu|PGkYUfJ|gMt(S$PUZ9iBmgH^dbHJ zzu@wJ0{jm?AOHd&00JNY0w4eaAOHd&00JNY0;h<;;_OQBBfHn<|3Z2W|Nq}9ax6>% z0T2KI5C8!X009sH0T2KI5C8#50RR6VlmG%C00JNY0w4eaAOHd&00JNY0&g8&GC00@8p2!H?xfB*=900_Jm1oYqk_g~5Av5f!# literal 0 HcmV?d00001 diff --git a/shared/src/commonMain/sqldelight/dev/sasikanth/rss/reader/database/Post.sq b/shared/src/commonMain/sqldelight/dev/sasikanth/rss/reader/database/Post.sq index 2d2461b9c..8c36f92e4 100644 --- a/shared/src/commonMain/sqldelight/dev/sasikanth/rss/reader/database/Post.sq +++ b/shared/src/commonMain/sqldelight/dev/sasikanth/rss/reader/database/Post.sq @@ -11,16 +11,17 @@ CREATE TABLE post( bookmarked INTEGER AS Boolean NOT NULL DEFAULT 0, commentsLink TEXT DEFAULT NULL, read INTEGER AS Boolean NOT NULL DEFAULT 0, + rawContent TEXT, FOREIGN KEY(feedLink) REFERENCES feed(link) ON DELETE CASCADE ); CREATE INDEX post_feed_link_index ON post(feedLink); upsert: -INSERT INTO post(title, description, imageUrl, date, feedLink, link, commentsLink) -VALUES (:title, :description, :imageUrl, :date, :feedLink, :link, :commnetsLink) +INSERT INTO post(title, description, rawContent, imageUrl, date, feedLink, link, commentsLink) +VALUES (:title, :description, :rawContent, :imageUrl, :date, :feedLink, :link, :commnetsLink) ON CONFLICT(link) DO -UPDATE SET title = excluded.title, description = excluded.description, imageUrl = excluded.imageUrl, date = excluded.date; +UPDATE SET title = excluded.title, description = excluded.description, rawContent = excluded.rawContent, imageUrl = excluded.imageUrl, date = excluded.date; count: SELECT COUNT(*) FROM post diff --git a/shared/src/commonMain/sqldelight/migrations/8.sqm b/shared/src/commonMain/sqldelight/migrations/8.sqm new file mode 100644 index 000000000..5ca98c78f --- /dev/null +++ b/shared/src/commonMain/sqldelight/migrations/8.sqm @@ -0,0 +1 @@ +ALTER TABLE post ADD COLUMN rawContent TEXT; diff --git a/shared/src/commonTest/kotlin/dev/sasikanth/rss/reader/FeedParserTest.kt b/shared/src/commonTest/kotlin/dev/sasikanth/rss/reader/FeedParserTest.kt index e5ff374a8..aa26a64fa 100644 --- a/shared/src/commonTest/kotlin/dev/sasikanth/rss/reader/FeedParserTest.kt +++ b/shared/src/commonTest/kotlin/dev/sasikanth/rss/reader/FeedParserTest.kt @@ -42,6 +42,7 @@ class FeedParserTest { title = "Post with image", link = "https://example.com/first-post", description = "First post description.", + rawContent = "First post description.", imageUrl = "https://example.com/first-post-media-url", date = 1685005200000, commentsLink = null @@ -50,6 +51,7 @@ class FeedParserTest { title = "Post without image", link = "https://example.com/second-post", description = "Second post description.", + rawContent = "Second post description.", imageUrl = null, date = 1684999800000, commentsLink = null @@ -58,6 +60,7 @@ class FeedParserTest { title = "Podcast post", link = "https://example.com/third-post", description = "Third post description.", + rawContent = "Third post description.", imageUrl = null, date = 1684924200000, commentsLink = null @@ -66,6 +69,7 @@ class FeedParserTest { title = "Post with enclosure image", link = "https://example.com/fourth-post", description = "Fourth post description.", + rawContent = "Fourth post description.", imageUrl = "https://example.com/enclosure-image", date = 1684924200000, commentsLink = null @@ -74,6 +78,12 @@ class FeedParserTest { title = "Post with description and encoded content", link = "https://example.com/fifth-post", description = "Fourth post description in HTML syntax.", + rawContent = + """ +

Fourth post description in HTML syntax.

+ encoded image + """ + .trimIndent(), imageUrl = "https://example.com/encoded-image", date = 1684924200000, commentsLink = null @@ -82,6 +92,7 @@ class FeedParserTest { title = "Post with relative path image", link = "https://example.com/post-with-relative-image", description = "Relative image post description.", + rawContent = "Relative image post description.", imageUrl = "https://example.com/relative-media-url", date = 1685005200000, commentsLink = null @@ -90,6 +101,7 @@ class FeedParserTest { title = "Post with comments", link = "https://example.com/post-with-comments", description = "Really long post with comments.", + rawContent = "Really long post with comments.", imageUrl = null, date = 1685005200000, commentsLink = "https://example/post-with-comments/comments" @@ -120,6 +132,12 @@ class FeedParserTest { title = "Post with image", link = "https://example.com/first-post", description = "Post summary with an image.", + rawContent = + """ + First Image +

Post summary with an image.

+ """ + .trimIndent(), imageUrl = "https://example.com/image.jpg", date = 1685008800000, commentsLink = null @@ -128,6 +146,11 @@ class FeedParserTest { title = "Second post", link = "https://example.com/second-post", description = "Post summary of the second post.", + rawContent = + """ +

Post summary of the second post.

+ """ + .trimIndent(), imageUrl = null, date = 1684917000000, commentsLink = null @@ -136,6 +159,11 @@ class FeedParserTest { title = "Post without image", link = "https://example.com/third-post", description = "Post summary of the third post. click here.", + rawContent = + """ +

Post summary of the third post. click here.

+ """ + .trimIndent(), imageUrl = null, date = 1684936800000, commentsLink = null @@ -144,6 +172,12 @@ class FeedParserTest { title = "Post with relative image", link = "https://example.com/relative-image-post", description = "Post summary with an image.", + rawContent = + """ + Relative Image +

Post summary with an image.

+ """ + .trimIndent(), imageUrl = "https://example.com/resources/image.jpg", date = 1685008800000, commentsLink = null