Commit

Refactor feed discovery and avoid an extra HTTP request if the url provided is the feed

fguillot committed Oct 22, 2023
1 parent 14e25ab commit e60989f
Showing 12 changed files with 411 additions and 174 deletions.
19 changes: 12 additions & 7 deletions internal/api/subscription.go
@@ -7,9 +7,11 @@ import (
json_parser "encoding/json"
"net/http"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/http/request"
"miniflux.app/v2/internal/http/response/json"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
"miniflux.app/v2/internal/reader/subscription"
"miniflux.app/v2/internal/validator"
)
@@ -32,14 +34,17 @@ func (h *handler) discoverSubscriptions(w http.ResponseWriter, r *http.Request)
rssbridgeURL = intg.RSSBridgeURL
}

subscriptions, localizedError := subscription.FindSubscriptions(
requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
requestBuilder.WithUserAgent(subscriptionDiscoveryRequest.UserAgent)
requestBuilder.WithCookie(subscriptionDiscoveryRequest.Cookie)
requestBuilder.WithUsernameAndPassword(subscriptionDiscoveryRequest.Username, subscriptionDiscoveryRequest.Password)
requestBuilder.UseProxy(subscriptionDiscoveryRequest.FetchViaProxy)
requestBuilder.IgnoreTLSErrors(subscriptionDiscoveryRequest.AllowSelfSignedCertificates)

subscriptions, localizedError := subscription.NewSubscriptionFinder(requestBuilder).FindSubscriptions(
subscriptionDiscoveryRequest.URL,
subscriptionDiscoveryRequest.UserAgent,
subscriptionDiscoveryRequest.Cookie,
subscriptionDiscoveryRequest.Username,
subscriptionDiscoveryRequest.Password,
subscriptionDiscoveryRequest.FetchViaProxy,
subscriptionDiscoveryRequest.AllowSelfSignedCertificates,
rssbridgeURL,
)

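Note: the discovery entry point no longer takes eight positional arguments. The HTTP options now live on a fetcher.RequestBuilder, and FindSubscriptions only receives the target URL plus the optional RSS-Bridge URL. A minimal sketch of the new call shape (not part of the commit), using only names visible in this diff; the target URL is a placeholder and config.Opts is assumed to have been initialized by the application:

package main

import (
	"log"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/reader/fetcher"
	"miniflux.app/v2/internal/reader/subscription"
)

func main() {
	// config.Opts is normally populated at startup; it is assumed to be ready here.
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	// The second argument is the RSS-Bridge URL; empty when the integration is disabled.
	subscriptions, localizedError := subscription.NewSubscriptionFinder(requestBuilder).FindSubscriptions("https://example.org/", "")
	if localizedError != nil {
		log.Fatal(localizedError.Error())
	}
	log.Printf("discovered %d candidate feed(s)", len(subscriptions))
}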
18 changes: 14 additions & 4 deletions internal/googlereader/handler.go
@@ -20,6 +20,7 @@ import (
"miniflux.app/v2/internal/integration"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/proxy"
"miniflux.app/v2/internal/reader/fetcher"
mff "miniflux.app/v2/internal/reader/handler"
mfs "miniflux.app/v2/internal/reader/subscription"
"miniflux.app/v2/internal/storage"
@@ -667,13 +668,22 @@ func (h *handler) quickAddHandler(w http.ResponseWriter, r *http.Request) {
return
}

url := r.Form.Get(ParamQuickAdd)
if !validator.IsValidURL(url) {
json.BadRequest(w, r, fmt.Errorf("googlereader: invalid URL: %s", url))
feedURL := r.Form.Get(ParamQuickAdd)
if !validator.IsValidURL(feedURL) {
json.BadRequest(w, r, fmt.Errorf("googlereader: invalid URL: %s", feedURL))
return
}

subscriptions, localizedError := mfs.FindSubscriptions(url, "", "", "", "", false, false, "")
requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

var rssBridgeURL string
if intg, err := h.store.Integration(userID); err == nil && intg != nil && intg.RSSBridgeEnabled {
rssBridgeURL = intg.RSSBridgeURL
}

subscriptions, localizedError := mfs.NewSubscriptionFinder(requestBuilder).FindSubscriptions(feedURL, rssBridgeURL)
if localizedError != nil {
json.ServerError(w, r, localizedError.Error())
return
26 changes: 26 additions & 0 deletions internal/model/feed.go
@@ -5,6 +5,7 @@ package model // import "miniflux.app/v2/internal/model"

import (
"fmt"
"io"
"math"
"time"

@@ -144,6 +145,31 @@ type FeedCreationRequest struct {
UrlRewriteRules string `json:"urlrewrite_rules"`
}

type FeedCreationRequestFromSubscriptionDiscovery struct {
Content io.ReadSeeker
ETag string
LastModified string

FeedURL string `json:"feed_url"`
CategoryID int64 `json:"category_id"`
UserAgent string `json:"user_agent"`
Cookie string `json:"cookie"`
Username string `json:"username"`
Password string `json:"password"`
Crawler bool `json:"crawler"`
Disabled bool `json:"disabled"`
NoMediaPlayer bool `json:"no_media_player"`
IgnoreHTTPCache bool `json:"ignore_http_cache"`
AllowSelfSignedCertificates bool `json:"allow_self_signed_certificates"`
FetchViaProxy bool `json:"fetch_via_proxy"`
ScraperRules string `json:"scraper_rules"`
RewriteRules string `json:"rewrite_rules"`
BlocklistRules string `json:"blocklist_rules"`
KeeplistRules string `json:"keeplist_rules"`
HideGlobally bool `json:"hide_globally"`
UrlRewriteRules string `json:"urlrewrite_rules"`
}

// FeedModificationRequest represents the request to update a feed.
type FeedModificationRequest struct {
FeedURL *string `json:"feed_url"`
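Note: the new request type mirrors FeedCreationRequest but carries three extra fields without JSON tags: Content, an io.ReadSeeker over the feed body the subscription finder already downloaded, plus the ETag and Last-Modified validators from that response. This is what lets feed creation skip a second download. A sketch (not part of the commit) of how such a request could be assembled from a body that is already in memory; all field values are placeholders:

package example

import (
	"bytes"

	"miniflux.app/v2/internal/model"
)

// buildRequest wraps an already-downloaded feed body into the new request type.
// etag and lastModified would come from the headers of that earlier response.
func buildRequest(body []byte, etag, lastModified string) *model.FeedCreationRequestFromSubscriptionDiscovery {
	return &model.FeedCreationRequestFromSubscriptionDiscovery{
		Content:      bytes.NewReader(body), // *bytes.Reader satisfies io.ReadSeeker
		ETag:         etag,
		LastModified: lastModified,
		FeedURL:      "https://example.org/feed.xml", // placeholder
		CategoryID:   1,                              // placeholder
	}
}

Keeping Content as an io.ReadSeeker rather than a string matches the new ParseFeed signature further down and avoids another copy of the body.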
82 changes: 80 additions & 2 deletions internal/reader/handler/handler.go
@@ -4,6 +4,7 @@
package handler // import "miniflux.app/v2/internal/reader/handler"

import (
"bytes"
"errors"
"log/slog"
"time"
@@ -25,6 +26,83 @@ var (
ErrDuplicatedFeed = errors.New("fetcher: duplicated feed")
)

func CreateFeedFromSubscriptionDiscovery(store *storage.Storage, userID int64, feedCreationRequest *model.FeedCreationRequestFromSubscriptionDiscovery) (*model.Feed, *locale.LocalizedErrorWrapper) {
slog.Debug("Begin feed creation process from subscription discovery",
slog.Int64("user_id", userID),
slog.String("feed_url", feedCreationRequest.FeedURL),
)

user, storeErr := store.UserByID(userID)
if storeErr != nil {
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}

if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) {
return nil, locale.NewLocalizedErrorWrapper(ErrCategoryNotFound, "error.category_not_found")
}

if store.FeedURLExists(userID, feedCreationRequest.FeedURL) {
return nil, locale.NewLocalizedErrorWrapper(ErrDuplicatedFeed, "error.duplicated_feed")
}

subscription, parseErr := parser.ParseFeed(feedCreationRequest.FeedURL, feedCreationRequest.Content)
if parseErr != nil {
return nil, locale.NewLocalizedErrorWrapper(parseErr, "error.unable_to_parse_feed", parseErr)
}

subscription.UserID = userID
subscription.UserAgent = feedCreationRequest.UserAgent
subscription.Cookie = feedCreationRequest.Cookie
subscription.Username = feedCreationRequest.Username
subscription.Password = feedCreationRequest.Password
subscription.Crawler = feedCreationRequest.Crawler
subscription.Disabled = feedCreationRequest.Disabled
subscription.IgnoreHTTPCache = feedCreationRequest.IgnoreHTTPCache
subscription.AllowSelfSignedCertificates = feedCreationRequest.AllowSelfSignedCertificates
subscription.FetchViaProxy = feedCreationRequest.FetchViaProxy
subscription.ScraperRules = feedCreationRequest.ScraperRules
subscription.RewriteRules = feedCreationRequest.RewriteRules
subscription.BlocklistRules = feedCreationRequest.BlocklistRules
subscription.KeeplistRules = feedCreationRequest.KeeplistRules
subscription.UrlRewriteRules = feedCreationRequest.UrlRewriteRules
subscription.EtagHeader = feedCreationRequest.ETag
subscription.LastModifiedHeader = feedCreationRequest.LastModified
subscription.FeedURL = feedCreationRequest.FeedURL
subscription.WithCategoryID(feedCreationRequest.CategoryID)
subscription.CheckedNow()

processor.ProcessFeedEntries(store, subscription, user, true)

if storeErr := store.CreateFeed(subscription); storeErr != nil {
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}

slog.Debug("Created feed",
slog.Int64("user_id", userID),
slog.Int64("feed_id", subscription.ID),
slog.String("feed_url", subscription.FeedURL),
)

requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithUsernameAndPassword(feedCreationRequest.Username, feedCreationRequest.Password)
requestBuilder.WithUserAgent(feedCreationRequest.UserAgent)
requestBuilder.WithCookie(feedCreationRequest.Cookie)
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
requestBuilder.UseProxy(feedCreationRequest.FetchViaProxy)
requestBuilder.IgnoreTLSErrors(feedCreationRequest.AllowSelfSignedCertificates)

checkFeedIcon(
store,
requestBuilder,
subscription.ID,
subscription.SiteURL,
subscription.IconURL,
)

return subscription, nil
}

// CreateFeed fetch, parse and store a new feed.
func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model.FeedCreationRequest) (*model.Feed, *locale.LocalizedErrorWrapper) {
slog.Debug("Begin feed creation process",
@@ -68,7 +146,7 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
return nil, locale.NewLocalizedErrorWrapper(ErrDuplicatedFeed, "error.duplicated_feed")
}

subscription, parseErr := parser.ParseFeed(responseHandler.EffectiveURL(), string(responseBody))
subscription, parseErr := parser.ParseFeed(responseHandler.EffectiveURL(), bytes.NewReader(responseBody))
if parseErr != nil {
return nil, locale.NewLocalizedErrorWrapper(parseErr, "error.unable_to_parse_feed", parseErr)
}
@@ -188,7 +266,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
return localizedError
}

updatedFeed, parseErr := parser.ParseFeed(responseHandler.EffectiveURL(), string(responseBody))
updatedFeed, parseErr := parser.ParseFeed(responseHandler.EffectiveURL(), bytes.NewReader(responseBody))
if parseErr != nil {
localizedError := locale.NewLocalizedErrorWrapper(parseErr, "error.unable_to_parse_feed")

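Note: CreateFeedFromSubscriptionDiscovery is the path that actually skips the extra request: it parses feedCreationRequest.Content directly instead of fetching FeedURL again, and it stores the ETag/Last-Modified values it was handed so the next refresh can send conditional headers. A sketch (not part of the commit) of a caller, reusing the hypothetical buildRequest helper from the note above; store and userID are assumed to come from the calling context:

package example

import (
	"log"

	"miniflux.app/v2/internal/model"
	feedHandler "miniflux.app/v2/internal/reader/handler"
	"miniflux.app/v2/internal/storage"
)

func createFromDiscovery(store *storage.Storage, userID int64, req *model.FeedCreationRequestFromSubscriptionDiscovery) {
	// The feed body itself is not downloaded again here; only the icon lookup
	// at the end of CreateFeedFromSubscriptionDiscovery may hit the network.
	feed, localizedError := feedHandler.CreateFeedFromSubscriptionDiscovery(store, userID, req)
	if localizedError != nil {
		log.Println(localizedError.Error())
		return
	}
	log.Printf("created feed #%d (%s)", feed.ID, feed.FeedURL)
}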
13 changes: 9 additions & 4 deletions internal/reader/parser/format.go
@@ -4,8 +4,9 @@
package parser // import "miniflux.app/v2/internal/reader/parser"

import (
"bytes"
"encoding/xml"
"strings"
"io"

rxml "miniflux.app/v2/internal/reader/xml"
)
@@ -20,12 +21,16 @@ const (
)

// DetectFeedFormat tries to guess the feed format from input data.
func DetectFeedFormat(data string) string {
if strings.HasPrefix(strings.TrimSpace(data), "{") {
func DetectFeedFormat(r io.ReadSeeker) string {
data := make([]byte, 512)
r.Read(data)

if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
return FormatJSON
}

decoder := rxml.NewDecoder(strings.NewReader(data))
r.Seek(0, io.SeekStart)
decoder := rxml.NewDecoder(r)

for {
token, _ := decoder.Token()
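Note: DetectFeedFormat now takes an io.ReadSeeker. It peeks at up to 512 bytes to spot a JSON Feed, seeks back to the start, and then scans XML tokens for a recognizable root element, so the reader is left partway through the document. Callers that want to reuse the reader must rewind it themselves, which is what the new ParseFeed below does. A small usage sketch (not part of the commit); the sample document is a placeholder:

package main

import (
	"fmt"
	"io"
	"strings"

	"miniflux.app/v2/internal/reader/parser"
)

func main() {
	r := strings.NewReader(`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`)

	if parser.DetectFeedFormat(r) == parser.FormatRSS {
		fmt.Println("detected an RSS feed")
	}

	// DetectFeedFormat moved the read position; rewind before reusing r.
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		fmt.Println(err)
	}
}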
15 changes: 8 additions & 7 deletions internal/reader/parser/format_test.go
@@ -4,12 +4,13 @@
package parser // import "miniflux.app/v2/internal/reader/parser"

import (
"strings"
"testing"
)

func TestDetectRDF(t *testing.T) {
data := `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"></rdf:RDF>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatRDF {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRDF)
@@ -18,7 +19,7 @@ func TestDetectRDF(t *testing.T) {

func TestDetectRSS(t *testing.T) {
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatRSS {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRSS)
@@ -27,7 +28,7 @@ func TestDetectRSS(t *testing.T) {

func TestDetectAtom10(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@@ -36,7 +37,7 @@ func TestDetectAtom10(t *testing.T) {

func TestDetectAtom03(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en"></feed>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@@ -45,7 +46,7 @@ func TestDetectAtom03(t *testing.T) {

func TestDetectAtomWithISOCharset(t *testing.T) {
data := `<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@@ -59,7 +60,7 @@ func TestDetectJSON(t *testing.T) {
"title" : "Example"
}
`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatJSON {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
@@ -70,7 +71,7 @@ func TestDetectUnknown(t *testing.T) {
data := `
<!DOCTYPE html> <html> </html>
`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))

if format != FormatUnknown {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
19 changes: 12 additions & 7 deletions internal/reader/parser/parser.go
@@ -5,7 +5,7 @@ package parser // import "miniflux.app/v2/internal/reader/parser"

import (
"errors"
"strings"
"io"

"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/atom"
@@ -17,16 +17,21 @@ import (
var ErrFeedFormatNotDetected = errors.New("parser: unable to detect feed format")

// ParseFeed analyzes the input data and returns a normalized feed object.
func ParseFeed(baseURL, data string) (*model.Feed, error) {
switch DetectFeedFormat(data) {
func ParseFeed(baseURL string, r io.ReadSeeker) (*model.Feed, error) {
r.Seek(0, io.SeekStart)
switch DetectFeedFormat(r) {
case FormatAtom:
return atom.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return atom.Parse(baseURL, r)
case FormatRSS:
return rss.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return rss.Parse(baseURL, r)
case FormatJSON:
return json.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return json.Parse(baseURL, r)
case FormatRDF:
return rdf.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return rdf.Parse(baseURL, r)
default:
return nil, ErrFeedFormatNotDetected
}
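Note: ParseFeed now accepts the body as an io.ReadSeeker and rewinds it before detection and again before the format-specific parser runs, so the same bytes are read twice without first being copied into a string; callers such as CreateFeed and RefreshFeed above simply wrap the fetched body in bytes.NewReader. A minimal sketch of the new call (not part of the commit); the sample feed and URL are placeholders:

package main

import (
	"bytes"
	"fmt"

	"miniflux.app/v2/internal/reader/parser"
)

func main() {
	// responseBody stands in for the bytes returned by the fetcher.
	responseBody := []byte(`<?xml version="1.0"?><rss version="2.0"><channel><title>Example</title><link>https://example.org/</link></channel></rss>`)

	feed, err := parser.ParseFeed("https://example.org/feed.xml", bytes.NewReader(responseBody))
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(feed.Title)
}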
(The remaining 5 changed files are not shown on this page.)
