-
Notifications
You must be signed in to change notification settings - Fork 736
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use bilibiliapi instead of web scraping to obtain the video duration … #2852
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How do I test this change? Can you provide an example feed URL?
@@ -579,36 +580,45 @@ func fetchBilibiliWatchTime(websiteURL string) (int, error) { | |||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout()) | |||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy()) | |||
|
|||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL)) | |||
bilibiliVideoId := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function is getting too long. It would be better to extract this logic into a separate function.
bilibiliVideoId := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL) | |
var (
	// bilibiliURLRegex matches http(s) URLs whose host is exactly
	// "bilibili.com" or "www.bilibili.com". The trailing group requires the
	// host to end right after ".com" (port, path, query, fragment, or end of
	// string), so look-alike hosts such as "bilibili.com.evil.example" are
	// rejected.
	bilibiliURLRegex = regexp.MustCompile(`^https?://(?:www\.)?bilibili\.com(?:[:/?#]|$)`)

	// bilibiliVideoIdRegex matches a "/video/" segment followed by either
	// "av<digits>" (digits in group 1) or "BV<alphanumerics>" (characters
	// after the prefix in group 2). Matching is case-insensitive.
	bilibiliVideoIdRegex = regexp.MustCompile(`(?i)/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
)

// extractBilibiliVideoID extracts the video identifier from a Bilibili video
// URL.
//
// It returns the identifier kind and the identifier value:
//   - ("aid", digits) when the URL contains "/video/av<digits>"
//   - ("bvid", id) when the URL contains "/video/BV<id>"; the "BV" prefix
//     itself is not part of the returned value
//
// An error is returned when websiteURL is not a bilibili.com URL or when no
// video identifier can be found in it.
func extractBilibiliVideoID(websiteURL string) (string, string, error) {
	if !bilibiliURLRegex.MatchString(websiteURL) {
		return "", "", fmt.Errorf("not a valid Bilibili URL: %s", websiteURL)
	}

	matches := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
	if matches == nil {
		return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
	}

	// Exactly one of the two capture groups is non-empty on a successful
	// match; the group that did not participate is reported as "".
	switch {
	case matches[1] != "":
		return "aid", matches[1], nil
	case matches[2] != "":
		return "bvid", matches[2], nil
	}

	return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
}
func fetchBilibiliWatchTime(websiteURL string) (int, error) { | |
// ... (other parts of the function) | |
idType, videoID, err := extractBilibiliVideoID(websiteURL) | |
if err != nil { | |
return 0, err | |
} | |
bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID) | |
// ... (rest of the function) | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if !bilibiliURLRegex.MatchString(websiteURL) { return "", "", fmt.Errorf("not a valid Bilibili URL: %s", websiteURL) }
I think the URL has already been validated by `shouldFetchBilibiliWatchTime`, so I removed this check.
defer responseHandler.Close() | ||
|
||
if localizedError := responseHandler.LocalizedError(); localizedError != nil { | ||
slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error())) | ||
slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", bilibiliApiURL), slog.Any("error", localizedError.Error())) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Many improvements can be made to handle the response.
slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", bilibiliApiURL), slog.Any("error", localizedError.Error())) | |
slog.Warn("Unable to fetch Bilibili API", | |
slog.String("website_url", websiteURL), | |
slog.String("api_url", bilibiliApiURL), | |
slog.Any("error", localizedError.Error())) | |
return 0, localizedError.Error() | |
} | |
var result map[string]interface{} | |
doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())) | |
if docErr := doc.Decode(&result); docErr != nil { | |
return 0, fmt.Errorf("failed to decode API response: %v", docErr) | |
} | |
// Check API response status | |
if code, ok := result["code"].(float64); !ok || code != 0 { | |
return 0, fmt.Errorf("API returned error code: %v", result["code"]) | |
} | |
data, ok := result["data"].(map[string]interface{}) | |
if !ok { | |
return 0, fmt.Errorf("data field not found or not an object") | |
} | |
duration, ok := data["duration"].(float64) | |
if !ok { | |
return 0, fmt.Errorf("duration not found or not a number") | |
} | |
intDuration := int(duration) | |
durationMin := intDuration / 60 | |
if intDuration%60 != 0 { | |
durationMin++ | |
} | |
return durationMin, nil |
You can try using https://rsshub.app/bilibili/user/video/2267573, but since Bilibili does not provide an official RSS service, the public service implemented through RSSHub is prone to frequent blocking issues. I have resolved this by setting up my own RSSHub service and only allowing access to Miniflux to avoid such problems. If you need to test, I can open my RSSHub service for a short period, or you can also try setting up your own RSSHub service, which provides the possibility to subscribe to websites that do not have an official RSS service. |
I have tested the recent commit, and it works fine (DockerHub: qeynos/miniflux:test). |
…to avoid captchas.
Do you follow the guidelines?