From b69d671417349882085fb73796a7732d72ee1ee9 Mon Sep 17 00:00:00 2001 From: micheal-death <2664779@qq.com> Date: Thu, 7 Dec 2023 18:36:44 +0800 Subject: [PATCH 1/6] fix(route): picnob --- lib/v2/picnob/maintainer.js | 2 +- lib/v2/picnob/templates/desc.art | 6 +-- lib/v2/picnob/user.js | 76 ++++++++++++++++++++------------ lib/v2/picnob/utils.js | 13 ++++++ 4 files changed, 65 insertions(+), 32 deletions(-) create mode 100644 lib/v2/picnob/utils.js diff --git a/lib/v2/picnob/maintainer.js b/lib/v2/picnob/maintainer.js index 625e52ce659052..3962b619816ae7 100644 --- a/lib/v2/picnob/maintainer.js +++ b/lib/v2/picnob/maintainer.js @@ -1,3 +1,3 @@ module.exports = { - '/user/:id': ['TonyRL'], + '/user/:id': ['TonyRL', 'micheal-death'], }; diff --git a/lib/v2/picnob/templates/desc.art b/lib/v2/picnob/templates/desc.art index a380de17a9279e..9140d9f63fe728 100644 --- a/lib/v2/picnob/templates/desc.art +++ b/lib/v2/picnob/templates/desc.art @@ -3,11 +3,11 @@ {{ else if item.type === 'img_multi' }} - {{ each images i }} - + {{ each item.images i }} + {{ /each }} {{ else if item.type === 'img_sig' }} - + {{ /if }}
{{@ item.sum }} diff --git a/lib/v2/picnob/user.js b/lib/v2/picnob/user.js index 6c35449db2e1fc..e7a34964e72462 100644 --- a/lib/v2/picnob/user.js +++ b/lib/v2/picnob/user.js @@ -1,34 +1,46 @@ -const got = require('@/utils/got'); const cheerio = require('cheerio'); -const { parseDate } = require('@/utils/parse-date'); +// const { parseRelativeDate } = require('@/utils/parse-date'); const { art } = require('@/utils/render'); const path = require('path'); +const { puppeteerGet } = require('./utils'); module.exports = async (ctx) => { const baseUrl = 'https://www.picnob.com'; const { id } = ctx.params; const url = `${baseUrl}/profile/${id}/`; - const { data: response } = await got(url); - const $ = cheerio.load(response); + const data = await puppeteerGet(url); + const $ = cheerio.load(data); const profileName = $('h1.fullname').text(); - const userId = $('input[name=userid]').attr('value'); - const { data } = await got(`${baseUrl}/api/posts`, { - searchParams: { - userid: userId, - }, - }); + const posts = $('.posts .items .item') + .toArray() + .map((item) => { + const link = $(item).find('.cover_link').attr('href'); + const icon = $(item).find('.corner .icon'); + let type = 'img_sig'; + if (icon.hasClass('icon_video')) { + type = 'video'; + } + if (icon.hasClass('icon_multi')) { + type = 'img_multi'; + } + return { + link, + type, + }; + }); - const list = data.posts.items.map(async (item) => { - const { shortcode, type } = item; - const link = `${baseUrl}/post/${shortcode}/`; - let images = []; - if (type === 'img_multi') { - images = await ctx.cache.tryGet(link, async () => { - const { data } = await got(link); - const $ = cheerio.load(data); - return [ + const list = await Promise.all( + posts.slice(0, 10).map(async (item) => { + const link = `${baseUrl}${item.link}`; + const data = await ctx.cache.tryGet(link, async () => await puppeteerGet(link)); + const $ = cheerio.load(data); + item.sum = $('.sum_full').text(); + item.time = $('.time').find('.txt').text(); + + if (item.type === 'img_multi') { + item.images = [ ...new Set( $('.post_slide a') .toArray() @@ -41,15 +53,23 @@ module.exports = async (ctx) => { }) ), ]; - }); - } - return { - title: item.sum_pure, - description: art(path.join(__dirname, 'templates/desc.art'), { item, images }), - link, - pubDate: parseDate(item.time, 'X'), - }; - }); + } + if (item.type === 'video') { + item.pic = $('.video_img').find('img').attr('data-src'); + item.video = $('.downbtn').attr('href'); + } + if (item.type === 'img_sig') { + item.pic = $('.pic').find('img').attr('data-src'); + } + + return { + title: item.sum, + description: art(path.join(__dirname, 'templates/desc.art'), { item }), + link, + // pubDate: parseRelativeDate(item.time), + }; + }) + ); ctx.state.data = { title: `${profileName} (@${id}) - Picnob`, diff --git a/lib/v2/picnob/utils.js b/lib/v2/picnob/utils.js new file mode 100644 index 00000000000000..de72869c89a595 --- /dev/null +++ b/lib/v2/picnob/utils.js @@ -0,0 +1,13 @@ +const puppeteerGet = async (url) => { + const browser = await require('@/utils/puppeteer')(); + const page = await browser.newPage(); + await page.goto(url); + const html = await page.evaluate(() => document.documentElement.innerHTML); + await page.close(); + await browser.close(); + return html; +}; + +module.exports = { + puppeteerGet, +}; From 1ee7b2c8b058784c3b8c95e697499625e9f1f920 Mon Sep 17 00:00:00 2001 From: micheal-death <2664779@qq.com> Date: Fri, 8 Dec 2023 11:59:44 +0800 Subject: [PATCH 2/6] fix(route): picnob. Use one browser session to do all http requests. --- lib/v2/picnob/user.js | 6 ++++-- lib/v2/picnob/utils.js | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/v2/picnob/user.js b/lib/v2/picnob/user.js index e7a34964e72462..8bd79a6c55c07f 100644 --- a/lib/v2/picnob/user.js +++ b/lib/v2/picnob/user.js @@ -9,7 +9,8 @@ module.exports = async (ctx) => { const { id } = ctx.params; const url = `${baseUrl}/profile/${id}/`; - const data = await puppeteerGet(url); + const browser = await require('@/utils/puppeteer')(); + const data = await puppeteerGet(url, browser); const $ = cheerio.load(data); const profileName = $('h1.fullname').text(); @@ -34,7 +35,7 @@ module.exports = async (ctx) => { const list = await Promise.all( posts.slice(0, 10).map(async (item) => { const link = `${baseUrl}${item.link}`; - const data = await ctx.cache.tryGet(link, async () => await puppeteerGet(link)); + const data = await ctx.cache.tryGet(link, async () => await puppeteerGet(link, browser)); const $ = cheerio.load(data); item.sum = $('.sum_full').text(); item.time = $('.time').find('.txt').text(); @@ -70,6 +71,7 @@ module.exports = async (ctx) => { }; }) ); + await browser.close(); ctx.state.data = { title: `${profileName} (@${id}) - Picnob`, diff --git a/lib/v2/picnob/utils.js b/lib/v2/picnob/utils.js index de72869c89a595..c61c3d65071b0a 100644 --- a/lib/v2/picnob/utils.js +++ b/lib/v2/picnob/utils.js @@ -1,10 +1,10 @@ -const puppeteerGet = async (url) => { - const browser = await require('@/utils/puppeteer')(); +const puppeteerGet = async (url, browser) => { const page = await browser.newPage(); - await page.goto(url); + await page.goto(url, { + waitUntil: 'domcontentloaded', + }); const html = await page.evaluate(() => document.documentElement.innerHTML); await page.close(); - await browser.close(); return html; }; From fd91e7d1065074e7ce5a68f8d6589a7aba563888 Mon Sep 17 00:00:00 2001 From: micheal-death <2664779@qq.com> Date: Mon, 11 Dec 2023 17:44:44 +0800 Subject: [PATCH 3/6] fix(route): picnob. Use puppeteer as a fallback option when a normal request returns a 403 error. --- lib/v2/picnob/user.js | 114 ++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/lib/v2/picnob/user.js b/lib/v2/picnob/user.js index 8bd79a6c55c07f..ea90d5094114b4 100644 --- a/lib/v2/picnob/user.js +++ b/lib/v2/picnob/user.js @@ -1,5 +1,6 @@ +const got = require('@/utils/got'); const cheerio = require('cheerio'); -// const { parseRelativeDate } = require('@/utils/parse-date'); +const { parseDate } = require('@/utils/parse-date'); const { art } = require('@/utils/render'); const path = require('path'); const { puppeteerGet } = require('./utils'); @@ -10,64 +11,79 @@ module.exports = async (ctx) => { const url = `${baseUrl}/profile/${id}/`; const browser = await require('@/utils/puppeteer')(); - const data = await puppeteerGet(url, browser); - const $ = cheerio.load(data); + // TODO: can't bypass cloudflare 403 error without puppeteer + let html; + let usePuppeteer = false; + try { + const { data } = await got(url, { + headers: { + accept: 'text/html', + referer: 'https://www.google.com/', + }, + }); + html = data.response; + } catch (e) { + if (e.message.includes('code 403')) { + html = await puppeteerGet(url, browser); + usePuppeteer = true; + } + } + const $ = cheerio.load(html); const profileName = $('h1.fullname').text(); + const userId = $('input[name=userid]').attr('value'); - const posts = $('.posts .items .item') - .toArray() - .map((item) => { - const link = $(item).find('.cover_link').attr('href'); - const icon = $(item).find('.corner .icon'); - let type = 'img_sig'; - if (icon.hasClass('icon_video')) { - type = 'video'; - } - if (icon.hasClass('icon_multi')) { - type = 'img_multi'; - } - return { - link, - type, - }; + let posts; + if (!usePuppeteer) { + const { data } = await got(`${baseUrl}/api/posts`, { + headers: { + accept: 'application/json', + }, + searchParams: { + userid: userId, + }, }); + posts = data.posts; + } else { + const html = await puppeteerGet(`${baseUrl}/api/posts?userid=${userId}`, browser); + const data = JSON.parse(html.replace('', '').replace('', '')); + posts = data.posts; + } const list = await Promise.all( - posts.slice(0, 10).map(async (item) => { - const link = `${baseUrl}${item.link}`; - const data = await ctx.cache.tryGet(link, async () => await puppeteerGet(link, browser)); - const $ = cheerio.load(data); - item.sum = $('.sum_full').text(); - item.time = $('.time').find('.txt').text(); - - if (item.type === 'img_multi') { - item.images = [ - ...new Set( - $('.post_slide a') - .toArray() - .map((a) => { - a = $(a); - return { - ori: a.attr('href'), - url: a.find('img').attr('data-src'), - }; - }) - ), - ]; - } - if (item.type === 'video') { - item.pic = $('.video_img').find('img').attr('data-src'); - item.video = $('.downbtn').attr('href'); - } - if (item.type === 'img_sig') { - item.pic = $('.pic').find('img').attr('data-src'); + posts.items.map(async (item) => { + const { shortcode, type } = item; + const link = `${baseUrl}/post/${shortcode}/`; + if (type === 'img_multi') { + item.images = await ctx.cache.tryGet(link, async () => { + let html; + if (!usePuppeteer) { + const { data } = await got(link); + html = data; + } else { + html = await puppeteerGet(link, browser); + } + const $ = cheerio.load(html); + return [ + ...new Set( + $('.post_slide a') + .toArray() + .map((a) => { + a = $(a); + return { + ori: a.attr('href'), + url: a.find('img').attr('data-src'), + }; + }) + ), + ]; + }); } return { - title: item.sum, + title: item.sum_pure, description: art(path.join(__dirname, 'templates/desc.art'), { item }), link, - // pubDate: parseRelativeDate(item.time), + pubDate: parseDate(item.time, 'X'), }; }) ); From e4d3f4da772071a4f45978d069993b8e31edab35 Mon Sep 17 00:00:00 2001 From: micheal-death <2664779@qq.com> Date: Tue, 12 Dec 2023 11:32:51 +0800 Subject: [PATCH 4/6] fix(route): picnob. Block unnecessary requests when using puppeteer. --- lib/v2/picnob/utils.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/v2/picnob/utils.js b/lib/v2/picnob/utils.js index c61c3d65071b0a..2c7dc7a21628b7 100644 --- a/lib/v2/picnob/utils.js +++ b/lib/v2/picnob/utils.js @@ -1,5 +1,9 @@ const puppeteerGet = async (url, browser) => { const page = await browser.newPage(); + await page.setRequestInterception(true); + page.on('request', (request) => { + request.resourceType() === 'document' ? request.continue() : request.abort(); + }); await page.goto(url, { waitUntil: 'domcontentloaded', }); From 22044164babd1aac72e40f501e231297688c7d6a Mon Sep 17 00:00:00 2001 From: micheal-death <2664779@qq.com> Date: Wed, 13 Dec 2023 16:32:50 +0800 Subject: [PATCH 5/6] fix(route): picnob. Adaptation of JSON responses when using puppeteer for http requests. --- lib/v2/picnob/user.js | 3 +-- lib/v2/picnob/utils.js | 11 +++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/v2/picnob/user.js b/lib/v2/picnob/user.js index ea90d5094114b4..fd4717f0d7a580 100644 --- a/lib/v2/picnob/user.js +++ b/lib/v2/picnob/user.js @@ -44,8 +44,7 @@ module.exports = async (ctx) => { }); posts = data.posts; } else { - const html = await puppeteerGet(`${baseUrl}/api/posts?userid=${userId}`, browser); - const data = JSON.parse(html.replace('', '').replace('', '')); + const data = await puppeteerGet(`${baseUrl}/api/posts?userid=${userId}`, browser); posts = data.posts; } diff --git a/lib/v2/picnob/utils.js b/lib/v2/picnob/utils.js index 2c7dc7a21628b7..2bf7f6d586c884 100644 --- a/lib/v2/picnob/utils.js +++ b/lib/v2/picnob/utils.js @@ -1,15 +1,22 @@ const puppeteerGet = async (url, browser) => { + let data; const page = await browser.newPage(); await page.setRequestInterception(true); page.on('request', (request) => { request.resourceType() === 'document' ? request.continue() : request.abort(); }); + page.on('response', async (response) => { + if (response.request().url().includes('/api/posts')) { + data = await response.json(); + } else { + data = await response.text(); + } + }); await page.goto(url, { waitUntil: 'domcontentloaded', }); - const html = await page.evaluate(() => document.documentElement.innerHTML); await page.close(); - return html; + return data; }; module.exports = { From fed625cb55a406635df60194662fc31bbf9fb8df Mon Sep 17 00:00:00 2001 From: MichealDeath <2664779@qq.com> Date: Wed, 13 Dec 2023 23:45:34 +0800 Subject: [PATCH 6/6] Update lib/v2/picnob/user.js Co-authored-by: Tony --- lib/v2/picnob/user.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/v2/picnob/user.js b/lib/v2/picnob/user.js index fd4717f0d7a580..6546809000f188 100644 --- a/lib/v2/picnob/user.js +++ b/lib/v2/picnob/user.js @@ -21,7 +21,7 @@ module.exports = async (ctx) => { referer: 'https://www.google.com/', }, }); - html = data.response; + html = data; } catch (e) { if (e.message.includes('code 403')) { html = await puppeteerGet(url, browser);