From ee6b64a1acf964854064c5d5428a98acba3bf119 Mon Sep 17 00:00:00 2001
From: linxin
Date: Sun, 20 Oct 2024 20:38:31 +0800
Subject: [PATCH] feat: add hk_sfc workflow

---
Reviewer notes (this region is ignored by git am):
- Reflowed from a whitespace-collapsed copy of the patch. Indentation is
  reconstructed as 2-space and blank context lines were placed to satisfy
  the hunk line counts; run `git apply --check` before relying on it.
- hk_sfc.yml: actions/checkout takes `ref`, not `branch`. The `ids` input is
  now handed to the script through an env var instead of being expanded
  into the shell command line. The file grew by two lines, so the blob id on
  its `index` line no longer matches the post-image.
- index.ts: ids are trimmed after splitting, and the option description now
  states what the flag actually accepts.
- Comments on added lines are written in English. Context and removed lines
  keep their original text because they must match the pre-image.

 .github/workflows/hk_sfc.yml | 33 +++++++++++++++++++++++++++++++
 src/fs.ts                    |  3 +--
 src/sites/hk_sfc/entry.ts    | 38 +++++++++++++++++++++++++-----------
 src/sites/hk_sfc/index.ts    | 27 ++++++++++++++++++++++++-
 src/sites/hk_sfc/list.ts     |  6 +++---
 5 files changed, 90 insertions(+), 17 deletions(-)
 create mode 100644 .github/workflows/hk_sfc.yml

diff --git a/.github/workflows/hk_sfc.yml b/.github/workflows/hk_sfc.yml
new file mode 100644
index 0000000..06066f0
--- /dev/null
+++ b/.github/workflows/hk_sfc.yml
@@ -0,0 +1,33 @@
+name: Update SFC Companies List And Detail
+on:
+  workflow_dispatch:
+    inputs:
+      ids:
+        description: "update SFC List with ids split by comma, e.g. 1,2,3"
+        required: false
+  repository_dispatch:
+    types:
+      - update_sfc_list
+
+jobs:
+  export:
+    name: Start update SFC Companies list
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: main
+
+      - uses: oven-sh/setup-bun@v1
+
+      - name: Run update SFC Companies list and detail
+        env:
+          SUPABASE_KEY: ${{ secrets.SUPABASE_KEY }}
+          SUPABASE_PASSWORD: ${{ secrets.SUPABASE_PASSWORD }}
+          SUPABASE_EMAIL: ${{ secrets.SUPABASE_EMAIL }}
+          # Pass the dispatch input via env so it is never expanded into the script text.
+          IDS: ${{ inputs.ids }}
+        run: |
+          bun install
+          bun run src/sites/hk_sfc/index.ts --ids="$IDS"
diff --git a/src/fs.ts b/src/fs.ts
index 56cbeb9..9c02e78 100644
--- a/src/fs.ts
+++ b/src/fs.ts
@@ -21,7 +21,7 @@ async function writeFileWithDirs(filePath: string, data: any) {
 export const writeFile = (data: any, filePath = "data") => {
   writeFileWithDirs(filePath, JSON.stringify(data, null, 2))
     .then(() => {
-      console.log("File has been saved!");
+      // console.log("File has been saved!");
     })
     .catch((err) => {
       console.error("Error writing file:", err);
@@ -29,7 +29,6 @@ export const writeFile = (data: any, filePath = "data") => {
 };
 
 export const readJsonFile = async (filePath: string, default_info = {}) => {
-  const fullPath = path.join(__dirname, filePath);
   let json = default_info;
 
   try {
diff --git a/src/sites/hk_sfc/entry.ts b/src/sites/hk_sfc/entry.ts
index c58af87..50041a1 100644
--- a/src/sites/hk_sfc/entry.ts
+++ b/src/sites/hk_sfc/entry.ts
@@ -11,9 +11,9 @@ import {
 
 export interface PARTIAL {
   ceref: string;
-  isCorp: boolean;
-  isRi: boolean;
-  isEo: boolean;
+  isCorp?: boolean;
+  isRi?: boolean;
+  isEo?: boolean;
 }
 
 export class HK_SFC {
@@ -21,18 +21,25 @@ export class HK_SFC {
 
   public async check_list() {
     // 检查接口列表
+    const start_time = new Date();
     this.data_list = await getList();
+    const end_time = new Date();
+    console.log(
+      "🚀 ~ HK_SFC ~ check_list ~ duration:",
+      end_time.getTime() - start_time.getTime()
+    );
     const previous_total_counts = await getPreviousListCount();
 
     if (previous_total_counts !== this.data_list.length) {
       // 前后两次数量不一致就记录
       const diff_list = await getDiffList(this.data_list);
+      if (diff_list.length > 0) {
+        this.insert_meta(diff_list);
+        this.insert_history(diff_list);
 
-      this.insert_meta(this.data_list);
-      this.insert_history(diff_list);
-
-      // 增量更新
-      this.get_full_detail_from_file(diff_list);
+        // Incremental update: only process the entries that changed
+        this.get_full_detail_from_file(diff_list);
+      }
     }
   }
 
@@ -43,7 +50,15 @@ export class HK_SFC {
    * 生成完后会记录到 backup/hk_sfc/xxx 目录下
    */
   public async get_detail_from_page(data_list: any[], batch_size = 3) {
+    const start_time = new Date();
     await processInBatches(data_list, batch_size);
+    const end_time = new Date();
+    console.log(
+      "🚀 ~ HK_SFC ~ get_detail_from_page ~ duration: ",
+      end_time.getTime() - start_time.getTime(),
+      " list_length:",
+      data_list.length
+    );
   }
 
   public async get_full_detail_from_file(
@@ -62,9 +77,10 @@ export class HK_SFC {
     }
   }
 
-  public async update_by_partial(partial: PARTIAL[]) {
-    const partial_list = await get_partial_list(partial);
-    await processInBatches(partial_list, 3);
+  public async update_by_partial(partial_ids: string) {
+    const partial_list = await get_partial_list(partial_ids);
+
+    await this.get_detail_from_page(partial_list, 3);
     await this.get_full_detail_from_file(partial_list, true);
   }
 
diff --git a/src/sites/hk_sfc/index.ts b/src/sites/hk_sfc/index.ts
index e1ed285..677aeb0 100644
--- a/src/sites/hk_sfc/index.ts
+++ b/src/sites/hk_sfc/index.ts
@@ -1,5 +1,30 @@
+import yargs from "yargs";
+import { hideBin } from "yargs/helpers";
 import { HK_SFC } from "./entry";
+import { uniq } from "lodash-es";
 
 const hk_sfc = new HK_SFC();
 
-hk_sfc.check_list();
+// Parse command-line arguments with yargs
+const argv = yargs(hideBin(process.argv))
+  .option("ids", {
+    alias: "d",
+    type: "string",
+    describe: "Comma-separated ids to update, e.g. 1,2,3",
+    coerce: (arg) => {
+      try {
+        return arg.split(",").map((id: string) => id.trim()).filter(Boolean);
+      } catch (e) {
+        return [];
+      }
+    },
+  })
+  .help().argv;
+// @ts-ignore
+const init_ids = argv.ids || [];
+
+if (init_ids.length > 0) {
+  hk_sfc.update_by_partial(uniq(init_ids).join(","));
+} else {
+  hk_sfc.check_list();
+}
diff --git a/src/sites/hk_sfc/list.ts b/src/sites/hk_sfc/list.ts
index c215c5d..67c19af 100644
--- a/src/sites/hk_sfc/list.ts
+++ b/src/sites/hk_sfc/list.ts
@@ -294,14 +294,14 @@ function filter_u0000(str: string) {
   return str ? str.replace(/\u0000/g, "") : "";
 }
 
-export const get_partial_list = async (partial: PARTIAL[]) => {
+export const get_partial_list = async (partial_ids: string) => {
   let data_list: any[] = [];
-  for (const item of partial) {
+  for (const item of partial_ids.split(",")) {
     const body = getBody({
       licstatus: "all",
       lictype: "all",
       searchbyoption: "byceref",
-      searchtext: get(item, "ceref"),
+      searchtext: item,
       page: 1,
       start: 0,
       limit: 20,