diff --git a/packages/hub/README.md b/packages/hub/README.md index 0633f0627..625cd5e4e 100644 --- a/packages/hub/README.md +++ b/packages/hub/README.md @@ -93,6 +93,35 @@ for await (const fileInfo of hub.listFiles({repo})) { await hub.deleteRepo({ repo, accessToken: "hf_..." }); ``` +## CLI usage + +You can use `@huggingface/hub` in CLI mode to upload files and folders to your repo. + +```console +npx @huggingface/hub upload coyotte508/test-model . +npx @huggingface/hub upload datasets/coyotte508/test-dataset . +# Same thing +npx @huggingface/hub upload --repo-type dataset coyotte508/test-dataset . +# Upload new data with 0 history in a separate branch +npx @huggingface/hub create-branch coyotte508/test-model release --empty +npx @huggingface/hub upload coyotte508/test-model . --revision release + +npx @huggingface/hub --help +npx @huggingface/hub upload --help +``` + +You can also install it globally with `npm install -g @huggingface/hub`. Then you can do: + +```console +hfjs upload coyotte508/test-model . + +hfjs create-branch --repo-type dataset coyotte508/test-dataset release --empty +hfjs upload --repo-type dataset coyotte508/test-dataset . --revision release + +hfjs --help +hfjs upload --help +``` + ## OAuth Login It's possible to login using OAuth (["Sign in with HF"](https://huggingface.co/docs/hub/oauth)). diff --git a/packages/hub/cli.ts b/packages/hub/cli.ts new file mode 100644 index 000000000..00935645c --- /dev/null +++ b/packages/hub/cli.ts @@ -0,0 +1,375 @@ +#! 
/usr/bin/env node + +import { parseArgs } from "node:util"; +import { typedEntries } from "./src/utils/typedEntries"; +import { createBranch, uploadFilesWithProgress } from "./src"; +import { pathToFileURL } from "node:url"; +import { stat } from "node:fs/promises"; +import { basename, join } from "node:path"; +import { HUB_URL } from "./src/consts"; + +// Didn't find the import from "node:util", so duplicated it here +type OptionToken = + | { kind: "option"; index: number; name: string; rawName: string; value: string; inlineValue: boolean } + | { + kind: "option"; + index: number; + name: string; + rawName: string; + value: undefined; + inlineValue: undefined; + }; + +const command = process.argv[2]; +const args = process.argv.slice(3); +/** Type-level kebab-case to camelCase conversion, e.g. "repo-name" becomes "repoName". */ +type Camelize<T extends string> = T extends `${infer A}-${infer B}` ? `${A}${Camelize<Capitalize<B>>}` : T; +/** Declarative description of one CLI argument: a positional, a boolean flag, or a string option. */ +interface ArgDef { + name: string; + short?: string; + positional?: boolean; + description?: string; + required?: boolean; + boolean?: boolean; + enum?: Array<string>; + default?: string | (() => string); +} +/** Supported sub-commands and their arguments; read by usage(), detailedUsage() and advParseArgs(). */ +const commands = { + upload: { + description: "Upload a folder to a repo on the Hub", + args: [ + { + name: "repo-name" as const, + description: "The name of the repo to upload to", + positional: true, + required: true, + }, + { + name: "local-folder" as const, + description: "The local folder to upload. Defaults to the current working directory", + positional: true, + default: () => process.cwd(), + }, + { + name: "path-in-repo" as const, + description: "The path in the repo to upload the folder to. Defaults to the root of the repo", + positional: true, + default: ".", + }, + { + name: "quiet" as const, + short: "q", + description: "Suppress all output", + boolean: true, + }, + { + name: "repo-type" as const, + enum: ["dataset", "model", "space"], + /* No default on purpose: when unset, run() passes repo-name through as-is, so a documented type prefix (datasets/username/repo-name) is not overridden by an implicit "model" */ + description: + "The type of repo to upload to. Defaults to model. You can also prefix the repo name with the type, e.g. 
datasets/username/repo-name", + }, + { + name: "revision" as const, + description: "The revision to upload to. Defaults to the main branch", + default: "main", + }, + { + name: "commit-message" as const, + description: "The commit message to use. Defaults to 'Add [x] files'", + }, + { + name: "token" as const, + description: + "The access token to use for authentication. If not provided, the HF_TOKEN environment variable will be used.", + default: process.env.HF_TOKEN, + }, + ], + }, + "create-branch": { + description: "Create a new branch in a repo, or update an existing one", + args: [ + { + name: "repo-name" as const, + description: "The name of the repo to create the branch in", + positional: true, + required: true, + }, + { + name: "branch" as const, + description: "The name of the branch to create", + positional: true, + required: true, + }, + { + name: "repo-type" as const, + enum: ["dataset", "model", "space"], + /* No default on purpose: when unset, run() passes repo-name through as-is, so a documented type prefix (datasets/username/repo-name) is not overridden by an implicit "model" */ + description: + "The type of repo to create. Defaults to model. You can also prefix the repo name with the type, e.g. datasets/username/repo-name", + }, + { + name: "revision" as const, + description: + "The revision to create the branch from. Defaults to the main branch, or existing branch if it exists.", + }, + { + name: "empty" as const, + boolean: true, + description: "Create an empty branch. This will erase all previous commits on the branch if it exists.", + }, + { + name: "force" as const, + short: "f", + boolean: true, + description: + "Overwrite the branch if it already exists. Otherwise, throws an error if the branch already exists. No-ops if no revision is provided and the branch exists.", + }, + { + name: "token" as const, + description: + "The access token to use for authentication. 
If not provided, the HF_TOKEN environment variable will be used.", + default: process.env.HF_TOKEN, + }, + ], + } as const, +} satisfies Record< + string, + { + description: string; + args?: ArgDef[]; + } +>; +/** Name of a supported sub-command (a key of commands). */ +type Command = keyof typeof commands; +/** CLI entry point: dispatches on the first command-line argument (help, upload, create-branch). */ +async function run() { + switch (command) { + case undefined: + case "--help": + case "help": { + const positionals = parseArgs({ allowPositionals: true, args }).positionals; + + if (positionals.length > 0 && positionals[0] in commands) { + const commandName = positionals[0] as Command; + console.log(detailedUsage(commandName)); + break; + } + + console.log( + `Available commands\n\n` + + typedEntries(commands) + .map(([name, { description }]) => `- ${usage(name)}: ${description}`) + .join("\n") + ); + + console.log("\nTo get help on a specific command, run `hfjs help <command>` or `hfjs <command> --help`"); + + if (command === undefined) { + process.exitCode = 1; + } + break; + } + + case "upload": { + if (args[0] === "--help" || args[0] === "-h") { + console.log(detailedUsage("upload")); + break; + } + const parsedArgs = advParseArgs(args, "upload"); + const { repoName, localFolder, repoType, revision, token, quiet, commitMessage, pathInRepo } = parsedArgs; + + const isFile = (await stat(localFolder)).isFile(); + const files = isFile + ? [ + { + content: pathToFileURL(localFolder), + path: join(pathInRepo, `${basename(localFolder)}`).replace(/^[.]?\//, ""), + }, + ] + : [{ content: pathToFileURL(localFolder), path: pathInRepo.replace(/^[.]?\//, "") }]; + + for await (const event of uploadFilesWithProgress({ + repo: repoType ? { type: repoType as "model" | "dataset" | "space", name: repoName } : repoName, + files, + branch: revision, + accessToken: token, + commitTitle: commitMessage?.trim().split("\n")[0], + commitDescription: commitMessage?.trim().split("\n").slice(1).join("\n").trim(), + hubUrl: process.env.HF_ENDPOINT ?? 
HUB_URL, + })) { + if (!quiet) { + console.log(event); + } + } + break; + } + case "create-branch": { + if (args[0] === "--help" || args[0] === "-h") { + console.log(detailedUsage("create-branch")); + break; + } + const parsedArgs = advParseArgs(args, "create-branch"); + const { repoName, branch, revision, empty, repoType, token, force } = parsedArgs; + + await createBranch({ + repo: repoType ? { type: repoType as "model" | "dataset" | "space", name: repoName } : repoName, + branch, + accessToken: token, + revision, + empty: empty ? true : undefined, + overwrite: force ? true : undefined, + hubUrl: process.env.HF_ENDPOINT ?? HUB_URL, + }); + break; + } + default: + throw new Error("Command not found: " + command); + } +} +run(); + +function usage(commandName: Command) { + const command = commands[commandName]; + + return `${commandName} ${((command.args as ArgDef[]) || []) + .map((arg) => { + if (arg.positional) { + if (arg.required) { + return `<${arg.name}>`; + } else { + return `[${arg.name}]`; + } + } + return `[--${arg.name}${ + arg.enum ? ` {${arg.enum.join(",")}}` : arg.boolean ? "" : " " + arg.name.toLocaleUpperCase() + }]`; + }) + .join(" ")}`.trim(); +} + +function detailedUsage(commandName: Command) { + let ret = `usage: ${usage(commandName)}\n\n`; + const command = commands[commandName]; + + if ((command.args as ArgDef[]).some((p) => p.positional)) { + ret += `Positional arguments:\n`; + + for (const arg of command.args as ArgDef[]) { + if (arg.positional) { + ret += ` ${arg.name}: ${arg.description}\n`; + } + } + + ret += `\n`; + } + + if ((command.args as ArgDef[]).some((p) => !p.positional)) { + ret += `Options:\n`; + + for (const arg of command.args as ArgDef[]) { + if (!arg.positional) { + ret += ` --${arg.name}${arg.short ? `, -${arg.short}` : ""}${ + arg.enum ? ` {${arg.enum.join(",")}}` : arg.boolean ? 
"" : " " + arg.name.toLocaleUpperCase() + }: ${arg.description}\n`; + } + } + + ret += `\n`; + } + + return ret; +} + +function advParseArgs( + args: string[], + commandName: C +): { + // Todo : better typing + [key in Camelize<(typeof commands)[C]["args"][number]["name"]>]: string; +} { + const { tokens } = parseArgs({ + options: Object.fromEntries( + (commands[commandName].args as ArgDef[]) + .filter((arg) => !arg.positional) + .map((arg) => { + const option = { + name: arg.name, + ...(arg.short && { short: arg.short }), + type: arg.boolean ? "boolean" : "string", + default: typeof arg.default === "function" ? arg.default() : arg.default, + } as const; + return [arg.name, option]; + }) + ), + args, + allowPositionals: true, + strict: false, + tokens: true, + }); + + const command = commands[commandName]; + const expectedPositionals = (command.args as ArgDef[]).filter((arg) => arg.positional); + const requiredPositionals = expectedPositionals.filter((arg) => arg.required).length; + const providedPositionals = tokens.filter((token) => token.kind === "positional").length; + + if (providedPositionals < requiredPositionals) { + throw new Error( + `Missing required positional arguments. Expected: ${requiredPositionals}, Provided: ${providedPositionals}` + ); + } + + if (providedPositionals > expectedPositionals.length) { + throw new Error( + `Too many positional arguments. 
Expected: ${expectedPositionals.length}, Provided: ${providedPositionals}` + ); + } + + const positionals = Object.fromEntries( + tokens + .filter((token): token is { kind: "positional"; index: number; value: string } => token.kind === "positional") + .map((token, i) => [expectedPositionals[i].name, token.value]) + ); + + const options = Object.fromEntries( + tokens + .filter((token): token is OptionToken => token.kind === "option") + .map((token) => { + const arg = (command.args as ArgDef[]).find((arg) => arg.name === token.name || arg.short === token.name); + if (!arg) { + throw new Error(`Unknown option: ${token.name}`); + } + + if (!arg.boolean) { + if (!token.value) { + throw new Error(`Missing value for option: ${token.name}: ${JSON.stringify(token)}`); + } + + if (arg.enum && !arg.enum.includes(token.value)) { + throw new Error(`Invalid value for option ${token.name}. Expected one of: ${arg.enum.join(", ")}`); + } + } + + return [arg.name, arg.boolean ? true : token.value]; + }) + ); + const defaults = Object.fromEntries( + (commands[commandName].args as ArgDef[]) + .filter((arg) => arg.default) + .map((arg) => { + const value = typeof arg.default === "function" ? 
arg.default() : arg.default; + return [arg.name, value]; + }) + ); + return Object.fromEntries( + Object.entries({ ...defaults, ...positionals, ...options }).map(([name, val]) => [kebabToCamelCase(name), val]) + ) as { + [key in Camelize<(typeof commands)[C]["args"][number]["name"]>]: string; + }; +} + +function kebabToCamelCase(str: string) { + return str.replace(/-./g, (match) => match[1].toUpperCase()); +} diff --git a/packages/hub/package.json b/packages/hub/package.json index 9f7fd3ca3..98248dc28 100644 --- a/packages/hub/package.json +++ b/packages/hub/package.json @@ -57,6 +57,9 @@ "hugging", "face" ], + "bin": { + "hfjs": "./dist/cli.js" + }, "author": "Hugging Face", "license": "MIT", "dependencies": { diff --git a/packages/hub/src/lib/create-branch.spec.ts b/packages/hub/src/lib/create-branch.spec.ts new file mode 100644 index 000000000..b616fb4ce --- /dev/null +++ b/packages/hub/src/lib/create-branch.spec.ts @@ -0,0 +1,159 @@ +import { assert, it, describe } from "vitest"; +import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts"; +import type { RepoId } from "../types/public"; +import { insecureRandomString } from "../utils/insecureRandomString"; +import { createRepo } from "./create-repo"; +import { deleteRepo } from "./delete-repo"; +import { createBranch } from "./create-branch"; +import { uploadFile } from "./upload-file"; +import { downloadFile } from "./download-file"; + +describe("createBranch", () => { + it("should create a new branch from the default branch", async () => { + const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`; + const repo = { type: "model", name: repoName } satisfies RepoId; + + try { + await createRepo({ + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + repo, + }); + + await uploadFile({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + file: { + path: "file.txt", + content: new Blob(["file content"]), + }, + }); + + await createBranch({ + repo, + branch: "new-branch", 
+ accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + + const content = await downloadFile({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + path: "file.txt", + revision: "new-branch", + }); + + assert.equal(await content?.text(), "file content"); + } finally { + await deleteRepo({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + } + }); + + it("should create an empty branch", async () => { + const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`; + const repo = { type: "model", name: repoName } satisfies RepoId; + + try { + await createRepo({ + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + repo, + }); + + await uploadFile({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + file: { + path: "file.txt", + content: new Blob(["file content"]), + }, + }); + + await createBranch({ + repo, + branch: "empty-branch", + empty: true, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + + const content = await downloadFile({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + path: "file.txt", + revision: "empty-branch", + }); + + assert.equal(content, null); + } finally { + await deleteRepo({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + } + }); + + it("should overwrite an existing branch", async () => { + const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`; + const repo = { type: "model", name: repoName } satisfies RepoId; + + try { + await createRepo({ + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + repo, + }); + + await uploadFile({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + file: { + path: "file.txt", + content: new Blob(["file content"]), + }, + }); + + await createBranch({ + repo, + branch: "overwrite-branch", + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + + await createBranch({ + repo, + branch: "overwrite-branch", + overwrite: true, + empty: true, + 
accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + + const content = await downloadFile({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + path: "file.txt", + revision: "overwrite-branch", + }); + + assert.equal(content, null); + } finally { + await deleteRepo({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + } + }); +}); diff --git a/packages/hub/src/lib/create-branch.ts b/packages/hub/src/lib/create-branch.ts new file mode 100644 index 000000000..100e4d1b9 --- /dev/null +++ b/packages/hub/src/lib/create-branch.ts @@ -0,0 +1,54 @@ +import { HUB_URL } from "../consts"; +import { createApiError } from "../error"; +import type { AccessToken, RepoDesignation } from "../types/public"; +import { toRepoId } from "../utils/toRepoId"; + +export async function createBranch(params: { + repo: RepoDesignation; + /** + * Revision to create the branch from. Defaults to the default branch. + * + * Use empty: true to create an empty branch. + */ + revision?: string; + hubUrl?: string; + accessToken?: AccessToken; + fetch?: typeof fetch; + /** + * The name of the branch to create + */ + branch: string; + /** + * Use this to create an empty branch, with no commits. + */ + empty?: boolean; + /** + * Use this to overwrite the branch if it already exists. + * + * If you only specify `overwrite` and no `revision`/`empty`, and the branch already exists, it will be a no-op. + */ + overwrite?: boolean; +}): Promise { + const repoId = toRepoId(params.repo); + const res = await (params.fetch ?? fetch)( + `${params.hubUrl ?? 
HUB_URL}/api/${repoId.type}s/${repoId.name}/branch/${encodeURIComponent(params.branch)}`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + ...(params.accessToken && { + Authorization: `Bearer ${params.accessToken}`, + }), + }, + body: JSON.stringify({ + startingPoint: params.revision, + ...(params.empty && { emptyBranch: true }), + overwrite: params.overwrite, + }), + } + ); + + if (!res.ok) { + throw await createApiError(res); + } +} diff --git a/packages/hub/src/lib/delete-branch.spec.ts b/packages/hub/src/lib/delete-branch.spec.ts new file mode 100644 index 000000000..dcd253214 --- /dev/null +++ b/packages/hub/src/lib/delete-branch.spec.ts @@ -0,0 +1,43 @@ +import { it, describe } from "vitest"; +import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts"; +import type { RepoId } from "../types/public"; +import { insecureRandomString } from "../utils/insecureRandomString"; +import { createRepo } from "./create-repo"; +import { deleteRepo } from "./delete-repo"; +import { createBranch } from "./create-branch"; +import { deleteBranch } from "./delete-branch"; + +describe("deleteBranch", () => { + it("should delete an existing branch", async () => { + const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`; + const repo = { type: "model", name: repoName } satisfies RepoId; + + try { + await createRepo({ + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + repo, + }); + + await createBranch({ + repo, + branch: "branch-to-delete", + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + + await deleteBranch({ + repo, + branch: "branch-to-delete", + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + } finally { + await deleteRepo({ + repo, + accessToken: TEST_ACCESS_TOKEN, + hubUrl: TEST_HUB_URL, + }); + } + }); +}); diff --git a/packages/hub/src/lib/delete-branch.ts b/packages/hub/src/lib/delete-branch.ts new file mode 100644 index 000000000..70227b185 --- /dev/null +++ 
b/packages/hub/src/lib/delete-branch.ts @@ -0,0 +1,32 @@ +import { HUB_URL } from "../consts"; +import { createApiError } from "../error"; +import type { AccessToken, RepoDesignation } from "../types/public"; +import { toRepoId } from "../utils/toRepoId"; + +export async function deleteBranch(params: { + repo: RepoDesignation; + /** + * The name of the branch to delete + */ + branch: string; + hubUrl?: string; + accessToken?: AccessToken; + fetch?: typeof fetch; +}): Promise { + const repoId = toRepoId(params.repo); + const res = await (params.fetch ?? fetch)( + `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/branch/${encodeURIComponent(params.branch)}`, + { + method: "DELETE", + headers: { + ...(params.accessToken && { + Authorization: `Bearer ${params.accessToken}`, + }), + }, + } + ); + + if (!res.ok) { + throw await createApiError(res); + } +} diff --git a/packages/hub/src/lib/index.ts b/packages/hub/src/lib/index.ts index 24e239bdc..d4b771f2b 100644 --- a/packages/hub/src/lib/index.ts +++ b/packages/hub/src/lib/index.ts @@ -3,7 +3,9 @@ export * from "./check-repo-access"; export * from "./commit"; export * from "./count-commits"; export * from "./create-repo"; +export * from "./create-branch"; export * from "./dataset-info"; +export * from "./delete-branch"; export * from "./delete-file"; export * from "./delete-files"; export * from "./delete-repo"; diff --git a/packages/hub/src/utils/createBlobs.ts b/packages/hub/src/utils/createBlobs.ts index 1a261c4c4..625bef5fd 100644 --- a/packages/hub/src/utils/createBlobs.ts +++ b/packages/hub/src/utils/createBlobs.ts @@ -38,7 +38,10 @@ export async function createBlobs( return Promise.all( paths.map(async (path) => ({ - path: `${destPath}/${path.relativePath}`.replace(/\/[.]$/, "").replaceAll("//", "/"), + path: `${destPath}/${path.relativePath}` + .replace(/\/[.]$/, "") + .replaceAll("//", "/") + .replace(/^[.]?\//, ""), blob: await FileBlob.create(new URL(path.path)), })) ); diff --git 
a/packages/hub/tsconfig.json b/packages/hub/tsconfig.json index 254606a30..9dd335c6b 100644 --- a/packages/hub/tsconfig.json +++ b/packages/hub/tsconfig.json @@ -15,6 +15,6 @@ "declaration": true, "declarationMap": true }, - "include": ["src", "index.ts"], + "include": ["src", "index.ts", "cli.ts"], "exclude": ["dist"] } diff --git a/packages/hub/tsup.config.ts b/packages/hub/tsup.config.ts index 6be4e128a..adbb9fdfb 100644 --- a/packages/hub/tsup.config.ts +++ b/packages/hub/tsup.config.ts @@ -1,14 +1,15 @@ import type { Options } from "tsup"; -const baseConfig: Options = { +const baseConfig = { entry: ["./index.ts"], format: ["cjs", "esm"], outDir: "dist", clean: true, -}; +} satisfies Options; const nodeConfig: Options = { ...baseConfig, + entry: [...baseConfig.entry, "./cli.ts"], platform: "node", };