diff --git a/binderhub/config.py b/binderhub/config.py index 3fd655839..df152c88d 100644 --- a/binderhub/config.py +++ b/binderhub/config.py @@ -10,6 +10,12 @@ def generate_config(self): "repo_providers" ].items(): config[repo_provider_class_alias] = repo_provider_class.labels + config[repo_provider_class_alias][ + "display_name" + ] = repo_provider_class.display_name + config[repo_provider_class_alias][ + "regex_detect" + ] = repo_provider_class.regex_detect return config async def get(self): diff --git a/binderhub/repoproviders.py b/binderhub/repoproviders.py index 38fb05de1..c5367e18c 100644 --- a/binderhub/repoproviders.py +++ b/binderhub/repoproviders.py @@ -99,6 +99,10 @@ class RepoProvider(LoggingConfigurable): config=True, ) + # Not a traitlet because the class property is serialised in + # config.ConfigHandler.generate_config() + regex_detect = None + unresolved_ref = Unicode() git_credentials = Unicode( @@ -192,6 +196,15 @@ def is_valid_sha1(sha1): class FakeProvider(RepoProvider): """Fake provider for local testing of the UI""" + name = Unicode("Fake") + + display_name = "Fake GitHub" + + regex_detect = [ + r"^https://github\.com/(?[^/]+/[^/]+)(/blob/(?[^/]+)(/(?.+))?)?$", + r"^https://github\.com/(?[^/]+/[^/]+)(/tree/(?[^/]+)(/(?.+))?)?$", + ] + labels = { "text": "Fake Provider", "tag_text": "Fake Ref", @@ -627,6 +640,13 @@ def _default_git_credentials(self): return rf"username=binderhub\npassword={self.private_token}" return "" + # Gitlab repos can be nested under projects + _regex_detect_base = r"^https://gitlab\.com/(?[^/]+/[^/]+(/[^/-][^/]+)*)" + regex_detect = [ + _regex_detect_base + r"(/-/blob/(?[^/]+)(/(?.+))?)?$", + _regex_detect_base + r"(/-/tree/(?[^/]+)(/(?.+))?)?$", + ] + labels = { "text": "GitLab.com repository or URL", "tag_text": "Git ref (branch, tag, or commit)", @@ -780,6 +800,11 @@ def _default_git_credentials(self): return rf"username={self.access_token}\npassword=x-oauth-basic" return "" + regex_detect = [ + r"^https://github\.com/(?[^/]+/[^/]+)(/blob/(?[^/]+)(/(?.+))?)?$", + r"^https://github\.com/(?[^/]+/[^/]+)(/tree/(?[^/]+)(/(?.+))?)?$", + ] + labels = { "text": "GitHub repository name or URL", "tag_text": "Git ref (branch, tag, or commit)", @@ -973,6 +998,10 @@ class GistRepoProvider(GitHubRepoProvider): help="Flag for allowing usages of secret Gists. The default behavior is to disallow secret gists.", ) + regex_detect = [ + r"^https://gist\.github\.com/(?[^/]+/[^/]+)(/(?[^/]+))?$" + ] + labels = { "text": "Gist ID (username/gistId) or URL", "tag_text": "Git commit SHA", diff --git a/binderhub/static/js/index.js b/binderhub/static/js/index.js index 536a4e278..629b48021 100644 --- a/binderhub/static/js/index.js +++ b/binderhub/static/js/index.js @@ -19,7 +19,7 @@ import "../index.css"; import { setUpLog } from "./src/log"; import { updateUrls } from "./src/urls"; import { getBuildFormValues } from "./src/form"; -import { updateRepoText } from "./src/repo"; +import { detectPastedRepo, updateRepoText } from "./src/repo"; /** * @type {URL} @@ -166,7 +166,19 @@ function indexMain() { updatePathText(); updateRepoText(BASE_URL); - $("#repository").on("keyup paste change", function () { + // If the user pastes a URL into the repository field try to autodetect + // In all other cases don't do anything to avoid overwriting the user's input + // We need to wait for the paste to complete before we can read the input field + // https://stackoverflow.com/questions/10972954/javascript-onpaste/10972973#10972973 + $("#repository").on("paste", () => { + setTimeout(() => { + detectPastedRepo(BASE_URL).then(() => { + updateUrls(BADGE_BASE_URL); + }); + }, 0); + }); + + $("#repository").on("keyup change", function () { updateUrls(BADGE_BASE_URL); }); diff --git a/binderhub/static/js/src/repo.js b/binderhub/static/js/src/repo.js index e848b49ed..d0ce58499 100644 --- a/binderhub/static/js/src/repo.js +++ b/binderhub/static/js/src/repo.js @@ -1,9 +1,10 @@ +import { detect, getRepoProviders } from "@jupyterhub/binderhub-client"; +import { updatePathText } from "./path"; + /** - * Dict holding cached values of API request to _config endpoint + * @param {Object} configDict Dict holding cached values of API request to _config endpoint */ -let configDict = {}; - -function setLabels() { +function setLabels(configDict) { const provider = $("#provider_prefix").val(); const text = configDict[provider]["text"]; const tagText = configDict[provider]["tag_text"]; @@ -23,15 +24,42 @@ function setLabels() { * @param {URL} baseUrl Base URL to use for constructing path to _config endpoint */ export function updateRepoText(baseUrl) { - if (Object.keys(configDict).length === 0) { - const configUrl = new URL("_config", baseUrl); - fetch(configUrl).then((resp) => { - resp.json().then((data) => { - configDict = data; - setLabels(); - }); - }); - } else { - setLabels(); + getRepoProviders(baseUrl).then(setLabels); +} + +/** + * Attempt to fill in all fields by parsing a pasted repository URL + * + * @param {URL} baseUrl Base URL to use for constructing path to _config endpoint + */ +export async function detectPastedRepo(baseUrl) { + const repoField = $("#repository").val().trim(); + const fields = await detect(baseUrl, repoField); + // Special case: The BinderHub UI supports https://git{hub,lab}.com/ in the + // repository (it's stripped out later in the UI). + // To keep the UI consistent insert it back if it was originally included. + console.log(fields); + if (fields) { + let repo = fields.repository; + if (repoField.startsWith("https://github.com/")) { + repo = "https://github.com/" + repo; + } + if (repoField.startsWith("https://gitlab.com/")) { + repo = "https://gitlab.com/" + repo; + } + $("#provider_prefix-selected").text(fields.providerName); + $("#provider_prefix").val(fields.providerPrefix); + $("#repository").val(repo); + if (fields.ref) { + $("#ref").val(fields.ref); + } + if (fields.path) { + $("#filepath").val(fields.path); + $("#url-or-file-selected").text( + fields.pathType === "filepath" ? "File" : "URL", + ); + } + updatePathText(); + updateRepoText(baseUrl); } } diff --git a/binderhub/tests/test_repoproviders.py b/binderhub/tests/test_repoproviders.py index df5f63e0e..b0f111f16 100644 --- a/binderhub/tests/test_repoproviders.py +++ b/binderhub/tests/test_repoproviders.py @@ -539,3 +539,114 @@ def test_gist_secret(): provider = GistRepoProvider(spec=spec, allow_secret_gist=True) assert IOLoop().run_sync(provider.get_resolved_ref) is not None + + +@pytest.mark.parametrize( + "provider,url,groupdict", + [ + ( + GitHubRepoProvider, + "https://github.com/binder-examples/conda", + {"repo": "binder-examples/conda", "filepath": None, "ref": None}, + ), + ( + GitHubRepoProvider, + "https://github.com/binder-examples/conda/blob/main/index.ipynb", + {"repo": "binder-examples/conda", "ref": "main", "filepath": "index.ipynb"}, + ), + ( + GitHubRepoProvider, + "https://github.com/binder-examples/conda/tree/main/.github/workflows", + { + "repo": "binder-examples/conda", + "ref": "main", + "urlpath": ".github/workflows", + }, + ), + (GitHubRepoProvider, "https://github.com/binder-examples/conda/pulls", None), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/repo", + { + "repo": "owner/repo", + "ref": None, + "filepath": None, + }, + ), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/repo/-/tree/branch/folder?ref_type=heads", + {"repo": "owner/repo", "ref": "branch", "urlpath": "folder?ref_type=heads"}, + ), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/repo/-/blob/branch/README.md?ref_type=heads", + { + "repo": "owner/repo", + "ref": "branch", + "filepath": "README.md?ref_type=heads", + }, + ), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/project/repo", + { + "repo": "owner/project/repo", + "ref": None, + "filepath": None, + }, + ), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/project/repo/-/tree/branch/folder?ref_type=heads", + { + "repo": "owner/project/repo", + "ref": "branch", + "urlpath": "folder?ref_type=heads", + }, + ), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/project/repo/-/blob/branch/README.md?ref_type=heads", + { + "repo": "owner/project/repo", + "ref": "branch", + "filepath": "README.md?ref_type=heads", + }, + ), + ( + GitLabRepoProvider, + "https://gitlab.com/owner/repo/-/merge_requests/123", + None, + ), + ( + GistRepoProvider, + "https://gist.github.com/owner/0123456789abcde0123456789abcde01", + { + "repo": "owner/0123456789abcde0123456789abcde01", + "ref": None, + }, + ), + ( + GistRepoProvider, + "https://gist.github.com/owner/0123456789abcde0123456789abcde01/sha", + { + "repo": "owner/0123456789abcde0123456789abcde01", + "ref": "sha", + }, + ), + (GistRepoProvider, "https://gist.github.com/owner", None), + ], +) +def test_provider_regex_detect(provider, url, groupdict): + regex_js = provider.regex_detect + regex_py = [r.replace("(?<", "(?P<") for r in regex_js] + m = None + for r in regex_py: + m = re.match(r, url) + if m: + break + if groupdict: + assert m.groupdict() == groupdict + else: + assert not m diff --git a/js/packages/binderhub-client/lib/autodetect.js b/js/packages/binderhub-client/lib/autodetect.js new file mode 100644 index 000000000..6127f7200 --- /dev/null +++ b/js/packages/binderhub-client/lib/autodetect.js @@ -0,0 +1,60 @@ +/** + * Dict holding cached values of API request to _config endpoint for base URL + */ +let repoProviders = {}; + +/** + * Get the repo provider configurations supported by the BinderHub instance + * + * @param {URL} baseUrl Base URL to use for constructing path to _config endpoint + */ +export async function getRepoProviders(baseUrl) { + if (!repoProviders[baseUrl]) { + const configUrl = new URL("_config", baseUrl); + const resp = await fetch(configUrl); + repoProviders[baseUrl] = resp.json(); + } + return repoProviders[baseUrl]; +} + +/** + * Attempt to parse a string (typically a repository URL) into a BinderHub + * provider/repository/reference/path + * + * @param {URL} baseUrl Base URL to use for constructing path to _config endpoint + * @param {string} text Repository URL or similar to parse + * @returns {Object} An object if the repository could be parsed with fields + * - providerPrefix Prefix denoting what provider was selected + * - repository Repository to build + * - ref Ref in this repo to build (optional) + * - path Path to launch after this repo has been built (optional) + * - pathType Type of thing to open path with (raw url, notebook file) (optional) + * - providerName User friendly display name of the provider (optional) + * null otherwise + */ +export async function detect(baseUrl, text) { + const config = await getRepoProviders(baseUrl); + + for (const provider in config) { + const regex_detect = config[provider].regex_detect || []; + for (const regex of regex_detect) { + const m = text.match(regex); + if (m?.groups.repo) { + return { + providerPrefix: provider, + repository: m.groups.repo, + ref: m.groups.ref || null, + path: m.groups.filepath || m.groups.urlpath || null, + pathType: m.groups.filepath + ? "filepath" + : m.groups.urlpath + ? "urlpath" + : null, + providerName: config[provider].display_name, + }; + } + } + } + + return null; +} diff --git a/js/packages/binderhub-client/lib/index.js b/js/packages/binderhub-client/lib/index.js index 9401eea7f..4e4e4ecf9 100644 --- a/js/packages/binderhub-client/lib/index.js +++ b/js/packages/binderhub-client/lib/index.js @@ -1,6 +1,8 @@ import { NativeEventSource, EventSourcePolyfill } from "event-source-polyfill"; import { EventIterator } from "event-iterator"; +import { detect, getRepoProviders } from "./autodetect"; + // Use native browser EventSource if available, and use the polyfill if not available const EventSource = NativeEventSource || EventSourcePolyfill; @@ -211,3 +213,5 @@ export function makeBadgeMarkup(publicBaseUrl, url, syntax) { ); } } + +export { detect, getRepoProviders }; diff --git a/js/packages/binderhub-client/tests/autodetect.test.js b/js/packages/binderhub-client/tests/autodetect.test.js new file mode 100644 index 000000000..73ca91929 --- /dev/null +++ b/js/packages/binderhub-client/tests/autodetect.test.js @@ -0,0 +1,119 @@ +import { getRepoProviders, detect } from "../lib/autodetect"; +import { readFileSync } from "node:fs"; + +const mybinderConfig = JSON.parse( + readFileSync(`${__dirname}/fixtures/repoprovider-config.json`, { + encoding: "utf-8", + }), +); + +// Mock fetch() +// https://www.leighhalliday.com/mock-fetch-jest +global.fetch = jest.fn((url) => { + if (url == "https://binder.example.org/_config") { + return Promise.resolve({ + json: () => Promise.resolve(mybinderConfig), + }); + } + return Promise.reject(`Unexpected URL ${url}`); +}); + +beforeEach(() => { + fetch.mockClear(); +}); + +test("getRepoProviders requests and caches the repo provider configs", async () => { + const config = await getRepoProviders("https://binder.example.org"); + expect(config).toEqual(mybinderConfig); + + await getRepoProviders("https://binder.example.org"); + expect(fetch).toHaveBeenCalledTimes(1); +}); + +test("detect returns null if no provider matches", async () => { + const result = await detect( + "https://binder.example.org", + "https://github.com/binder-examples/conda/pulls", + ); + expect(result).toBeNull(); +}); + +test("detect parses a repo with no path", async () => { + const expected = { + providerPrefix: "gh", + repository: "binder-examples/conda", + ref: null, + path: null, + pathType: null, + providerName: "GitHub", + }; + const result = await detect( + "https://binder.example.org", + "https://github.com/binder-examples/conda", + ); + expect(result).toEqual(expected); +}); + +test("detect parses a repo with a ref but no path", async () => { + const expected = { + providerPrefix: "gh", + repository: "binder-examples/conda", + ref: "abc", + path: null, + pathType: null, + providerName: "GitHub", + }; + const result = await detect( + "https://binder.example.org", + "https://github.com/binder-examples/conda/tree/abc", + ); + expect(result).toEqual(expected); +}); + +test("detect parses a repo with a ref and file path", async () => { + const expected = { + providerPrefix: "gh", + repository: "binder-examples/conda", + ref: "f00a783", + path: "index.ipynb", + pathType: "filepath", + providerName: "GitHub", + }; + const result = await detect( + "https://binder.example.org", + "https://github.com/binder-examples/conda/blob/f00a783/index.ipynb", + ); + expect(result).toEqual(expected); +}); + +test("detect parses a repo with a ref and directory path", async () => { + const expected = { + providerPrefix: "gh", + repository: "binder-examples/conda", + ref: "f00a783", + path: ".github/workflows", + pathType: "urlpath", + providerName: "GitHub", + }; + const result = await detect( + "https://binder.example.org", + "https://github.com/binder-examples/conda/tree/f00a783/.github/workflows", + ); + expect(result).toEqual(expected); +}); + +test("detect checks other repo providers", async () => { + const expected = { + providerPrefix: "gl", + repository: "gitlab-org/gitlab-foss", + ref: "v16.4.4", + path: "README.md", + pathType: "filepath", + providerName: "GitLab.com", + }; + const result = await detect( + "https://binder.example.org", + "https://gitlab.com/gitlab-org/gitlab-foss/-/blob/v16.4.4/README.md", + ); + expect(result).toEqual(expected); +}); diff --git a/js/packages/binderhub-client/tests/fixtures/repoprovider-config.json b/js/packages/binderhub-client/tests/fixtures/repoprovider-config.json new file mode 100644 index 000000000..b9cbab818 --- /dev/null +++ b/js/packages/binderhub-client/tests/fixtures/repoprovider-config.json @@ -0,0 +1,74 @@ +{ + "gh": { + "text": "GitHub repository name or URL", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": false, + "label_prop_disabled": false, + "display_name": "GitHub", + "regex_detect": [ + "^https://github\\.com/(?[^/]+/[^/]+)(/blob/(?[^/]+)(/(?.+))?)?$", + "^https://github\\.com/(?[^/]+/[^/]+)(/tree/(?[^/]+)(/(?.+))?)?$" + ] + }, + "gist": { + "text": "Gist ID (username/gistId) or URL", + "tag_text": "Git commit SHA", + "ref_prop_disabled": false, + "label_prop_disabled": false, + "display_name": "Gist", + "regex_detect": [ + "^https://gist\\.github\\.com/(?[^/]+/[^/]+)(/(?[^/]+))?$" + ] + }, + "git": { + "text": "Arbitrary git repository URL (http://git.example.com/repo)", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": false, + "label_prop_disabled": false, + "display_name": "Git repository", + "regex_detect": null + }, + "gl": { + "text": "GitLab.com repository or URL", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": false, + "label_prop_disabled": false, + "display_name": "GitLab.com", + "regex_detect": [ + "^https://gitlab\\.com/(?[^/]+/[^/]+(/[^/-][^/]+)*)(/-/blob/(?[^/]+)(/(?.+))?)?$", + "^https://gitlab\\.com/(?[^/]+/[^/]+(/[^/-][^/]+)*)(/-/tree/(?[^/]+)(/(?.+))?)?$" + ] + }, + "zenodo": { + "text": "Zenodo DOI (10.5281/zenodo.3242074)", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": true, + "label_prop_disabled": true, + "display_name": "Zenodo DOI", + "regex_detect": null + }, + "figshare": { + "text": "Figshare DOI (10.6084/m9.figshare.9782777.v1)", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": true, + "label_prop_disabled": true, + "display_name": "Figshare DOI", + "regex_detect": null + }, + "hydroshare": { + "text": "Hydroshare resource id or URL", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": true, + "label_prop_disabled": true, + "display_name": "Hydroshare resource", + "regex_detect": null + }, + "dataverse": { + "text": "Dataverse DOI (10.7910/DVN/TJCLKP)", + "tag_text": "Git ref (branch, tag, or commit)", + "ref_prop_disabled": true, + "label_prop_disabled": true, + "display_name": "Dataverse DOI", + "regex_detect": null + } +} diff --git a/testing/local-binder-mocked-hub/binderhub_config.py b/testing/local-binder-mocked-hub/binderhub_config.py index 142b36c34..bab664c3f 100644 --- a/testing/local-binder-mocked-hub/binderhub_config.py +++ b/testing/local-binder-mocked-hub/binderhub_config.py @@ -8,13 +8,13 @@ from binderhub.build import FakeBuild from binderhub.registry import FakeRegistry -from binderhub.repoproviders import FakeProvider +from binderhub.repoproviders import FakeProvider, GitLabRepoProvider c.BinderHub.debug = True c.BinderHub.use_registry = True c.BinderHub.registry_class = FakeRegistry c.BinderHub.builder_required = False -c.BinderHub.repo_providers = {"gh": FakeProvider} +c.BinderHub.repo_providers = {"gh": FakeProvider, "gl": GitLabRepoProvider} c.BinderHub.build_class = FakeBuild # Uncomment the following line to enable BinderHub's API only mode