-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1153 from research-software-directory/1117-disable-scraping
1117 disable scraping
- Loading branch information
Showing
12 changed files
with
125 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
-- SPDX-FileCopyrightText: 2021 - 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
-- SPDX-FileCopyrightText: 2021 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
-- SPDX-FileCopyrightText: 2021 - 2024 Netherlands eScience Center | ||
-- SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all) | ||
-- SPDX-FileCopyrightText: 2022 - 2024 dv4all | ||
|
@@ -34,7 +34,8 @@ CREATE TABLE repository_url ( | |
commit_history_scraped_at TIMESTAMPTZ, | ||
contributor_count INTEGER, | ||
contributor_count_last_error VARCHAR(500), | ||
contributor_count_scraped_at TIMESTAMPTZ | ||
contributor_count_scraped_at TIMESTAMPTZ, | ||
scraping_disabled_reason VARCHAR(200) | ||
); | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
// SPDX-FileCopyrightText: 2022 - 2023 dv4all | ||
// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) | ||
// SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) | ||
// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2023 Christian Meeßen (GFZ) <[email protected]> | ||
// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) (dv4all) | ||
// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2023 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
@@ -59,14 +59,14 @@ async function suggestPlatform(repositoryUrl: string | null) { | |
} | ||
|
||
export default function AutosaveRepositoryUrl() { | ||
const {token} = useSession() | ||
const {token, user} = useSession() | ||
const {showErrorMessage} = useSnackbar() | ||
const {control, watch, resetField} = useFormContext<EditSoftwareItem>() | ||
const {fieldState: {error: urlError}, field: {value: repository_url}} = useController({ | ||
control, | ||
name: 'repository_url' | ||
}) | ||
const [id, repository_platform] = watch(['id', 'repository_platform']) | ||
const [id, repository_platform, scraping_disabled_reason] = watch(['id', 'repository_platform', 'scraping_disabled_reason']) | ||
const [platform, setPlatform] = useState<{ | ||
id: CodePlatform | null | ||
disabled: boolean | ||
|
@@ -128,6 +128,24 @@ export default function AutosaveRepositoryUrl() { | |
} | ||
}, [urlError, repository_url, platform.id]) | ||
|
||
async function saveScrapingDisabledReason({value}: {value: string | null}) { | ||
try { | ||
const resp = await fetch(`/api/v1/repository_url?software=eq.${id}`, { | ||
method: 'PATCH', | ||
body: JSON.stringify({scraping_disabled_reason: value}), | ||
headers: { | ||
'Content-Type': 'application/json', | ||
Authorization: `Bearer ${token}` | ||
} | ||
}) | ||
if (!resp.ok) { | ||
showErrorMessage(`Failed to save the disabling reason with status code ${resp.status} and body ${JSON.stringify(resp.body)}`) | ||
} | ||
} catch (e) { | ||
showErrorMessage(`Failed to save the disabling reason with an unknown error: ${e}`) | ||
} | ||
} | ||
|
||
async function saveRepositoryInfo({name, value}: OnSaveProps<EditSoftwareItem>) { | ||
// complete record for upsert | ||
const data: RepositoryUrl = { | ||
|
@@ -149,7 +167,8 @@ export default function AutosaveRepositoryUrl() { | |
commit_history_scraped_at: null, | ||
contributor_count: null, | ||
contributor_count_last_error: null, | ||
contributor_count_scraped_at: null | ||
contributor_count_scraped_at: null, | ||
scraping_disabled_reason: scraping_disabled_reason | ||
} | ||
if (name === 'repository_url') { | ||
data.url = value | ||
|
@@ -203,24 +222,43 @@ export default function AutosaveRepositoryUrl() { | |
// console.log('id...', id) | ||
// console.log('repository_url...', repository_url) | ||
// console.log('platform...', platform) | ||
// console.log('scraping_disabled_reason...', scraping_disabled_reason) | ||
// console.log('urlError...', urlError) | ||
// console.log('options...', options) | ||
// console.groupEnd() | ||
|
||
return ( | ||
<div className="flex gap-4 items-baseline"> | ||
<AutosaveControlledTextField | ||
options={options} | ||
control={control} | ||
rules={config.repository_url.validation} | ||
onSaveField={saveRepositoryInfo} | ||
/> | ||
<AutosaveRepositoryPlatform | ||
value={platform.id} | ||
disabled={platform.disabled} | ||
helperText={platform.helperText} | ||
onChange={(platform) => saveRepositoryInfo({name: 'repository_platform', value: platform})} | ||
/> | ||
</div> | ||
<> | ||
<div className="flex gap-4 items-baseline"> | ||
<AutosaveControlledTextField | ||
options={options} | ||
control={control} | ||
rules={config.repository_url.validation} | ||
onSaveField={saveRepositoryInfo} | ||
/> | ||
<AutosaveRepositoryPlatform | ||
value={platform.id} | ||
disabled={platform.disabled} | ||
helperText={platform.helperText} | ||
onChange={(platform) => saveRepositoryInfo({name: 'repository_platform', value: platform})} | ||
/> | ||
</div> | ||
{(user?.role === 'rsd_admin') | ||
? <AutosaveControlledTextField | ||
options={{ | ||
name: 'scraping_disabled_reason', | ||
label: config.repository_disabled_scraping_reason.label, | ||
useNull: true, | ||
defaultValue: scraping_disabled_reason, | ||
helperTextMessage: config.repository_url.help(repository_url), | ||
helperTextCnt: `${repository_url?.length || 0}/200`, | ||
disabled: user?.role !== 'rsd_admin', | ||
}} | ||
control={control} | ||
rules={config.repository_disabled_scraping_reason.validation} | ||
onSaveField={saveScrapingDisabledReason} | ||
/> | ||
: null} | ||
</> | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
// SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) | ||
// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2024 Netherlands eScience Center | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
@@ -13,6 +14,7 @@ type EditSoftwareMetadataFormProps={ | |
get_started_url: string | null | ||
repository_url: string | null, | ||
repository_platform: CodePlatform | null | ||
scraping_disabled_reason: string | null | ||
concept_doi: string | null, | ||
licenses: AutocompleteOption<License>[] | ||
keywords: KeywordForSoftware[] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
// SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) | ||
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
@@ -17,21 +18,27 @@ export default function SoftwareRepoServices() { | |
if (loading) return <ContentLoader /> | ||
|
||
return ( | ||
<List> | ||
{repoServiceList.map(service=>{ | ||
const props = { | ||
title: service.name, | ||
desc: service.desc, | ||
scraped_at: services ? services[service.props.scraped_at] : null, | ||
last_error: services ? services[service.props.last_error] : null, | ||
url: services ? services[service.props.url] : null, | ||
platform: services ? services['code_platform'] : null | ||
} | ||
return ( | ||
<ServiceInfoListItem key={service.name} {...props} /> | ||
) | ||
})} | ||
</List> | ||
<> | ||
{services?.scraping_disabled_reason | ||
? <span style={{color: 'red'}}>The harvesters for this repo were disabled by the admins for the following reason: {services?.scraping_disabled_reason}</span> | ||
: null} | ||
<List> | ||
{repoServiceList.map(service=>{ | ||
const props = { | ||
title: service.name, | ||
desc: service.desc, | ||
scraped_at: services ? services[service.props.scraped_at] : null, | ||
last_error: services ? services[service.props.last_error] : null, | ||
url: services ? services[service.props.url] : null, | ||
platform: services ? services['code_platform'] : null | ||
} | ||
return ( | ||
<ServiceInfoListItem key={service.name} {...props} /> | ||
) | ||
})} | ||
</List> | ||
</> | ||
|
||
) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
// SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) | ||
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
@@ -13,14 +14,15 @@ import useSoftwareContext from '../useSoftwareContext' | |
|
||
export type SoftwareServices = { | ||
software:string, | ||
url:string, | ||
url:string, | ||
code_platform: CodePlatform, | ||
basic_data_scraped_at: string|null, | ||
basic_data_last_error: string|null, | ||
languages_scraped_at: string|null, | ||
languages_last_error: string|null, | ||
commit_history_scraped_at: string|null, | ||
commit_history_last_error: string|null | ||
basic_data_scraped_at: string|null, | ||
basic_data_last_error: string|null, | ||
languages_scraped_at: string|null, | ||
languages_last_error: string|null, | ||
commit_history_scraped_at: string|null, | ||
commit_history_last_error: string|null, | ||
scraping_disabled_reason: string|null, | ||
} | ||
|
||
export type PackageManagerService = { | ||
|
@@ -35,7 +37,7 @@ export type PackageManagerService = { | |
|
||
async function getSoftwareServices(id:string,token:string){ | ||
try{ | ||
const select='select=software,url,code_platform,basic_data_scraped_at,basic_data_last_error,languages_scraped_at,languages_last_error,commit_history_scraped_at,commit_history_last_error' | ||
const select='select=software,url,code_platform,basic_data_scraped_at,basic_data_last_error,languages_scraped_at,languages_last_error,commit_history_scraped_at,commit_history_last_error,scraping_disabled_reason' | ||
const query = `${select}&software=eq.${id}` | ||
const url = `${getBaseUrl()}/repository_url?${query}` | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
// SPDX-FileCopyrightText: 2022 - 2023 Christian Meeßen (GFZ) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all) | ||
// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2023 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences | ||
// SPDX-FileCopyrightText: 2022 - 2023 dv4all | ||
// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) | ||
// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) (dv4all) | ||
|
@@ -42,7 +42,8 @@ export type RepositoryUrl = { | |
commit_history_scraped_at?: string | null, | ||
contributor_count?: number | null, | ||
contributor_count_last_error?: string | null, | ||
contributor_count_scraped_at?: string | null | ||
contributor_count_scraped_at?: string | null, | ||
scraping_disabled_reason: string | null | ||
} | ||
|
||
export type NewSoftwareItem = { | ||
|
@@ -67,6 +68,7 @@ export type SoftwareTableItem = NewSoftwareItem & { | |
export type SoftwareItem = SoftwareTableItem & { | ||
repository_url: string | null, | ||
repository_platform: CodePlatform | null | ||
scraping_disabled_reason: string | null | ||
} | ||
|
||
export type SoftwareItemFromDB = SoftwareTableItem & { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
// SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all) | ||
// SPDX-FileCopyrightText: 2022 - 2023 dv4all | ||
// SPDX-FileCopyrightText: 2022 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
@@ -53,7 +53,7 @@ export async function getSoftwareToEdit({slug, token}: | |
{ slug: string, token: string }) { | ||
try { | ||
// GET | ||
const select = '*,repository_url!left(url,code_platform)' | ||
const select = '*,repository_url!left(url,code_platform,scraping_disabled_reason)' | ||
const url = `${getBaseUrl()}/software?select=${select}&slug=eq.${slug}` | ||
const resp = await fetch(url, { | ||
method: 'GET', | ||
|
@@ -71,6 +71,7 @@ export async function getSoftwareToEdit({slug, token}: | |
software.repository_url = null | ||
software.repository_platform = null | ||
} | ||
software.scraping_disabled_reason = data[0]?.repository_url?.scraping_disabled_reason | ||
return software | ||
} | ||
} catch (e: any) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2022 Christian Meeßen (GFZ) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences | ||
// | ||
|
@@ -55,7 +55,7 @@ public static Optional<GithubScraper> create(String url) { | |
* Example URL: https://api.github.com/repos/research-software-directory/RSD-as-a-service | ||
*/ | ||
@Override | ||
public BasicGitData basicData() throws IOException, InterruptedException, RsdResponseException { | ||
public BasicGitData basicData() throws IOException, InterruptedException, RsdResponseException { | ||
Optional<String> apiCredentials = Config.apiCredentialsGithub(); | ||
HttpResponse<String> response; | ||
if (apiCredentials.isPresent()) { | ||
|
@@ -144,6 +144,7 @@ public CommitsPerWeek contributions() throws IOException, InterruptedException, | |
} | ||
} | ||
|
||
// Example URL: https://api.github.com/repos/research-software-directory/RSD-as-a-service/contributors?per_page=1 | ||
@Override | ||
public Integer contributorCount() throws IOException, InterruptedException, RsdResponseException { | ||
// we request one contributor per page and just extract the number of pages from the headers | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center | ||
// SPDX-FileCopyrightText: 2022 Christian Meeßen (GFZ) <[email protected]> | ||
// SPDX-FileCopyrightText: 2022 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences | ||
// | ||
|
@@ -43,6 +43,7 @@ public GitlabScraper(String gitLabApiUrl, String projectPath) { | |
* returned. If the license could not be detected, returns "Other". API endpoint: | ||
* https://docs.gitlab.com/ee/api/projects.html#get-single-project NOTE: A GraphQL request here | ||
* might be more efficient since less data would be sent. | ||
* Example URL: https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab-shell?license=True | ||
* | ||
* @return The basic data | ||
*/ | ||
|
@@ -55,6 +56,8 @@ public BasicGitData basicData() throws IOException, InterruptedException, RsdRes | |
/** | ||
* Returns the languages used in a project with percentage values. Uses the API Endpoint | ||
* https://docs.gitlab.com/ee/api/projects.html#languages GET /projects/:id/languages | ||
* <p> | ||
* Example URL: https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab-shell/languages | ||
* | ||
* @return A JSON as a String | ||
*/ | ||
|
@@ -104,6 +107,7 @@ public CommitsPerWeek contributions() throws IOException, InterruptedException, | |
return commits; | ||
} | ||
|
||
// Example URL: https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab-shell/repository/contributors | ||
@Override | ||
public Integer contributorCount() throws IOException, InterruptedException, RsdResponseException { | ||
HttpResponse<String> httpResponse = Utils.getAsHttpResponse(apiUri + "/projects/" + Utils.urlEncode(projectPath) + "/repository/contributors"); | ||
|
Oops, something went wrong.