From f1402c5cbf7307795ee67140b938fb5ac6f99b88 Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Thu, 1 Aug 2024 16:22:56 +0200 Subject: [PATCH] feat: allow admins to disable scraping package managers --- .../005-create-relations-for-software.sql | 2 + .../docs/03-rsd-instance/03-administration.md | 32 ++++++++-- .../package-managers/PackageManagerItem.tsx | 63 ++++++++++++++++++- .../package-managers/apiPackageManager.ts | 16 +++-- .../edit/services/PackageManagerServices.tsx | 3 + .../edit/services/ServiceInfoListItem.tsx | 19 ++++-- .../edit/services/SoftwareRepoServices.tsx | 2 +- .../edit/services/apiSoftwareServices.tsx | 6 +- .../package_manager/PostgrestConnector.java | 4 +- 9 files changed, 118 insertions(+), 29 deletions(-) diff --git a/database/005-create-relations-for-software.sql b/database/005-create-relations-for-software.sql index 647c31ee5..8e39691c2 100644 --- a/database/005-create-relations-for-software.sql +++ b/database/005-create-relations-for-software.sql @@ -65,9 +65,11 @@ CREATE TABLE package_manager ( download_count BIGINT, download_count_last_error VARCHAR(500), download_count_scraped_at TIMESTAMPTZ, + download_count_scraping_disabled_reason VARCHAR(200), reverse_dependency_count INTEGER, reverse_dependency_count_last_error VARCHAR(500), reverse_dependency_count_scraped_at TIMESTAMPTZ, + reverse_dependency_count_scraping_disabled_reason VARCHAR(200), position INTEGER, created_at TIMESTAMPTZ NOT NULL, updated_at TIMESTAMPTZ NOT NULL diff --git a/documentation/docs/03-rsd-instance/03-administration.md b/documentation/docs/03-rsd-instance/03-administration.md index 3b4bfb680..fd9afbfb8 100644 --- a/documentation/docs/03-rsd-instance/03-administration.md +++ b/documentation/docs/03-rsd-instance/03-administration.md @@ -3,8 +3,8 @@ This section describes administration options available in the RSD. :::tip -To be able to log in as RSD administrator you first need to define a list of rsd admin users in the .env file. -See [Login as rsd administrator in the getting started section](/rsd-instance/getting-started/#log-in-as-rsd-administrator). +To be able to log in as an RSD administrator, you first need to grant an existing user admin privileges in the database. +See [Log in as rsd administrator in the getting started section](/rsd-instance/getting-started/#log-in-as-rsd-administrator). ::: ## Public pages @@ -65,7 +65,7 @@ You can add, search and delete ORCIDs from the RSD. Use the bulk import button t ## RSD users -This section shows all RSD users who logged in to RSD at least once. You can search for users, assign the administrator role (rsd_admin) or delete user accounts. +This section shows all RSD users who logged in to RSD at least once. You can search for users, assign the administrator role (`rsd_admin`) or delete user accounts. :::danger @@ -93,7 +93,7 @@ Use the search box to find organisations in the ROR database. This is the prefer ### Define organisation primary maintainer -The primary maintainer of an organisation is defined by an RSD administrator. You need to provide the user id in the general settings section. The user id is unique, and it is automatically created by RSD after a user is logged in for the first time. +The primary maintainer of an organisation is defined by an RSD administrator. You need to provide the user ID in the general settings section. The user ID is unique, and it is automatically created by RSD after a user is logged in for the first time. ![animation](img/organisation-maintainers-primary-invite.gif) @@ -140,7 +140,7 @@ Only RSD administrators can create communities. ### Add community -To create new community use "Add" button. Provide name, short description and logo in the modal. +To create new community use "Add" button. Provide a name, short description and logo in the modal. ### Edit community @@ -205,6 +205,26 @@ This section is used to show public announcements to all users of the RSD. It is ![animation](img/admin-announcement.gif) +## Software + +### Slug + +When editing a software page, the **slug** of the page (called **RSD path**) can be changed by admins under the **Description** tab. + +### Disable Git harvesting + +If you want to disable the harvesting of a Git repo, you can do so by providing a reason under the **Links & metadata** tab. Page maintainers will be able to see if and why the harvesting is disabled under the **Background services** tab. + +### Disable package manager harvesting + +If you want to disable the harvesting of a package manager, you can do so by providing a reason under the **Package managers** tab. Page maintainers will be able to see if and why the harvesting is disabled under the **Background services** tab. + +## Project + +### Slug + +When editing a project page, the **slug** of the page (called **RSD path**) can be changed by admins under the **Project details** tab. + ## News RSD administrators are able to create news items. The additional option "Add news" will appear in the "+" menu at the top right of the page header. @@ -232,7 +252,7 @@ After news item is created you will be redirected to edit news item page. Here y - Publication date is shown in the header of the news title. It can be changed at any time. Note that changing the publication title also changes public url of the news item. - First uploaded image is used in the news card. - Using "Copy link" button you can copy the Markdown syntax to the clipboard and the paste the link at the desired location of the body. -- Using "Delete" button will delete image and the Markdown link syntax from the news body. +- Using "Delete" button will delete the image and the Markdown link syntax from the news body. ::: diff --git a/frontend/components/software/edit/package-managers/PackageManagerItem.tsx b/frontend/components/software/edit/package-managers/PackageManagerItem.tsx index b9f718bf2..91c1bf878 100644 --- a/frontend/components/software/edit/package-managers/PackageManagerItem.tsx +++ b/frontend/components/software/edit/package-managers/PackageManagerItem.tsx @@ -12,6 +12,13 @@ import ListItemAvatar from '@mui/material/ListItemAvatar' import Avatar from '@mui/material/Avatar' import {PackageManager, packageManagerSettings} from './apiPackageManager' +import List from '@mui/material/List' +import ListItem from '@mui/material/ListItem' +import TextField from '@mui/material/TextField' +import {useSession} from '~/auth' +import {createJsonHeaders, getBaseUrl} from '~/utils/fetchHelpers' +import useSnackbar from '~/components/snackbar/useSnackbar' +import logger from '~/utils/logger' type PackageManagerItemProps = { pos: number, @@ -33,7 +40,7 @@ function RsdScraperStatus({services,download_count,download_count_scraped_at,rev if (services?.length===0) { return RSD scraper services not available } - if (services.includes('downloads')===true){ + if (services.includes('downloads')){ if (download_count_scraped_at && Number.isInteger(download_count)){ html.push(Downloads: {download_count}) @@ -41,7 +48,7 @@ function RsdScraperStatus({services,download_count,download_count_scraped_at,rev html.push(Downloads: no info) } } - if (services.includes('dependents')===true){ + if (services.includes('dependents')){ if (reverse_dependency_count_scraped_at && Number.isInteger(reverse_dependency_count)){ html.push(Dependents: {reverse_dependency_count}) }else{ @@ -51,11 +58,41 @@ function RsdScraperStatus({services,download_count,download_count_scraped_at,rev return html } - export default function PackageManagerItem({pos, item, onDelete, onEdit}: PackageManagerItemProps) { + const {showErrorMessage} = useSnackbar() + const {user, token} = useSession() + const isAdmin = user?.role === 'rsd_admin' // get package manager info const info = packageManagerSettings[item.package_manager ?? 'other'] const url = new URL(item.url) + + async function saveReason(reason: string, field: 'download_count_scraping_disabled_reason' | 'reverse_dependency_count_scraping_disabled_reason') { + let sanitisedReason: string | null = reason.trim() + + if (sanitisedReason.length === 0) { + sanitisedReason = null + } + + const patchUrl = `${getBaseUrl()}/package_manager?id=eq.${item.id}` + fetch(patchUrl, { + method: 'PATCH', + headers: { + ...createJsonHeaders(token) + }, + body: JSON.stringify({[field]: sanitisedReason}) + }) + .then(async resp => { + if (!resp.ok) { + showErrorMessage('Failed to update the reason, please try again or contact us') + logger(`PackageManagerItem.tsx.saveReason: status ${resp.status}, body: ${await resp.text()}`, 'error') + } + }) + .catch((e) => { + showErrorMessage('Failed to update the reason, please try again or contact us') + logger(`PackageManagerItem.tsx.saveReason: error when saving reason: ${e}`, 'error') + }) + } + return ( + { + isAdmin && + + + saveReason(e.target.value, 'download_count_scraping_disabled_reason')} + /> + + + saveReason(e.target.value, 'reverse_dependency_count_scraping_disabled_reason')} + /> + + + } + ) } diff --git a/frontend/components/software/edit/package-managers/apiPackageManager.ts b/frontend/components/software/edit/package-managers/apiPackageManager.ts index aabd3acce..b8e97d22d 100644 --- a/frontend/components/software/edit/package-managers/apiPackageManager.ts +++ b/frontend/components/software/edit/package-managers/apiPackageManager.ts @@ -9,10 +9,7 @@ // SPDX-License-Identifier: Apache-2.0 import logger from '~/utils/logger' -import { - createJsonHeaders, extractErrorMessages, - extractReturnMessage, getBaseUrl -} from '~/utils/fetchHelpers' +import {createJsonHeaders, extractErrorMessages, extractReturnMessage, getBaseUrl} from '~/utils/fetchHelpers' export type PackageManagerSettings={ name: string, @@ -126,7 +123,7 @@ export type NewPackageManager = { id: string|null software: string, url: string, - package_manager: PackageManagerTypes|null, + package_manager: PackageManagerTypes | null, position: number } @@ -138,11 +135,13 @@ export type PackageManager = NewPackageManager & { id: string, download_count: number | null, download_count_scraped_at: string | null, + download_count_scraping_disabled_reason: string | null, reverse_dependency_count: number | null, - reverse_dependency_count_scraped_at: string | null + reverse_dependency_count_scraped_at: string | null, + reverse_dependency_count_scraping_disabled_reason: string | null, } -export async function getPackageManagers({software, token}: { software: string, token?: string }) { +export async function getPackageManagers({software, token}: { software: string, token?: string }): Promise { try { const query = `software=eq.${software}&order=position.asc,package_manager.asc` const url = `${getBaseUrl()}/package_manager?${query}` @@ -156,8 +155,7 @@ export async function getPackageManagers({software, token}: { software: string, }) if (resp.status === 200) { - const json:PackageManager[] = await resp.json() - return json + return await resp.json() } logger(`getPackageManagers...${resp.status} ${resp.statusText}`,'warn') return [] diff --git a/frontend/components/software/edit/services/PackageManagerServices.tsx b/frontend/components/software/edit/services/PackageManagerServices.tsx index 849c84284..1186a1dac 100644 --- a/frontend/components/software/edit/services/PackageManagerServices.tsx +++ b/frontend/components/software/edit/services/PackageManagerServices.tsx @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) // SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // // SPDX-License-Identifier: Apache-2.0 @@ -35,6 +36,7 @@ export default function PackageManagerServices() { last_error={service.download_count_last_error} url={service.url} platform={null} + scraping_disabled_reason={service.download_count_scraping_disabled_reason} /> : null } @@ -46,6 +48,7 @@ export default function PackageManagerServices() { last_error={service.reverse_dependency_count_last_error} url={service.url} platform={null} + scraping_disabled_reason={service.reverse_dependency_count_scraping_disabled_reason} /> : null } diff --git a/frontend/components/software/edit/services/ServiceInfoListItem.tsx b/frontend/components/software/edit/services/ServiceInfoListItem.tsx index 88aef2849..bbabdeefd 100644 --- a/frontend/components/software/edit/services/ServiceInfoListItem.tsx +++ b/frontend/components/software/edit/services/ServiceInfoListItem.tsx @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) // SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // // SPDX-License-Identifier: Apache-2.0 @@ -15,14 +16,15 @@ import DoDisturbOnIcon from '@mui/icons-material/DoDisturbOn' import {CodePlatform} from '~/types/SoftwareTypes' type ServiceInfoListItemProps={ - title:string - scraped_at: string|null - last_error: string|null - url: string|null - platform: CodePlatform|null + readonly title:string + readonly scraped_at: string|null + readonly last_error: string|null + readonly url: string|null + readonly platform: CodePlatform|null + readonly scraping_disabled_reason: string|null } -export function ServiceInfoListItem({title,scraped_at,last_error,url,platform}:ServiceInfoListItemProps){ +export function ServiceInfoListItem({title,scraped_at,last_error,url,platform,scraping_disabled_reason}:ServiceInfoListItemProps){ let status:'error'|'success'|'not_active'|'scheduled'|'not_supported' = 'not_active' // set service status @@ -38,6 +40,7 @@ export function ServiceInfoListItem({title,scraped_at,last_error,url,platform}:S if (status==='not_active') color='warning.main' function getStatusIcon(){ + if (scraping_disabled_reason !== null) return if (status === 'error') return if (status === 'success') return if (status === 'scheduled') return @@ -46,6 +49,10 @@ export function ServiceInfoListItem({title,scraped_at,last_error,url,platform}:S } function getStatusMsg(){ + if (scraping_disabled_reason !== null) { + return ({`This harvester was disabled by the admins for the following reason: ${scraping_disabled_reason}`}) + } + if (last_error) return ( {last_error} ) diff --git a/frontend/components/software/edit/services/SoftwareRepoServices.tsx b/frontend/components/software/edit/services/SoftwareRepoServices.tsx index ce315b92a..e9cd53229 100644 --- a/frontend/components/software/edit/services/SoftwareRepoServices.tsx +++ b/frontend/components/software/edit/services/SoftwareRepoServices.tsx @@ -33,7 +33,7 @@ export default function SoftwareRepoServices() { platform: services ? services['code_platform'] : null } return ( - + ) })} diff --git a/frontend/components/software/edit/services/apiSoftwareServices.tsx b/frontend/components/software/edit/services/apiSoftwareServices.tsx index f1819fa9f..6d49bb047 100644 --- a/frontend/components/software/edit/services/apiSoftwareServices.tsx +++ b/frontend/components/software/edit/services/apiSoftwareServices.tsx @@ -31,8 +31,10 @@ export type PackageManagerService = { package_manager: PackageManagerTypes, download_count_scraped_at: string|null, download_count_last_error: string|null, + download_count_scraping_disabled_reason: string|null, reverse_dependency_count_scraped_at: string|null, - reverse_dependency_count_last_error: string|null + reverse_dependency_count_last_error: string|null, + reverse_dependency_count_scraping_disabled_reason: string|null, } async function getSoftwareServices(id:string,token:string){ @@ -61,7 +63,7 @@ async function getSoftwareServices(id:string,token:string){ async function getPackageManagerServices(id:string,token:string){ try{ - const select='select=software,url,package_manager,download_count_scraped_at,download_count_last_error,reverse_dependency_count_scraped_at,reverse_dependency_count_last_error' + const select='select=software,url,package_manager,download_count_scraped_at,download_count_last_error,download_count_scraping_disabled_reason,reverse_dependency_count_scraped_at,reverse_dependency_count_last_error,reverse_dependency_count_scraping_disabled_reason' const query = `${select}&software=eq.${id}&order=position` const url = `${getBaseUrl()}/package_manager?${query}` diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/package_manager/PostgrestConnector.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/package_manager/PostgrestConnector.java index 71a331b0f..f5eae01cf 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/package_manager/PostgrestConnector.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/package_manager/PostgrestConnector.java @@ -27,7 +27,7 @@ public PostgrestConnector(String backendUrl) { } public Collection oldestDownloadCounts(int limit) { - String filter = "or=(package_manager.eq.dockerhub)"; + String filter = "download_count_scraping_disabled_reason=is.null&or=(package_manager.eq.dockerhub)"; String data = Utils.getAsAdmin(backendUrl + "?" + filter + "&select=id,url,package_manager&order=download_count_scraped_at.asc.nullsfirst&limit=" + limit + "&" + Utils.atLeastOneHourAgoFilter("download_count_scraped_at") ); @@ -35,7 +35,7 @@ public Collection oldestDownloadCounts(int limit) { } public Collection oldestReverseDependencyCounts(int limit) { - String filter = "or=(package_manager.eq.anaconda,package_manager.eq.cran,package_manager.eq.crates,package_manager.eq.golang,package_manager.eq.maven,package_manager.eq.npm,package_manager.eq.pypi,package_manager.eq.sonatype)"; + String filter = "reverse_dependency_count_scraping_disabled_reason=is.null&or=(package_manager.eq.anaconda,package_manager.eq.cran,package_manager.eq.crates,package_manager.eq.golang,package_manager.eq.maven,package_manager.eq.npm,package_manager.eq.pypi,package_manager.eq.sonatype)"; String data = Utils.getAsAdmin(backendUrl + "?" + filter + "&select=id,url,package_manager&order=reverse_dependency_count_scraped_at.asc.nullsfirst&limit=" + limit + "&" + Utils.atLeastOneHourAgoFilter("reverse_dependency_count_scraped_at")); return parseBasicJsonData(data); }