Skip to content

Commit

Permalink
Merge pull request #1356 from research-software-directory/1287-remote…
Browse files Browse the repository at this point in the history
…-rsd-search

1287 remote rsd search (RSD v3)
  • Loading branch information
dmijatovic authored Jan 28, 2025
2 parents d55cdd1 + c4fb195 commit 5c9051f
Show file tree
Hide file tree
Showing 242 changed files with 12,337 additions and 6,921 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/frontend_tests.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all)
# SPDX-FileCopyrightText: 2022 - 2023 dv4all
# SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
# SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center
# SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center)
# SPDX-FileCopyrightText: 2023 - 2025 Dusan Mijatovic (Netherlands eScience Center)
# SPDX-FileCopyrightText: 2023 - 2025 Netherlands eScience Center
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -26,10 +26,10 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
- name: "install node v20.10 and cache npm"
- name: "install node v22.13 and cache npm"
uses: actions/setup-node@v4
with:
node-version: 20.10
node-version: 22.13
cache: 'npm'
cache-dependency-path: frontend/package-lock.json
- name: "install dependencies"
Expand Down
89 changes: 89 additions & 0 deletions database/024-remote-rsd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center)
-- SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
-- SPDX-FileCopyrightText: 2024 Netherlands eScience Center
--
-- SPDX-License-Identifier: Apache-2.0

-- REMOTE_RSD
-- Registry of the remote RSD instances that are to be scraped.
CREATE TABLE remote_rsd (
	-- surrogate key, generated when none is supplied
	id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
	-- unique label of at least 3 and at most 50 characters
	label VARCHAR(50) UNIQUE NOT NULL CHECK (3 <= LENGTH(label)),
	-- unique domain of the remote instance
	domain VARCHAR(200) UNIQUE NOT NULL,
	-- nullable flag, TRUE unless stated otherwise
	-- NOTE(review): presumably switches scraping of this remote on/off - confirm against the scraper
	active BOOLEAN DEFAULT TRUE,
	-- interval in minutes, never below 5 (which is also the default)
	scrape_interval_minutes BIGINT CHECK (scrape_interval_minutes >= 5) DEFAULT 5,
	-- NOTE(review): presumably written by the scraper after each run - confirm
	scraped_at TIMESTAMPTZ,
	-- NOTE(review): presumably the error message of the last failed scrape - confirm
	last_err_msg VARCHAR(1000),
	-- both timestamps are overwritten by the sanitise_*_remote_rsd triggers
	created_at TIMESTAMPTZ NOT NULL,
	updated_at TIMESTAMPTZ NOT NULL
);

-- SANITIZE REMOTE_RSD
-- Insert sanitiser for remote_rsd: any id, created_at or updated_at supplied
-- by the caller is discarded and replaced with a server-generated value.
-- Returns the adjusted NEW row so the insert proceeds with it.
CREATE FUNCTION sanitise_insert_remote_rsd() RETURNS TRIGGER LANGUAGE plpgsql AS
$$
BEGIN
	NEW.id = gen_random_uuid();
	-- CURRENT_TIMESTAMP is already a TIMESTAMPTZ, like the target column.
	-- A zone-less local timestamp would have to be re-interpreted in the
	-- session time zone, which is ambiguous during a DST fall-back hour.
	NEW.created_at = CURRENT_TIMESTAMP;
	-- a freshly inserted row counts as "updated" at its creation time
	NEW.updated_at = NEW.created_at;
	RETURN NEW;
END
$$;

CREATE TRIGGER sanitise_insert_remote_rsd BEFORE INSERT ON remote_rsd
	FOR EACH ROW EXECUTE PROCEDURE sanitise_insert_remote_rsd();

-- Update sanitiser for remote_rsd: id and created_at are immutable (any new
-- value is reverted to the stored one) and updated_at is always set to the
-- current time. Returns the adjusted NEW row so the update proceeds with it.
CREATE FUNCTION sanitise_update_remote_rsd() RETURNS TRIGGER LANGUAGE plpgsql AS
$$
BEGIN
	NEW.id = OLD.id;
	NEW.created_at = OLD.created_at;
	-- CURRENT_TIMESTAMP is already a TIMESTAMPTZ, like the target column.
	-- A zone-less local timestamp would have to be re-interpreted in the
	-- session time zone, which is ambiguous during a DST fall-back hour.
	NEW.updated_at = CURRENT_TIMESTAMP;
	RETURN NEW;
END
$$;

CREATE TRIGGER sanitise_update_remote_rsd BEFORE UPDATE ON remote_rsd
	FOR EACH ROW EXECUTE PROCEDURE sanitise_update_remote_rsd();

-- RLS REMOTE_RSD
-- Row level security for remote_rsd.

ALTER TABLE remote_rsd ENABLE ROW LEVEL SECURITY;

-- roles rsd_web_anon and rsd_user may read every row
CREATE POLICY anyone_can_read ON remote_rsd
	FOR SELECT
	TO rsd_web_anon, rsd_user
	USING (TRUE);

-- role rsd_admin may read and write every row
-- (FOR ALL is the default command scope, spelled out here)
CREATE POLICY admin_all_rights ON remote_rsd
	FOR ALL
	TO rsd_admin
	USING (TRUE)
	WITH CHECK (TRUE);


-- REMOTE_SOFTWARE
-- Table for scraped remote software.
-- Rows hold the results returned by the software_overview RPC of a remote RSD,
-- enriched with the id of the remote_rsd entry.
CREATE TABLE remote_software (
	-- surrogate key, generated when none is supplied
	id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
	-- the remote_rsd entry this software belongs to
	remote_rsd_id UUID REFERENCES remote_rsd (id) NOT NULL,
	-- NOTE(review): presumably the software id as known on the remote instance - confirm
	remote_software_id UUID NOT NULL,
	-- lower-case alphanumeric groups separated by single hyphens
	slug VARCHAR(200) NOT NULL CHECK (slug ~ '^[a-z0-9]+(-[a-z0-9]+)*$'),
	-- unpublished unless stated otherwise; drives the anyone_can_read policy
	is_published BOOLEAN NOT NULL DEFAULT FALSE,
	brand_name VARCHAR(200) NOT NULL,
	short_statement VARCHAR(300),
	image_id VARCHAR(40),
	updated_at TIMESTAMPTZ,
	contributor_cnt BIGINT,
	mention_cnt BIGINT,
	keywords CITEXT[],
	keywords_text TEXT,
	prog_lang TEXT[],
	licenses VARCHAR[],
	scraped_at TIMESTAMPTZ NOT NULL,
	-- a remote software id is stored at most once per remote_rsd entry
	UNIQUE (remote_rsd_id, remote_software_id)
);

-- RLS REMOTE_SOFTWARE
-- Row level security for remote_software.

ALTER TABLE remote_software ENABLE ROW LEVEL SECURITY;

-- roles rsd_web_anon and rsd_user only see rows flagged as published
CREATE POLICY anyone_can_read ON remote_software
	FOR SELECT
	TO rsd_web_anon, rsd_user
	USING (is_published);

-- role rsd_admin may read and write every row
-- (FOR ALL is the default command scope, spelled out here)
CREATE POLICY admin_all_rights ON remote_software
	FOR ALL
	TO rsd_admin
	USING (TRUE)
	WITH CHECK (TRUE);
53 changes: 53 additions & 0 deletions database/025-rsd-info.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
-- SPDX-FileCopyrightText: 2024 - 2025 Dusan Mijatovic (Netherlands eScience Center)
-- SPDX-FileCopyrightText: 2024 - 2025 Netherlands eScience Center
--
-- SPDX-License-Identifier: Apache-2.0

-- RSD INFO
-- Key/value table with basic information about this RSD instance.
-- It is used to obtain the RSD name that is shown for remotes;
-- the remote_name property has to be inserted manually.
CREATE TABLE rsd_info (
	-- name of the property, e.g. remote_name
	key VARCHAR(100) PRIMARY KEY,
	-- value of the property
	value VARCHAR(250) NOT NULL,
	-- nullable flag, TRUE unless stated otherwise;
	-- only rows where it is TRUE pass the anyone_can_read policy
	public BOOLEAN DEFAULT TRUE,
	-- both timestamps are overwritten by the sanitise_*_rsd_info triggers
	created_at TIMESTAMPTZ NOT NULL,
	updated_at TIMESTAMPTZ NOT NULL
);

-- Insert sanitiser for rsd_info: any created_at or updated_at supplied by the
-- caller is discarded and replaced with the current time.
-- Returns the adjusted NEW row so the insert proceeds with it.
CREATE FUNCTION sanitise_insert_rsd_info() RETURNS TRIGGER LANGUAGE plpgsql AS
$$
BEGIN
	-- CURRENT_TIMESTAMP is already a TIMESTAMPTZ, like the target column.
	-- A zone-less local timestamp would have to be re-interpreted in the
	-- session time zone, which is ambiguous during a DST fall-back hour.
	NEW.created_at = CURRENT_TIMESTAMP;
	-- a freshly inserted row counts as "updated" at its creation time
	NEW.updated_at = NEW.created_at;
	RETURN NEW;
END
$$;

CREATE TRIGGER sanitise_insert_rsd_info BEFORE INSERT ON rsd_info
	FOR EACH ROW EXECUTE PROCEDURE sanitise_insert_rsd_info();

-- Update sanitiser for rsd_info: created_at is immutable (any new value is
-- reverted to the stored one) and updated_at is always set to the current
-- time. Returns the adjusted NEW row so the update proceeds with it.
CREATE FUNCTION sanitise_update_rsd_info() RETURNS TRIGGER LANGUAGE plpgsql AS
$$
BEGIN
	NEW.created_at = OLD.created_at;
	-- CURRENT_TIMESTAMP is already a TIMESTAMPTZ, like the target column.
	-- A zone-less local timestamp would have to be re-interpreted in the
	-- session time zone, which is ambiguous during a DST fall-back hour.
	NEW.updated_at = CURRENT_TIMESTAMP;
	RETURN NEW;
END
$$;

CREATE TRIGGER sanitise_update_rsd_info BEFORE UPDATE ON rsd_info
	FOR EACH ROW EXECUTE PROCEDURE sanitise_update_rsd_info();

-- RLS RSD_INFO
-- Row level security for rsd_info.
ALTER TABLE rsd_info ENABLE ROW LEVEL SECURITY;

-- roles rsd_web_anon and rsd_user may read (SELECT) public keys only;
-- rows whose public flag is FALSE or NULL stay hidden
CREATE POLICY anyone_can_read ON rsd_info
	FOR SELECT
	TO rsd_web_anon, rsd_user
	USING (public IS TRUE);

-- role rsd_admin may read and write every row
-- (FOR ALL is the default command scope, spelled out here)
CREATE POLICY admin_all_rights ON rsd_info
	FOR ALL
	TO rsd_admin
	USING (TRUE)
	WITH CHECK (TRUE);
133 changes: 2 additions & 131 deletions database/100-create-api-views.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
-- SPDX-FileCopyrightText: 2022 - 2023 Christian Meeßen (GFZ) <[email protected]>
-- SPDX-FileCopyrightText: 2022 - 2023 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences
-- SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
-- SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center
-- SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center)
-- SPDX-FileCopyrightText: 2022 - 2025 Netherlands eScience Center
-- SPDX-FileCopyrightText: 2023 - 2025 Dusan Mijatovic (Netherlands eScience Center)
--
-- SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -829,135 +829,6 @@ $$
$$;


-- GLOBAL SEARCH
-- Case-insensitive substring search over software, projects, top level
-- organisations and communities. Returns one row per matching item.
--
-- Parameter:
--   query         text to look for
-- Returned columns:
--   slug, name    slug and display name of the item
--   source        'software', 'projects', 'organisations' or 'communities'
--   is_published  publication flag of software and projects;
--                 always TRUE for organisations and communities
--   rank          match quality, lower is better
--                   0 = slug or name equals query
--                   1 = a keyword equals query (software and projects only)
--                   2 = slug or name starts with query
--                   3 = slug or name contains query
--                       (organisations and communities: every remaining match)
--                   4 = every remaining match (software and projects only),
--                       i.e. short statement / subtitle or part of a keyword
--   index_found   for "contains" matches the 1-based position of query in
--                 slug or name, whichever is smaller; 0 for all other matches.
--                 The position is determined case-insensitively, in line with
--                 the ILIKE matching used to select the rows.
--
-- NOTE(review): query is used verbatim inside ILIKE patterns, so % and _ typed
-- by the user act as wildcards. For such queries no literal position may exist
-- and index_found can be NULL.
CREATE FUNCTION global_search(query VARCHAR) RETURNS TABLE(
	slug VARCHAR,
	name VARCHAR,
	source TEXT,
	is_published BOOLEAN,
	rank INTEGER,
	index_found INTEGER
) LANGUAGE sql STABLE AS
$$
	-- SOFTWARE search item
	SELECT
		software.slug,
		software.brand_name AS name,
		'software' AS "source",
		software.is_published,
		(CASE
			WHEN software.slug ILIKE query OR software.brand_name ILIKE query THEN 0
			WHEN BOOL_OR(keyword.value ILIKE query) THEN 1
			WHEN software.slug ILIKE CONCAT(query, '%') OR software.brand_name ILIKE CONCAT(query, '%') THEN 2
			WHEN software.slug ILIKE CONCAT('%', query, '%') OR software.brand_name ILIKE CONCAT('%', query, '%') THEN 3
			ELSE 4
		END) AS rank,
		(CASE
			WHEN software.slug ILIKE query OR software.brand_name ILIKE query THEN 0
			WHEN BOOL_OR(keyword.value ILIKE query) THEN 0
			WHEN software.slug ILIKE CONCAT(query, '%') OR software.brand_name ILIKE CONCAT(query, '%') THEN 0
			WHEN software.slug ILIKE CONCAT('%', query, '%') OR software.brand_name ILIKE CONCAT('%', query, '%')
				-- POSITION yields 0 when nothing is found; NULLIF turns that into
				-- NULL so LEAST only considers the field(s) that do contain query
				THEN LEAST(
					NULLIF(POSITION(LOWER(query) IN LOWER(software.slug)), 0),
					NULLIF(POSITION(LOWER(query) IN LOWER(software.brand_name)), 0)
				)
			ELSE 0
		END) AS index_found
	FROM
		software
	LEFT JOIN keyword_for_software ON keyword_for_software.software = software.id
	LEFT JOIN keyword ON keyword.id = keyword_for_software.keyword
	GROUP BY software.id
	HAVING
		software.slug ILIKE CONCAT('%', query, '%')
		OR
		software.brand_name ILIKE CONCAT('%', query, '%')
		OR
		software.short_statement ILIKE CONCAT('%', query, '%')
		OR
		BOOL_OR(keyword.value ILIKE CONCAT('%', query, '%'))
	UNION ALL
	-- PROJECT search item
	SELECT
		project.slug,
		project.title AS name,
		'projects' AS "source",
		project.is_published,
		(CASE
			WHEN project.slug ILIKE query OR project.title ILIKE query THEN 0
			WHEN BOOL_OR(keyword.value ILIKE query) THEN 1
			WHEN project.slug ILIKE CONCAT(query, '%') OR project.title ILIKE CONCAT(query, '%') THEN 2
			WHEN project.slug ILIKE CONCAT('%', query, '%') OR project.title ILIKE CONCAT('%', query, '%') THEN 3
			ELSE 4
		END) AS rank,
		(CASE
			WHEN project.slug ILIKE query OR project.title ILIKE query THEN 0
			WHEN BOOL_OR(keyword.value ILIKE query) THEN 0
			WHEN project.slug ILIKE CONCAT(query, '%') OR project.title ILIKE CONCAT(query, '%') THEN 0
			WHEN project.slug ILIKE CONCAT('%', query, '%') OR project.title ILIKE CONCAT('%', query, '%')
				THEN LEAST(
					NULLIF(POSITION(LOWER(query) IN LOWER(project.slug)), 0),
					NULLIF(POSITION(LOWER(query) IN LOWER(project.title)), 0)
				)
			ELSE 0
		END) AS index_found
	FROM
		project
	LEFT JOIN keyword_for_project ON keyword_for_project.project = project.id
	LEFT JOIN keyword ON keyword.id = keyword_for_project.keyword
	GROUP BY project.id
	HAVING
		project.slug ILIKE CONCAT('%', query, '%')
		OR
		project.title ILIKE CONCAT('%', query, '%')
		OR
		project.subtitle ILIKE CONCAT('%', query, '%')
		OR
		BOOL_OR(keyword.value ILIKE CONCAT('%', query, '%'))
	UNION ALL
	-- ORGANISATION search item
	SELECT
		organisation.slug,
		organisation."name",
		'organisations' AS "source",
		TRUE AS is_published,
		(CASE
			WHEN organisation.slug ILIKE query OR organisation."name" ILIKE query THEN 0
			WHEN organisation.slug ILIKE CONCAT(query, '%') OR organisation."name" ILIKE CONCAT(query, '%') THEN 2
			ELSE 3
		END) AS rank,
		(CASE
			WHEN organisation.slug ILIKE query OR organisation."name" ILIKE query THEN 0
			WHEN organisation.slug ILIKE CONCAT(query, '%') OR organisation."name" ILIKE CONCAT(query, '%') THEN 0
			ELSE
				LEAST(
					NULLIF(POSITION(LOWER(query) IN LOWER(organisation.slug)), 0),
					NULLIF(POSITION(LOWER(query) IN LOWER(organisation."name")), 0)
				)
		END) AS index_found
	FROM
		organisation
	WHERE
		-- ONLY TOP LEVEL ORGANISATIONS
		organisation.parent IS NULL
		AND
		(organisation.slug ILIKE CONCAT('%', query, '%') OR organisation."name" ILIKE CONCAT('%', query, '%'))
	UNION ALL
	-- COMMUNITY search item
	SELECT
		community.slug,
		community."name",
		'communities' AS "source",
		TRUE AS is_published,
		(CASE
			WHEN community.slug ILIKE query OR community."name" ILIKE query THEN 0
			WHEN community.slug ILIKE CONCAT(query, '%') OR community."name" ILIKE CONCAT(query, '%') THEN 2
			ELSE 3
		END) AS rank,
		(CASE
			WHEN community.slug ILIKE query OR community."name" ILIKE query THEN 0
			WHEN community.slug ILIKE CONCAT(query, '%') OR community."name" ILIKE CONCAT(query, '%') THEN 0
			ELSE
				LEAST(
					NULLIF(POSITION(LOWER(query) IN LOWER(community.slug)), 0),
					NULLIF(POSITION(LOWER(query) IN LOWER(community."name")), 0)
				)
		END) AS index_found
	FROM
		community
	WHERE
		community.slug ILIKE CONCAT('%', query, '%') OR community."name" ILIKE CONCAT('%', query, '%');
$$;


-- Check whether user agreed on Terms of Service and read the Privacy Statement
CREATE FUNCTION user_agreements_stored(account_id UUID) RETURNS BOOLEAN LANGUAGE sql STABLE AS
$$
Expand Down
File renamed without changes.
Loading

0 comments on commit 5c9051f

Please sign in to comment.