From 1836081ef9855d632768987e1d11a05b9dcf9e9f Mon Sep 17 00:00:00 2001
From: Jolan Thomassin <98430140+JolanThomassin@users.noreply.github.com>
Date: Thu, 14 Mar 2024 18:42:07 +0000
Subject: [PATCH 1/2] Fixes #79, SQL query getting random crawl and score

---
 ailab/db/finesse/test_queries/__init__.py | 43 +++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/ailab/db/finesse/test_queries/__init__.py b/ailab/db/finesse/test_queries/__init__.py
index 6883e9a..cb5d916 100644
--- a/ailab/db/finesse/test_queries/__init__.py
+++ b/ailab/db/finesse/test_queries/__init__.py
@@ -44,3 +44,46 @@ def get_random_chunk(cursor, schema_version, seed=None):
 
     cursor.execute(query)
     return cursor.fetchall()
+
+
+def get_random_document_score(cursor, schema_version, seed=None):
+    """Return the score rows of one randomly selected crawl.
+
+    Args:
+        cursor: Database cursor used to run the statements.
+        schema_version: Schema name put first on the search path.
+        seed: Value interpolated into ``SET SEED``; defaults to
+            ``math.sin(time.time())``, which always lies in [-1, 1].
+
+    Returns:
+        ``cursor.fetchall()`` for the crawl/score join: one row per score
+        with ``crawl_id``, ``crawl_url``, ``score`` and ``score_type``.
+        Empty when the selected crawl has no matching score row.
+    """
+    if seed is None:
+        seed = math.sin(time.time())
+
+    # Execute the SET commands separately
+    cursor.execute(f'SET SEARCH_PATH TO "{schema_version}", public;')
+    cursor.execute(f"SET SEED TO {seed};")
+
+    # Shuffle the crawl rows themselves; no row count is needed to pick one.
+    query = """
+        WITH random_crawl AS (
+            SELECT id
+            FROM crawl
+            ORDER BY random()
+            LIMIT 1
+        )
+        SELECT
+            cr.id AS crawl_id, cr.url AS crawl_url, sc.score, sc.score_type
+        FROM
+            crawl cr
+        INNER JOIN
+            score sc ON cr.id = sc.entity_id
+        WHERE
+            cr.id = (SELECT id FROM random_crawl)
+    """
+
+    cursor.execute(query)
+    return cursor.fetchall()

From 1510b8d06a44389316b7f84932d550cd743a26f1 Mon Sep 17 00:00:00 2001
From: Jolan Thomassin <98430140+JolanThomassin@users.noreply.github.com>
Date: Thu, 14 Mar 2024 18:42:16 +0000
Subject: [PATCH 2/2] Fixes #79, New script printing random crawl

---
 bin/testing-current-score.py | 45 ++++++++++++++++++++++++++++++++++++
 bin/testing-current-score.sh |  6 +++++
 2 files changed, 51 insertions(+)
 create mode 100644 bin/testing-current-score.py
 create mode 100755 bin/testing-current-score.sh

diff --git a/bin/testing-current-score.py b/bin/testing-current-score.py
new file mode 100644
index 0000000..20f9cc5
--- /dev/null
+++ b/bin/testing-current-score.py
@@ -0,0 +1,45 @@
+import ailab.db as db
+
+from ailab.db.finesse.test_queries import get_random_document_score
+
+
+class NoChunkFoundError(Exception):
+    """Raised when the random-crawl query returns no rows."""
+
+
+def evaluate_random_document(project_db, schema_version="louis_v005"):
+    """Print the id, URL and every score row of one random crawl.
+
+    ``schema_version`` is forwarded to ``get_random_document_score``.
+    Raises NoChunkFoundError when the query yields no rows.
+    """
+    if project_db is None:
+        print("Database connection failed.")
+        return
+
+    with project_db.cursor() as cursor:
+        score_rows = get_random_document_score(cursor, schema_version)
+
+    if not score_rows:
+        raise NoChunkFoundError("No chunk found in the database.")
+
+    print("\n-------------")
+    print("crawl_id:", score_rows[0]["crawl_id"])
+    print("crawl_url:", score_rows[0]["crawl_url"])
+    print("\n")
+
+    print("-------------")
+    for row in score_rows:
+        print("score_type:", row["score_type"])
+        print("score:", row["score"])
+        print("\n")
+
+
+def main():
+    """Connect to the database and report on one random crawl."""
+    project_db = db.connect_db()
+    evaluate_random_document(project_db)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/testing-current-score.sh b/bin/testing-current-score.sh
new file mode 100755
index 0000000..47d1dfd
--- /dev/null
+++ b/bin/testing-current-score.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+DIRNAME=$(dirname "$0")
+. "$DIRNAME"/lib.sh
+
+
+PYTHONPATH=$PROJECT_DIR python "$DIRNAME"/testing-current-score.py