Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/BioData-PT/beacon2-ri-api into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
FracassandoCasualmente committed May 23, 2024
2 parents cd284c1 + 24b0f96 commit 8112b99
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 15 deletions.
29 changes: 21 additions & 8 deletions beacon/db/extract_filtering_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from utils import get_filtering_documents


ONTOLOGY_REGEX = re.compile(r"([_A-Za-z0-9]+):([_A-Za-z0-9^\-]+)")
ONTOLOGY_REGEX = re.compile(r"([_A-Za-z0-9]+):([_A-Za-z0-9\.^\-]+)")

database_password = os.getenv('DB_PASSWD')

Expand Down Expand Up @@ -190,12 +190,16 @@ def insert_all_ontology_terms_used():

# put genomicVariations at the end
collections = [c for c in collections if c != 'genomicVariations']
collections = [c for c in collections if c != 'access_tokens']
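# REMOVED variations because the script is not optimized to handle files of this size
# NOTE(review): genomicVariations is still appended to the list just below — confirm whether it is meant to be skipped
# TODO: optimize the script to get ontologies from genomic variations too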
collections.append('genomicVariations')

print("Collections:", collections)
for c_name in collections:
terms_ids = find_ontology_terms_used(c_name)
terms = get_filtering_object(terms_ids, c_name)
print(f"Concluded get_filtering_object() on {c_name}")
if len(terms) > 0:
client.beacon.filtering_terms.insert_many(terms)
print(f"Finished {c_name}")
Expand All @@ -204,31 +208,40 @@ def find_ontology_terms_used(collection_name: str) -> List[Dict]:
print(collection_name)
terms_ids = []
count = client.beacon.get_collection(collection_name).estimated_document_count()
if count < 10000:

#MAX_LIMIT = 100000
MAX_LIMIT = 5000
#STEP = 10000
STEP = 100

if count < MAX_LIMIT:
num_total=count
else:
num_total=10000
num_total=MAX_LIMIT

i=0
if count > 10000:
while i < 100001:
xs = client.beacon.get_collection(collection_name).find().skip(i).limit(10000)

if count > MAX_LIMIT:
while i < MAX_LIMIT:
xs = client.beacon.get_collection(collection_name).find().skip(i).limit(STEP)
for r in tqdm(xs, total=num_total):
matches = ONTOLOGY_REGEX.findall(str(r))
for ontology_id, term_id in matches:
term = ':'.join([ontology_id, term_id])
if term not in terms_ids:
terms_ids.append(term)
i += 10000
i += STEP
print(i)
else:
xs = client.beacon.get_collection(collection_name).find().skip(0).limit(10000)
xs = client.beacon.get_collection(collection_name).find().skip(0).limit(MAX_LIMIT)
for r in tqdm(xs, total=num_total):
matches = ONTOLOGY_REGEX.findall(str(r))
for ontology_id, term_id in matches:
term = ':'.join([ontology_id, term_id])
if term not in terms_ids:
terms_ids.append(term)

print("Concluded find_ontology_terms_used")
return terms_ids


Expand Down
2 changes: 1 addition & 1 deletion beacon/db/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

LOG = logging.getLogger(__name__)

CURIE_REGEX = r'^([a-zA-Z0-9]*):\/?[a-zA-Z0-9]*$'
CURIE_REGEX = r'^([a-zA-Z0-9]*):\/?[a-zA-Z0-9\.]*$'

def apply_filters(query: dict, filters: List[dict], collection: str) -> dict:
LOG.debug("")
Expand Down
1 change: 1 addition & 0 deletions deploy/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ services:
beacon:
build: ..
image: pt/beacon:2.0
restart: always
hostname: beacon
container_name: beacon
volumes:
Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
# That's the case for the HTTPFound redirections
# It seems the cookies are not sent in the response, and therefore, the browser does not store them
# Obviously, sessions do not work then, and the OpenID Connect code in auth.py is broken
aiohttp==3.8.1
aiohttp==3.9.4
aiohttp-cors==0.7.0
asyncpg~=0.24.0
pyyaml~=6.0
cryptography==39.0.1
cryptography==42.0.4
jinja2~=3.0.2
#aiohttp_csrf
pymongo~=4.0.1
Expand All @@ -22,11 +22,11 @@ pyhumps
progressbar~=2.5
fastobo~=0.11.1
pronto~=2.4.4
requests==2.25.1
requests==2.32.0
pandas==1.5.3
scipy==1.10.0
numpy==1.24.2
urllib3==1.26.13
urllib3==1.26.18
#beautifulsoup4==4.11.2
#torch==1.11.0
obonet==0.3.1
Expand Down
3 changes: 1 addition & 2 deletions training-ui-files/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ COPY secret.py /usr/share/training-ui/beacon-2.x-training-ui/app/app/secret.py
COPY generateRandomSecretKey.py /usr/share/training-ui/beacon-2.x-training-ui/app/app/generateRandomSecretKey.py

#CMD git pull && python3 app/manage.py runserver 0.0.0.0:8080
CMD git pull origin dev && git checkout dev && python3 app/manage.py runserver 0.0.0.0:8080


CMD git pull origin main && git checkout main && python3 app/manage.py runserver 0.0.0.0:8080

0 comments on commit 8112b99

Please sign in to comment.