Merge pull request #4381 from broadinstitute/dev
Dev
hanars authored Sep 19, 2024
2 parents 4d44c7d + e204ced commit 8d474f7
Showing 43 changed files with 772 additions and 506 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,10 @@

## dev

+## 9/19/24
+* Update Biosample choices (REQUIRES DB MIGRATION)
+* Add support for Azure OAuth
+
## 8/14/24
* Remove ONT support (REQUIRES DB MIGRATION)
* Add "Validated Name" functional tag (REQUIRES DB MIGRATION)
2 changes: 2 additions & 0 deletions hail_search/constants.py
@@ -88,3 +88,5 @@
('likely_disease_causing', 'DM?', 'DM?'),
('hgmd_other', 'DP', None),
]

+MAX_LOAD_INTERVALS = 1000
8 changes: 5 additions & 3 deletions hail_search/queries/base.py
@@ -6,15 +6,15 @@

from hail_search.constants import AFFECTED_ID, ALT_ALT, ANNOTATION_OVERRIDE_FIELDS, ANY_AFFECTED, COMP_HET_ALT, \
COMPOUND_HET, GENOME_VERSION_GRCh38, GROUPED_VARIANTS_FIELD, ALLOWED_TRANSCRIPTS, ALLOWED_SECONDARY_TRANSCRIPTS, HAS_ANNOTATION_OVERRIDE, \
-    HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, \
+    HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, MAX_LOAD_INTERVALS, \
UNAFFECTED_ID, X_LINKED_RECESSIVE, XPOS, OMIM_SORT, FAMILY_GUID_FIELD, GENOTYPES_FIELD, AFFECTED_ID_MAP

DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets')
SSD_DATASETS_DIR = os.environ.get('SSD_DATASETS_DIR', DATASETS_DIR)

# Number of filtered genes at which pre-filtering a table by gene-intervals does not improve performance
# Estimated based on behavior for several representative gene lists
-MAX_GENE_INTERVALS = int(os.environ.get('MAX_GENE_INTERVALS', 100))
+MAX_GENE_INTERVALS = int(os.environ.get('MAX_GENE_INTERVALS', MAX_LOAD_INTERVALS))

# Optimal number of entry table partitions, balancing parallelization with partition overhead
# Experimentally determined based on compound het search performance:
@@ -92,6 +92,7 @@ def load_globals(cls):
ht_path = cls._get_table_path('annotations.ht')
ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS))
cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS}
+        return cls.LOADED_GLOBALS

@classmethod
def _format_population_config(cls, pop_config):
@@ -237,7 +238,8 @@ def __init__(self, sample_data, sort=XPOS, sort_metadata=None, num_results=100,
self._has_secondary_annotations = False
self._is_multi_data_type_comp_het = False
self.max_unaffected_samples = None
-        self._load_table_kwargs = {'_n_partitions': min(MAX_PARTITIONS, (os.cpu_count() or 2)-1)}
+        self._n_partitions = min(MAX_PARTITIONS, (os.cpu_count() or 2)-1)
+        self._load_table_kwargs = {'_n_partitions': self._n_partitions}
self.entry_samples_by_family_guid = {}

if sample_data:
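The extracted `_n_partitions` is just the old inline expression given a name so subclasses (see mito.py below) can reuse it. A small illustration of how the cap resolves; the `MAX_PARTITIONS` value here is a placeholder, not the one defined in base.py:

```python
import os

MAX_PARTITIONS = 12  # placeholder; base.py defines the real cap
n_partitions = min(MAX_PARTITIONS, (os.cpu_count() or 2) - 1)
# e.g. on an 8-CPU machine: min(12, 7) -> 7
print(n_partitions)
```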
10 changes: 7 additions & 3 deletions hail_search/queries/mito.py
@@ -6,10 +6,9 @@

from hail_search.constants import ABSENT_PATH_SORT_OFFSET, CLINVAR_KEY, CLINVAR_MITO_KEY, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_FILTER, \
CLINVAR_PATH_RANGES, CLINVAR_PATH_SIGNIFICANCES, ALLOWED_TRANSCRIPTS, ALLOWED_SECONDARY_TRANSCRIPTS, PATHOGENICTY_SORT_KEY, CONSEQUENCE_SORT, \
-    PATHOGENICTY_HGMD_SORT_KEY
+    PATHOGENICTY_HGMD_SORT_KEY, MAX_LOAD_INTERVALS
from hail_search.queries.base import BaseHailTableQuery, PredictionPath, QualityFilterFormat

-MAX_LOAD_INTERVALS = 1000

logger = logging.getLogger(__name__)

@@ -211,10 +210,15 @@ def _parse_variant_keys(self, variant_ids=None, **kwargs):
]

def _prefilter_entries_table(self, ht, parsed_intervals=None, exclude_intervals=False, **kwargs):
+        num_intervals = len(parsed_intervals or [])
if exclude_intervals and parsed_intervals:
ht = hl.filter_intervals(ht, parsed_intervals, keep=False)
-        elif len(parsed_intervals or []) >= MAX_LOAD_INTERVALS:
+        elif num_intervals >= MAX_LOAD_INTERVALS:
ht = hl.filter_intervals(ht, parsed_intervals)

+        if '_n_partitions' not in self._load_table_kwargs and num_intervals > self._n_partitions:
+            ht = ht.naive_coalesce(self._n_partitions)

return ht

def _get_allowed_consequence_ids(self, annotations):
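Why coalesce here? Filtering a table down to many small intervals can leave it with far more partitions than remaining work, and `naive_coalesce` merges adjacent partitions without a shuffle. A self-contained sketch of the effect (table size and partition counts are arbitrary, not values from the diff):

```python
import hail as hl

ht = hl.utils.range_table(10_000, n_partitions=200)
ht = ht.filter(ht.idx % 100 == 0)  # sparse survivors, still 200 partitions
ht = ht.naive_coalesce(8)          # merge adjacent partitions without a shuffle
print(ht.n_partitions())           # -> 8
```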
6 changes: 4 additions & 2 deletions hail_search/search.py
@@ -33,5 +33,7 @@ def lookup_variants(request):


def load_globals():
-    for cls in QUERY_CLASS_MAP.values():
-        cls.load_globals()
+    return {
+        str(k): v.load_globals()
+        for k, v in QUERY_CLASS_MAP.items()
+    }
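`load_globals` previously returned nothing; it now returns the loaded globals keyed by the stringified `QUERY_CLASS_MAP` keys, which is what the new `/reload_globals` route serializes. A hedged sketch of the shape (actual values depend on the loaded Hail tables):

```python
from hail_search.search import load_globals

globals_by_query = load_globals()
# e.g. {"('SNV_INDEL', 'GRCh38')": {'enums': {...}, 'versions': {...}}, ...}
print(list(globals_by_query))
```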
27 changes: 27 additions & 0 deletions hail_search/test_search.py
@@ -1,6 +1,7 @@
from aiohttp.test_utils import AioHTTPTestCase
import asyncio
from copy import deepcopy
+import hail as hl
import time
from unittest import mock

@@ -219,6 +220,32 @@ async def test_status(self):
resp_json = await resp.json()
self.assertDictEqual(resp_json, {'success': True})

+    async def test_reload_globals(self):
+        async with self.client.request('POST', '/reload_globals') as resp:
+            resp_json = await resp.json()
+        self.assertTrue(
+            resp_json["('SNV_INDEL', 'GRCh38')"]['versions']['gnomad_genomes'],
+        )
+        with mock.patch('hail_search.queries.base.hl.read_table') as mock_read_table:
+            mock_read_table.return_value = hl.Table.parallelize(
+                [],
+                hl.tstruct(),
+                globals=hl.Struct(
+                    enums=hl.Struct(reloaded_enum=1),
+                    versions=hl.Struct(reloaded_version=2),
+                )
+            )
+            async with self.client.request('POST', '/reload_globals') as resp:
+                self.assertEqual(resp.status, 200)
+                resp_json = await resp.json()
+        self.assertDictEqual(
+            resp_json["('SNV_INDEL', 'GRCh38')"],
+            {
+                'enums': {'reloaded_enum': 1},
+                'versions': {'reloaded_version': 2},
+            },
+        )

async def _assert_expected_search(self, results, gene_counts=None, **search_kwargs):
search_body = get_hail_search_body(**search_kwargs)
async with self.client.request('POST', '/search', json=search_body) as resp:
9 changes: 9 additions & 0 deletions hail_search/web_app.py
@@ -97,6 +97,14 @@ async def multi_lookup(request: web.Request) -> web.Response:
return web.json_response({'results': result}, dumps=hl_json_dumps)


+async def reload_globals(request: web.Request) -> web.Response:
+    result = await sync_to_async_hail_query(request, lambda _: load_globals())
+    return web.json_response(
+        result,
+        dumps=hl_json_dumps
+    )
+
+
async def status(request: web.Request) -> web.Response:
# Make sure the hail backend process is still alive.
await sync_to_async_hail_query(request, lambda _: hl.eval(1 + 1))
@@ -116,6 +124,7 @@ async def init_web_app():
app = web.Application(middlewares=[error_middleware], client_max_size=(1024**2)*10)
app.add_routes([
web.get('/status', status),
+        web.post('/reload_globals', reload_globals),
web.post('/search', search),
web.post('/gene_counts', gene_counts),
web.post('/lookup', lookup),
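One way to exercise the new route once the service is running; the host, port, and use of `requests` below are assumptions for illustration, not values from the diff:

```python
import requests

# POST with no body; the handler takes no parameters from the request.
resp = requests.post('http://localhost:5000/reload_globals', timeout=60)
resp.raise_for_status()
print(resp.json()["('SNV_INDEL', 'GRCh38')"]['versions'])
```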
3 changes: 2 additions & 1 deletion requirements-dev.txt
@@ -22,7 +22,7 @@ click==8.1.3
# via pip-tools
coverage==5.1
# via -r requirements-dev.in
-django==4.2.15
+django==4.2.16
# via
# -c requirements.txt
# django-appconf
@@ -83,3 +83,4 @@ wheel==0.38.4
# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools
+zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability
2 changes: 1 addition & 1 deletion requirements.txt
@@ -26,7 +26,7 @@ defusedxml==0.7.1
# via
# python3-openid
# social-auth-core
-django==4.2.15
+django==4.2.16
# via
# -r requirements.in
# django-anymail
22 changes: 22 additions & 0 deletions seqr/migrations/0074_alter_individual_primary_biosample.py
@@ -0,0 +1,22 @@
+# Generated by Django 4.2.15 on 2024-09-17 15:33
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('seqr', '0073_alter_variantfunctionaldata_functional_data_tag'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='individual',
+            name='primary_biosample',
+            field=models.CharField(blank=True, choices=[
+                ('IP', 'CL:0000034'), ('MO', 'CL:0000576'), ('LY', 'CL:0000542'), ('FI', 'CL:0000057'),
+                ('EM', 'UBERON:0005291'), ('NP', 'CL:0011020'), ('CE', 'UBERON:0002037'), ('CA', 'UBERON:0001133')
+            ], max_length=2, null=True),
+        ),
+    ]
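Per the changelog entry above, this change requires a migration run at deploy time. A minimal sketch of the standard Django invocation; how seqr deployments actually wrap this step is not shown in this diff:

```python
# Equivalent to `python manage.py migrate seqr`; requires DJANGO_SETTINGS_MODULE.
import django
from django.core.management import call_command

django.setup()
call_command('migrate', 'seqr')
```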

8 changes: 8 additions & 0 deletions seqr/models.py
@@ -553,6 +553,14 @@ class Individual(ModelWithGUID):
('CF', 'UBERON:0001359'), # cerebrospinal fluid
('U', 'UBERON:0001088'), # urine
('NE', 'UBERON:0019306'), # nose epithelium
+        ('IP', 'CL:0000034'),  # iPSC
+        ('MO', 'CL:0000576'),  # monocytes - PBMCs
+        ('LY', 'CL:0000542'),  # lymphocytes - LCLs
+        ('FI', 'CL:0000057'),  # fibroblasts
+        ('EM', 'UBERON:0005291'),  # embryonic tissue
+        ('NP', 'CL:0011020'),  # iPSC NPC
+        ('CE', 'UBERON:0002037'),  # cerebellum tissue
+        ('CA', 'UBERON:0001133'),  # cardiac tissue
]

ANALYTE_CHOICES = [
3 changes: 1 addition & 2 deletions seqr/urls.py
@@ -136,7 +136,7 @@
from seqr.views.apis.awesomebar_api import awesomebar_autocomplete_handler
from seqr.views.apis.auth_api import login_required_error, login_view, logout_view, policies_required_error
from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \
-    igv_genomes_proxy, receive_bulk_igv_table_handler
+    receive_bulk_igv_table_handler
from seqr.views.apis.analysis_group_api import update_analysis_group_handler, delete_analysis_group_handler, \
update_dynamic_analysis_group_handler, delete_dynamic_analysis_group_handler
from seqr.views.apis.project_api import create_project_handler, update_project_handler, delete_project_handler, \
@@ -277,7 +277,6 @@
'gene_info/(?P<gene_id>[^/]+)/note/(?P<note_guid>[^/]+)/delete': delete_gene_note_handler,

'hpo_terms/(?P<hpo_parent_id>[^/]+)': get_hpo_terms,
-    'igv_genomes/(?P<cloud_host>[^/]+)/(?P<file_path>.*)': igv_genomes_proxy,

'locus_lists/(?P<locus_list_guid>[^/]+)/update': update_locus_list_handler,
'locus_lists/(?P<locus_list_guid>[^/]+)/delete': delete_locus_list_handler,
6 changes: 6 additions & 0 deletions seqr/utils/social_auth_pipeline.py
@@ -37,3 +37,9 @@ def log_signed_in(backend, response, is_new=False, *args, **kwargs):
logger.info('Logged in {} ({})'.format(response['email'], backend.name), extra={'user_email': response['email']})
if is_new:
logger.info('Created user {} ({})'.format(response['email'], backend.name), extra={'user_email': response['email']})


+def log_azure_signed_in(backend, details, is_new=False, *args, **kwargs):
+    logger.info('Logged in {} ({})'.format(details['email'], backend.name), extra={'user_email': details['email']})
+    if is_new:
+        logger.info('Created user {} ({})'.format(details['email'], backend.name), extra={'user_email': details['email']})
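How this logging step gets attached to the Azure backend is not shown in this excerpt. In python-social-auth, a custom step like this is typically appended to the pipeline setting; a hedged sketch follows, where the `social_core` step names are the library's standard pipeline entries and the exact setting seqr uses is an assumption:

```python
# Sketch only: seqr's real pipeline definition lives in settings.py and is
# not part of this diff.
SOCIAL_AUTH_PIPELINE = (
    'social_core.pipeline.social_auth.social_details',
    'social_core.pipeline.social_auth.social_uid',
    'social_core.pipeline.social_auth.social_user',
    'social_core.pipeline.user.create_user',
    'social_core.pipeline.social_auth.associate_user',
    'seqr.utils.social_auth_pipeline.log_azure_signed_in',
)
```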
4 changes: 2 additions & 2 deletions seqr/views/apis/auth_api.py
@@ -12,14 +12,14 @@

from seqr.utils.logging_utils import SeqrLogger
from seqr.views.utils.json_utils import create_json_response
-from seqr.views.utils.terra_api_utils import google_auth_enabled, remove_token
+from seqr.views.utils.terra_api_utils import oauth_enabled, remove_token
from settings import LOGIN_URL, POLICY_REQUIRED_URL

logger = SeqrLogger(__name__)


def login_view(request):
-    if google_auth_enabled():
+    if oauth_enabled():
raise PermissionDenied('Username/ password authentication is disabled')

request_json = json.loads(request.body)
4 changes: 3 additions & 1 deletion seqr/views/apis/auth_api_tests.py
@@ -6,6 +6,8 @@
from seqr.views.apis.auth_api import login_view, logout_view, login_required_error, policies_required_error
from django.contrib.auth.models import User

+from seqr.views.utils.test_utils import TEST_OAUTH2_PROVIDER


class AuthAPITest(TestCase):
fixtures = ['users']
@@ -88,7 +90,7 @@ def test_login_view(self):
self.assertEqual(response.status_code, 401)
self.assertEqual(response.reason_phrase, 'Invalid credentials')

-    @mock.patch('seqr.views.utils.terra_api_utils.SOCIAL_AUTH_GOOGLE_OAUTH2_KEY', 'test_key')
+    @mock.patch('seqr.views.utils.terra_api_utils.SOCIAL_AUTH_PROVIDER', TEST_OAUTH2_PROVIDER)
def test_login_view_with_google(self):
url = reverse(login_view)
response = self.client.post(url)
20 changes: 15 additions & 5 deletions seqr/views/apis/data_manager_api.py
@@ -33,8 +33,8 @@

from seqr.models import Sample, RnaSample, Individual, Project, PhenotypePrioritization

-from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, BASE_URL, \
-    LOADING_DATASETS_DIR, PIPELINE_RUNNER_SERVER
+from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD, KIBANA_ELASTICSEARCH_USER, \
+    SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, BASE_URL, LOADING_DATASETS_DIR, PIPELINE_RUNNER_SERVER

logger = SeqrLogger(__name__)

@@ -448,11 +448,19 @@ def validate_callset(request):
def validate_callset(request):
request_json = json.loads(request.body)
validate_vcf_exists(
-        request_json['filePath'], request.user, allowed_exts=DATA_TYPE_FILE_EXTS.get(request_json['datasetType'])
+        _callset_path(request_json), request.user, allowed_exts=DATA_TYPE_FILE_EXTS.get(request_json['datasetType']),
+        path_name=request_json['filePath'],
)
return create_json_response({'success': True})


+def _callset_path(request_json):
+    file_path = request_json['filePath']
+    if not AirtableSession.is_airtable_enabled():
+        file_path = os.path.join(LOADING_DATASETS_DIR, file_path.lstrip('/'))
+    return file_path


@pm_or_data_manager_required
def get_loaded_projects(request, sample_type, dataset_type):
projects = get_internal_projects().filter(is_demo=False)
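The effect of the new `_callset_path` helper, illustrated with placeholder values (neither path below comes from the diff): when Airtable is disabled, the user-supplied path is re-rooted under `LOADING_DATASETS_DIR`, and the `lstrip('/')` keeps `os.path.join` from discarding the directory prefix on absolute inputs.

```python
import os

LOADING_DATASETS_DIR = '/local_datasets'  # placeholder for the settings value
file_path = '/data/my_callset.vcf.gz'     # placeholder user-supplied 'filePath'

# os.path.join would return '/data/my_callset.vcf.gz' unchanged if the second
# argument kept its leading slash; stripping it re-roots the path.
print(os.path.join(LOADING_DATASETS_DIR, file_path.lstrip('/')))
# -> /local_datasets/data/my_callset.vcf.gz
```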
@@ -515,7 +523,7 @@ def load_data(request):
individual_ids = _get_valid_project_samples(project_samples, sample_type, request.user)

loading_args = (
-        project_models, sample_type, dataset_type, request_json['genomeVersion'], request_json['filePath'],
+        project_models, sample_type, dataset_type, request_json['genomeVersion'], _callset_path(request_json),
)
if has_airtable:
success_message = f'*{request.user.email}* triggered loading internal {sample_type} {dataset_type} data for {len(projects)} projects'
@@ -529,6 +537,8 @@
*loading_args, user=request.user, pedigree_dir=LOADING_DATASETS_DIR, raise_pedigree_error=True,
)
response = requests.post(f'{PIPELINE_RUNNER_SERVER}/loading_pipeline_enqueue', json=request_json, timeout=60)
+    if response.status_code == 409:
+        raise ErrorsWarningsException(['Loading pipeline is already running. Wait for it to complete and resubmit'])
response.raise_for_status()
logger.info('Triggered loading pipeline', request.user, detail=request_json)

@@ -618,7 +628,7 @@ def proxy_to_kibana(request):
headers = convert_django_meta_to_http_headers(request)
headers['Host'] = KIBANA_SERVER
if KIBANA_ELASTICSEARCH_PASSWORD:
-        token = base64.b64encode('kibana:{}'.format(KIBANA_ELASTICSEARCH_PASSWORD).encode('utf-8'))
+        token = base64.b64encode('{}:{}'.format(KIBANA_ELASTICSEARCH_USER, KIBANA_ELASTICSEARCH_PASSWORD).encode('utf-8'))
headers['Authorization'] = 'Basic {}'.format(token.decode('utf-8'))

url = "http://{host}{path}".format(host=KIBANA_SERVER, path=request.get_full_path())
Expand Down
(Diffs for the remaining changed files were not loaded.)
