v0.3.1: Merge pull request #12 from hmgu-itg/dev
v0.3.1
youngchanpark authored Jul 22, 2021
2 parents 21d4d9f + 175a922 commit 2e36f2d
Showing 14 changed files with 311 additions and 313 deletions.
1 change: 0 additions & 1 deletion README.md
@@ -26,7 +26,6 @@ PeakPlotter has non-python dependencies.
In order to run PeakPlotter you need to install the following tools and add the executables to your `PATH`:
* Plink 1.9 or newer ([available here](https://www.cog-genomics.org/plink2/index))
* LocusZoom Standalone 1.4 or newer ([available here](http://genome.sph.umich.edu/wiki/LocusZoom_Standalone))
* BedTools ([available here](http://bedtools.readthedocs.io/en/latest/))
* Tabix ([available here](https://github.com/samtools/htslib))
* Moreutils (for `sponge`)

8 changes: 0 additions & 8 deletions Singularity
@@ -42,15 +42,7 @@ From: ubuntu:18.04
# Bedtools
apt install -y python-pip
apt-get install -y zlib1g zlib1g-dev firefox python-dev emacs

cd /opt
git clone https://github.com/arq5x/bedtools2.git
cd bedtools2
# gwava needs a version without "sam header" error messages
git checkout tags/v2.27.1
make && make install
pip install scipy pandas numpy scikit-learn==0.14.1
pip install pybedtools

# PLINK
cd /opt
7 changes: 1 addition & 6 deletions peakplotter/__init__.py
@@ -1,8 +1,3 @@
#!/usr/bin/env python3

from pathlib import Path

__version__ = '0.3.0'


PLOTPEAKS_SCRIPT = str(Path(__file__).parent.joinpath('plotpeaks.sh'))
__version__ = '0.3.1'
66 changes: 55 additions & 11 deletions peakplotter/_interactive_manh.py
@@ -1,12 +1,27 @@
import sys
import json
from typing import Tuple, Union, List, Dict

import requests
import pandas as pd

from peakplotter import helper
from peakplotter.data import CENTROMERE_B37, CENTROMERE_B38


def get_build_server(build: Union[int, str]) -> str:
B38_SERVER = "https://rest.ensembl.org"
B37_SERVER = "http://grch37.rest.ensembl.org"
mapper = {
'b38': B38_SERVER,
'38': B38_SERVER,
38: B38_SERVER,
'b37': B37_SERVER,
'37': B37_SERVER,
37: B37_SERVER
}
return mapper[build]
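
The new get_build_server helper accepts either integer or string build identifiers and returns the matching Ensembl REST host. A minimal usage sketch (illustrative, not part of the diff):

    server = get_build_server(38)     # "https://rest.ensembl.org"
    server = get_build_server('b37')  # "http://grch37.rest.ensembl.org"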


def _query(url, headers = None):
if headers is None:
headers = dict()
@@ -96,17 +111,18 @@ def make_resp(snps: pd.DataFrame, pheno: pd.DataFrame) -> pd.DataFrame:
return resp[['rs', 'ps', 'consequence', 'pheno']]


def get_rsid_in_region(chrom, start, end, server):
print(f"[DEBUG] get_variants_in_region({chrom}, {start}, {end}, {server}")
def get_rsid_in_region(chrom, start, end, server, logger):

logger.debug(f"get_variants_in_region({chrom}, {start}, {end}, {server}")
snps = get_variants_in_region(chrom, start, end, server)
print(f"[DEBUG] get_phenos_in_region({chrom}, {start}, {end}, {server}")
logger.debug(f"get_phenos_in_region({chrom}, {start}, {end}, {server}")
pheno = get_phenos_in_region(chrom, start, end, server)

resp = make_resp(snps, pheno)
return resp
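
Callers of get_rsid_in_region now pass a logger object instead of relying on the removed print-based debug output. A hedged caller-side sketch, assuming a standard library logging.Logger is acceptable here and using an illustrative region:

    import logging
    from peakplotter._interactive_manh import get_build_server, get_rsid_in_region

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("peakplotter")  # hypothetical logger name
    server = get_build_server(38)
    resp = get_rsid_in_region(1, 1000000, 1100000, server, logger)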


def query_vep(chrom: pd.Series, pos: pd.Series, a1: pd.Series, a2: pd.Series, server: str) -> List[Dict]:
def query_vep(chrom: pd.Series, pos: pd.Series, a1: pd.Series, a2: pd.Series, server: str, logger) -> List[Dict]:
chrom = chrom.astype(str)
pos = pos.astype(int).astype(str)
a1 = a1.astype(str)
@@ -118,12 +134,12 @@ def query_vep(chrom: pd.Series, pos: pd.Series, a1: pd.Series, a2: pd.Series, se

r = requests.post(server+ext, headers=headers, data=data)
if not r.ok:
print(data)
logger.error(data)
r.raise_for_status()
return r.json()


def _get_csq_novel_variants(e: pd.DataFrame, chrcol: str, pscol: str, a1col: str, a2col: str, server: str) -> pd.DataFrame:
# TODO: Merge with get_csq_novel_variants function
def _get_csq_novel_variants(e: pd.DataFrame, chrcol: str, pscol: str, a1col: str, a2col: str, server: str, logger) -> pd.DataFrame:
"""
This function assumes that the input DataFrame object `e` has the following columns:
- ps
@@ -139,7 +155,7 @@ def _get_csq_novel_variants(e: pd.DataFrame, chrcol: str, pscol: str, a1col: str
novelsnps=copied_e.loc[(copied_e['ensembl_rs']=="novel") & (copied_e['ld']>0.1) & (copied_e['ensembl_consequence']!='double allele'),]
if novelsnps.empty:
return copied_e
jData = query_vep(novelsnps[chrcol], novelsnps[pscol], novelsnps[a1col], novelsnps[a2col], server)
jData = query_vep(novelsnps[chrcol], novelsnps[pscol], novelsnps[a1col], novelsnps[a2col], server, logger)

csq = pd.DataFrame(jData)

@@ -149,11 +165,39 @@ def _get_csq_novel_variants(e: pd.DataFrame, chrcol: str, pscol: str, a1col: str
copied_e['ensembl_consequence'].replace('_', ' ')
return copied_e

# TODO: Merge with _get_csq_novel_variants function
def get_csq_novel_variants(e, chrcol, pscol, a1col, a2col, server, logger):
copied_e = e.copy()
copied_e.loc[(copied_e['ensembl_rs']=="novel") & (copied_e[a1col]==copied_e[a2col]),'ensembl_consequence']='double allele'
novelsnps=copied_e.loc[(copied_e['ensembl_rs']=="novel") & (copied_e['ld']>0.1) & (copied_e['ensembl_consequence']!='double allele'),]
if novelsnps.empty:
return copied_e
novelsnps['query']=novelsnps[chrcol].astype(str)+" "+novelsnps[pscol].astype(int).astype(str)+" . "+novelsnps[a1col].astype(str)+" "+novelsnps[a2col].astype(str)+" . . ."
request='{ "variants" : ["'+'", "'.join(novelsnps['query'])+'" ] }'
ext = "/vep/homo_sapiens/region"
headers={ "Content-Type" : "application/json", "Accept" : "application/json"}
logger.info("\t\t\t🌐 Querying Ensembl VEP (POST) :"+server+ext)
r = requests.post(server+ext, headers=headers, data=request)

if not r.ok:
logger.error("headers :"+request)
r.raise_for_status()
sys.exit(1)

jData = json.loads(r.text)
csq=pd.DataFrame(jData)


for _, row in csq.iterrows():
copied_e.loc[copied_e['ps']==row['start'],'ensembl_consequence']=row['most_severe_consequence']

copied_e['ensembl_consequence'].replace('_', ' ')
return copied_e
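
For reference, the POST body assembled above is a JSON object whose "variants" field holds space-delimited VCF-style strings. A standalone sketch of the same VEP request, assuming the b38 server and an illustrative variant:

    import requests

    server = "https://rest.ensembl.org"
    ext = "/vep/homo_sapiens/region"
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    payload = '{ "variants" : ["1 1158631 . A G . . ."] }'  # illustrative variant
    r = requests.post(server + ext, headers=headers, data=payload)
    r.raise_for_status()
    consequences = r.json()  # list of dicts with 'start' and 'most_severe_consequence'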


def get_overlap_genes(chrom, start, end, server) -> pd.DataFrame:
def get_overlap_genes(chrom, start, end, server, logger) -> pd.DataFrame:
url = f'{server}/overlap/region/human/{chrom}:{start}-{end}?feature=gene'
helper.info("\t\t\t🌐 Querying Ensembl overlap (Genes, GET) :"+url)
logger.info("\t\t\t🌐 Querying Ensembl overlap (Genes, GET) :"+url)
decoded = _query(url)

df = pd.DataFrame(decoded).fillna('')
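
The gene lookup in get_overlap_genes is a plain GET against the Ensembl overlap endpoint shown above. A standalone equivalent, assuming the b38 server and an illustrative region:

    import requests
    import pandas as pd

    url = "https://rest.ensembl.org/overlap/region/human/1:1000000-1100000?feature=gene"
    r = requests.get(url, headers={"Accept": "application/json"})
    r.raise_for_status()
    genes = pd.DataFrame(r.json()).fillna('')  # one row per overlapping gene
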
180 changes: 0 additions & 180 deletions peakplotter/helper.py

This file was deleted.
