Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cutoff selection #2

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Number of intervals >= 3
  • Loading branch information
axolotlove committed May 25, 2021
commit e9b2c93afbfe5a3c480e683b23dd1dbade3792b8
17 changes: 8 additions & 9 deletions LSEA_2.1.py
Original file line number Diff line number Diff line change
@@ -17,15 +17,14 @@ def tsv_len(tsv_file):
pass
return i

def get_h2(tsv_file, ldsc_dir, column_names, ldsc_id):
def get_h2(N, tsv_file, ldsc_dir, column_names, ldsc_id):
global h2
subprocess.call(
'{0}/munge_sumstats.py --N {1} --snp {2} --p {3} --out for_h2 --sumstats {4}'.
format(ldsc_dir, N, ldsc_id, column_names[3], tsv_file), shell=True
)
format(ldsc_dir, N, ldsc_id, column_names[3], tsv_file), shell=True)
subprocess.call(
'{0}/ldsc.py --h2 for_h2.sumstats.gz --ref-ld {0}/eur_w_ld_chr_custom/genotypes_genome_hapgen.MAF_higher_0.05.with_cms \
--w-ld {0}/eur_w_ld_chr_custom/genotypes_genome_hapgen.MAF_higher_0.05.with_cms --out h2'.format(ldsc_dir),
'{0}/ldsc.py --h2 for_h2.sumstats.gz --ref-ld-chr {0}/eur_w_ld_chr/ \
--w-ld-chr {0}/eur_w_ld_chr/ --out h2'.format(ldsc_dir),
shell=True
)
pattern = re.compile('Total Observed scale h2:')
@@ -203,15 +202,15 @@ def calcuate_qvals(pvals):
heritability = args.h2
p_cutoff = args.p
out_name = args.out
if p_cutoff is None and samples_number is None:
if p_cutoff is None and samples_number is None and samples_number_col is None:
print('ERROR Either p-value cutoff or sample size should be provided')
sys.exit(1)
if col_names is None:
col_names = ["chr", "pos", "id", "p"]
if ldsc_name is None:
ldsc_name = col_names[2]
if p_cutoff is None and heritability is None:
heritability = get_h2(tsv_file, ldsc_path, col_names, ldsc_name)
heritability = get_h2(samples_number, tsv_file, ldsc_path, col_names, ldsc_name)
input_dict = get_snp_info(tsv_file, col_names)
clumped_file = run_plink(path_to_plink_dir, path_to_bfile, tsv_file, input_dict, p_cutoff,
samples_number, causal_number, heritability)
@@ -232,7 +231,7 @@ def calcuate_qvals(pvals):

pvals = []
for w in sorted(interval_counts, key=interval_counts.get, reverse=True):
if interval_counts[w] != 0:
if interval_counts[w] >= 3:
pvals.append(p_val_for_gene_set(universe["universe_intervals_number"],
interval_counts_for_universe[w], n_intervals, interval_counts[w]))

@@ -241,7 +240,7 @@ def calcuate_qvals(pvals):
my_writer = csv.writer(file, delimiter='\t')
my_writer.writerow(["Gene_set", "p-value", "q-value"])
for i, w in enumerate(sorted(interval_counts, key=interval_counts.get, reverse=True)):
if interval_counts[w] != 0:
if interval_counts[w] >= 3:
if qvals[i] <= qval_thresh:
row = [w, pvals[i], qvals[i]]
my_writer.writerow(row)