-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f8862e8
commit aca6881
Showing
1 changed file
with
147 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
#!/usr/bin/python | ||
|
||
# Prepare the data from HGDP to Smilefinder | ||
# Example of use : ./count.py chr1.geno hgdp_popfile.txt chr1.map > output.csv | ||
|
||
import sys | ||
import math | ||
|
||
# position map | ||
SnpPositionMap = {} | ||
positionFH = open(sys.argv[3], 'r') | ||
for line in positionFH: | ||
(snp, chromo, pos) = line.split('\t') | ||
pos = pos.strip() | ||
SnpPositionMap[snp] = (chromo, pos) | ||
positionFH.close() | ||
|
||
# populations map | ||
populationsFH = open(sys.argv[2], 'r') | ||
populationsMap = {} | ||
for line in populationsFH: | ||
(population, individual) = line.split('\t') | ||
population = population.strip() | ||
individual = individual.strip() | ||
if population in populationsMap: | ||
populationsMap[population].append(individual) | ||
else: | ||
populationsMap[population] = [individual] | ||
populationsFH.close() | ||
|
||
# extract desired indexes | ||
genoFile = sys.argv[1] | ||
genoFH = open(genoFile, 'r') | ||
line = genoFH.readline() | ||
individuals = line.split('\t')[1:] | ||
individuals = [x.strip() for x in individuals] | ||
|
||
indexMap = {} | ||
for population in populationsMap: | ||
indexMap[population] = [] | ||
for individual in populationsMap[population]: | ||
# extract index | ||
index = individuals.index(individual) | ||
indexMap[population].append(index) | ||
|
||
# indexMap contains the indexes of desired individuals | ||
# in each population. indexMap-> population:index | ||
|
||
|
||
# count | ||
sys.stdout.write('Name\tChr\tPos'); | ||
for population in populationsMap: | ||
sys.stdout.write('\t' + population + ' MA\t'+ ' Frequency\t' + ' n\t' + 'He\t' + 'Ho'); | ||
sys.stdout.write('\t'+'Fst'+'\n') | ||
|
||
for line in genoFH: | ||
l = line.split('\t') | ||
currentSnp = l[0] | ||
populationDataMap = {} | ||
for population in populationsMap: | ||
alleleCount = {} | ||
Ho = 0 | ||
|
||
for index in indexMap[population]: | ||
genotype = l[index + 1] | ||
allele = genotype[0] | ||
for x in range(2): | ||
if allele in alleleCount: | ||
alleleCount[allele] +=1 | ||
else: | ||
alleleCount[allele] = 1 | ||
allele = genotype[1] | ||
if str(genotype[0]) != str(genotype[1]): | ||
Ho = Ho + 1 | ||
|
||
# mission is now to find max allele | ||
maxAllele = '' | ||
maxValue = 0 | ||
for allele in alleleCount: | ||
if(alleleCount[allele] > maxValue): | ||
maxValue = alleleCount[allele] | ||
maxAllele = allele | ||
# max is in maxAllele | ||
|
||
n = len(indexMap[population]) | ||
maxFrequency = float(maxValue) / n / 2 | ||
He = float(maxFrequency) * 2 * ( 1 - float(maxFrequency)) | ||
Ho = float(Ho) / n | ||
populationDataMap[population] = (maxFrequency, n, He, Ho, maxAllele) | ||
|
||
|
||
# add chr:position here | ||
sys.stdout.write(currentSnp + '\t' + SnpPositionMap[currentSnp][0] + '\t' + SnpPositionMap[currentSnp][1]) | ||
|
||
Alleles = [] | ||
MAF = [] | ||
n = [] | ||
for population in populationDataMap: | ||
sys.stdout.write('\t' + str(populationDataMap[population][4])+'\t' + str(populationDataMap[population][0]) + '\t' + str(populationDataMap[population][1]) + '\t' + str(populationDataMap[population][2]) + '\t' + str(populationDataMap[population][3])) | ||
|
||
#Fst calculations | ||
|
||
Alleles[1:1] = [str(populationDataMap[population][4])] | ||
MAF[1:1] = [str(populationDataMap[population][0])] | ||
n[1:1] = [str(populationDataMap[population][1])] | ||
|
||
if Alleles[0] == Alleles[1]: | ||
MAF1 = float(MAF[0]) | ||
MAF2 = float(MAF[1]) | ||
else: | ||
MAF1 = float(MAF[0]) | ||
MAF2 = 1 - float(MAF[1]) | ||
|
||
#MSP | ||
|
||
n1 = float(n[0]) | ||
n2 = float(n[1]) | ||
|
||
MSP = n1*(MAF1-(MAF1+MAF2)/2)**2 + n2*(MAF2-(MAF1+MAF2)/2)**2 | ||
|
||
#MSG | ||
|
||
|
||
MSG = 1/(n1+n2-2)*((n1*MAF1*(1-MAF1))+(n2*MAF2*(1-MAF2))) | ||
|
||
#nc | ||
|
||
nc = (n1+n2)-(((n1)**2+(n2)**2)/(n1+n2)) | ||
|
||
#Fst | ||
|
||
|
||
if MSP == 0: | ||
Fst = 0 | ||
elif MSG == 0: | ||
Fst = 0 | ||
else: | ||
Fst = (MSP-MSG)/(MSP+(nc-1)*MSG) | ||
|
||
if Fst < 0: | ||
Fst = 0.00000001 | ||
|
||
|
||
sys.stdout.write('\t'+str(Fst)) | ||
sys.stdout.write('\n') | ||
|
||
# ya! |