# angsdFST -- command log for SFS and FST estimation with angsd / realSFS

####################Print region files from bed files in ANGSD format#############################

##Intergenic regions

# Turn each BED record (columns 1-3) into one "col1:col2-col3" line.
# The coordinates are copied through unchanged.
# NOTE(review): BED start coordinates are conventionally 0-based -- confirm
# that the consumer of these region files expects the same convention.
awk '{ printf "%s:%s-%s\n", $1, $2, $3 }' \
  intergenic_regions_goodDP.bed > ../angsd/intergenic_regions_file.txt

##SI regions (same conversion, applied to the SI gene BED file)

awk '{ printf "%s:%s-%s\n", $1, $2, $3 }' \
  captusSIgenes_150flanking.bed > ../angsd/SI_regions_file.txt

####################Print population names list##################################

# Distinct values of column 2 of populationDefinitions.txt, sorted, one per
# line, written to populationNames.txt.
awk '{ print $2 }' populationDefinitions.txt \
  | sort -u > populationNames.txt

####################Print bam file list for each population#######################

# Write one BAM list per population: <population>_accessions.txt receives a
# BAM path for every row of populationDefinitions.txt whose column 2 equals
# that population name (the accession in column 1 is embedded in the path).
#
# The comparison is an exact string match. A regex match ($2 ~ var) would
# also select rows whose population merely contains the name (e.g. "LA19"
# inside "LA1958") and would misbehave on regex metacharacters.
while IFS= read -r pop; do
  [[ -n "$pop" ]] || continue   # ignore blank lines in the name list
  # (var "") forces a string comparison even if both values look numeric.
  awk -v var="$pop" \
    '$2 == (var "") { print "/data/proj2/popgen/chilense/capture/06_map_calling/" $1 "_prop-map.bam" }' \
    populationDefinitions.txt > "${pop}_accessions.txt"
done < populationNames.txt

####################Create ancestral fasta#######################################

# Run angsd with -doFasta 1 (plus -doCounts 1) on the single BAM below;
# output files are written under the prefix "ancestralState".
angsd \
  -i /data/proj2/popgen/chilense/capture/06_map_calling/Spenn_LA0716_ERR418107_prop-map.bam \
  -doFasta 1 \
  -doCounts 1 \
  -out ancestralState

####################SFS calculation test command#################################

# Arguments shared by both trial runs: the -gl/-dosaf/-anc options up front
# and the output prefix, reference, BAM list and region file at the end.
# Both runs write to the same output prefix ("test").
saf_opts=(-gl 1 -dosaf 1 -anc ancestralState.fa.gz)
trial_io=(
  -out test
  -ref ../../S_chilense_reference_rename.fasta
  -bam ../populations/LA1958_accessions.txt
  -rf contigNames_ChrOnly_regions_file.txt
)

# Trial 1: -baq 1 -C 50 together with -minMapQ 30.
angsd "${saf_opts[@]}" -baq 1 -C 50 -minMapQ 30 -minQ 20 "${trial_io[@]}"

## Trial 2: less stringent filter (-minMapQ 1, no -baq / -C), intended to
## avoid losing singletons and biasing the SFS estimate. Uses 4 threads.

angsd "${saf_opts[@]}" -minMapQ 1 -minQ 20 "${trial_io[@]}" -P 4

# Estimate the SFS from the SAF index produced under the "test" prefix.
realSFS test.saf.idx -maxIter 100 > test.sfs

###################SFS calculation for loop for all populations in central valleys#######################

# Run the -dosaf angsd command once per population named (one per line) in
# ../populations/populationNamesCentralValleys.txt. Each run reads
# ../populations/<population>_accessions.txt as its BAM list and writes its
# output under the prefix <population>.
#
# The names are loaded with mapfile (the same way populationArray is filled
# further down) rather than by word-splitting `cat` output, so each line is
# used verbatim and is never glob-expanded by the shell.
mapfile -t centralValleyPops < ../populations/populationNamesCentralValleys.txt

for POPULATION in "${centralValleyPops[@]}"; do
  [[ -n "$POPULATION" ]] || continue   # skip blank lines
  angsd -gl 1 -dosaf 1 -anc ancestralState.fa.gz -minMapQ 1 -minQ 20 \
    -out "$POPULATION" \
    -ref ../../S_chilense_reference_rename.fasta \
    -bam "../populations/${POPULATION}_accessions.txt" \
    -rf contigNames_ChrOnly_regions_file.txt -P 24
done

##################Pairwise 2D SFS for all possible combinations using array loop#######################################

##Populate array with population names

# Load the population names (one per line) into populationArray.
mapfile -t populationArray < ../populations/populationNamesCentralValleys.txt

##Calculate the 2dsfs prior for every unordered pair (i < j) of array
##elements with a nested loop. Each run reads <pop_i>.saf.idx and
##<pop_j>.saf.idx and its stdout is saved as 2DSFS/<pop_i>.<pop_j>.ml

# The results are redirected into 2DSFS/, so make sure the directory exists;
# otherwise every redirection below fails before realSFS even starts.
mkdir -p 2DSFS

for ((i = 0; i < ${#populationArray[@]}; i++)); do
  for ((j = i + 1; j < ${#populationArray[@]}; j++)); do
    realSFS "${populationArray[i]}.saf.idx" "${populationArray[j]}.saf.idx" -P 24 \
      > "2DSFS/${populationArray[i]}.${populationArray[j]}.ml"
  done
done

##"Prepare the fst for easy window analysis etc" 

# Run `realSFS fst index` for every unordered pair (i < j) of populations in
# populationArray (which must already be populated). The command is run from
# inside 2Didx/, so its inputs are addressed through ../ :
#   ../<pop_i>.saf.idx, ../<pop_j>.saf.idx and ../2DSFS/<pop_i>.<pop_j>.ml
# and the -fstout prefix <pop_i>.<pop_j> lands in 2Didx/.
#
# The loop only runs if the cd succeeded, so a missing directory can never
# cause output to be written relative to the wrong location. On success the
# shell is left inside 2Didx/.
mkdir -p 2Didx
if cd 2Didx; then
  for ((i = 0; i < ${#populationArray[@]}; i++)); do
    for ((j = i + 1; j < ${#populationArray[@]}; j++)); do
      realSFS fst index \
        "../${populationArray[i]}.saf.idx" "../${populationArray[j]}.saf.idx" \
        -whichFst 1 \
        -sfs "../2DSFS/${populationArray[i]}.${populationArray[j]}.ml" \
        -fstout "${populationArray[i]}.${populationArray[j]}"
    done
  done
else
  printf '%s\n' "cannot enter 2Didx; skipping realSFS fst index" >&2
fi

####################Test command to get FST per region#############################
# Trial on a single pair and a single region.
trial_idx=2Didx/LA1958.LA1960.fst.idx
trial_region=Scaffold_12628_Chr2:362910-364202

# Raw output of `realSFS fst stats` for that region.
realSFS fst stats "$trial_idx" -r "$trial_region"

# Same call, keeping only the "<number><whitespace><number>" part of stdout.
realSFS fst stats "$trial_idx" -r "$trial_region" \
  | grep -o "[[:digit:]].[[:digit:]]*[[:space:]][[:digit:]].[[:digit:]]*"

# Trial loop: run `realSFS fst stats` for one pair on every region listed
# (one per line) in intergenic_regions_file.txt and append the extracted
# "<number><whitespace><number>" part of each result to test.FSTs.
#
# The region list is read line by line on file descriptor 3 instead of
# word-splitting `cat` output, so each region is passed verbatim and the
# commands inside the loop cannot consume the list through stdin.
# NOTE(review): the grep pattern starts at a digit, so a leading minus sign
# in the output would not be captured, and a region producing no match adds
# no row at all -- confirm both are acceptable for downstream use.
while IFS= read -r REGION <&3; do
  [[ -n "$REGION" ]] || continue   # skip blank lines
  realSFS fst stats 2Didx/LA1958.LA1960.fst.idx -r "$REGION" \
    | grep -o "[[:digit:]].[[:digit:]]*[[:space:]][[:digit:]].[[:digit:]]*" >> test.FSTs
done 3< intergenic_regions_file.txt

##################Run FST per region for all pairwise combinations####################
##Intergenic regions
#
# From inside 2Didx/, run `realSFS fst stats` for every *.idx file on every
# region listed in ../intergenic_regions_file.txt, and append the extracted
# "<number><whitespace><number>" part of each result to
# ../intergenicFST/<pair>.region.intergenic.FST, where <pair> is the index
# file name without its ".fst.idx" suffix.
#
# - The loops only run if the cd succeeded; on success the shell stays in
#   2Didx/.
# - The output directory is created if missing.
# - Index files come from a glob (not parsed `ls` output) and regions are
#   read line by line on fd 3, so names are used verbatim and realSFS cannot
#   swallow the region list through stdin.
if cd 2Didx; then
  mkdir -p ../intergenicFST
  for j in *.idx; do
    [[ -e "$j" ]] || continue   # glob matched nothing
    PAIR=${j%.fst.idx}
    while IFS= read -r REGION <&3; do
      [[ -n "$REGION" ]] || continue   # skip blank lines
      realSFS fst stats "$j" -r "$REGION" \
        | grep -o "[[:digit:]].[[:digit:]]*[[:space:]][[:digit:]].[[:digit:]]*" \
          >> "../intergenicFST/${PAIR}.region.intergenic.FST"
    done 3< ../intergenic_regions_file.txt
  done
else
  printf '%s\n' "cannot enter 2Didx; skipping intergenic per-region FST" >&2
fi

##SI regions

# Run `realSFS fst stats` for every *.idx file in the current directory on
# every region listed in ../SI_regions_file.txt, and append the extracted
# "<number><whitespace><number>" part of each result to
# ../SIFST/<pair>.region.SI.FST, where <pair> is the index file name without
# its ".fst.idx" suffix. Paths are relative, so this is meant to be run from
# the directory holding the *.fst.idx files.
#
# The output directory is created if missing. Index files come from a glob
# (not parsed `ls` output) and regions are read line by line on fd 3, so
# names are used verbatim and realSFS cannot swallow the list through stdin.
mkdir -p ../SIFST
for j in *.idx; do
  [[ -e "$j" ]] || continue   # glob matched nothing
  PAIR=${j%.fst.idx}
  while IFS= read -r REGION <&3; do
    [[ -n "$REGION" ]] || continue   # skip blank lines
    realSFS fst stats "$j" -r "$REGION" \
      | grep -o "[[:digit:]].[[:digit:]]*[[:space:]][[:digit:]].[[:digit:]]*" \
        >> "../SIFST/${PAIR}.region.SI.FST"
  done 3< ../SI_regions_file.txt
done

#################Concatenate all files with pair identifiers as first column for R visualization###################

###############Tests
# Trial 1: prefix every row of one FST file with the file name.
# OFS is a tab so the prefix and the row are joined by exactly one tab
# (printing `"\t", $0` would emit a tab followed by a space).
awk 'BEGIN { OFS = "\t" } { print FILENAME, $0 }' ../FST/LA1958.LA1960.region.FST

# Trial 2: same for one per-pair file in the current directory, but the
# prefix is "<pop1>-<pop2>", taken from the first two dot-separated parts of
# the file name once the ".region.FST" suffix is removed.
for file in LA1958.LA1960.region.FST; do
  [[ -e "$file" ]] || continue   # nothing to do if the file is absent
  PAIR=${file%.region.FST}
  IFS=. read -r POPULATION1 POPULATION2 _ <<< "$PAIR"
  awk -v pops="${POPULATION1}-${POPULATION2}" \
    'BEGIN { OFS = "\t" } { print pops, $0 }' "$file"
done

# Trial: for every *.FST file in the current directory, print its rows
# prefixed with "<pop1>-<pop2>" and a tab. The two names are the first two
# dot-separated parts of the file name once ".region.FST" is removed.
#
# - The population names are split from the PAIR string itself (here-string),
#   not by handing $PAIR to `cut` as if it were a file name.
# - The prefix reaches awk through -v; shell variables are not expanded
#   inside a single-quoted awk program.
# - All quotes are balanced, so this loop no longer swallows the commands
#   that follow it.
for file in *.FST; do
  [[ -e "$file" ]] || continue   # glob matched nothing
  PAIR=${file%.region.FST}
  IFS=. read -r POPULATION1 POPULATION2 _ <<< "$PAIR"
  awk -v pops="${POPULATION1}-${POPULATION2}" \
    'BEGIN { OFS = "\t" } { print pops, $0 }' "$file"
done

# Trial: for every file in the current directory whose name ends in "FST",
# print its rows prefixed with "<pop1>-<pop2>" and a single tab. The two
# names are the first two dot-separated parts of the file name once the
# ".region.FST" suffix is removed.
# Files come from a glob rather than parsed `ls` output, and OFS is a tab so
# no stray space follows the separator.
for file in *FST; do
  [[ -e "$file" ]] || continue   # glob matched nothing
  PAIR=${file%.region.FST}
  IFS=. read -r POPULATION1 POPULATION2 _ <<< "$PAIR"
  awk -v pops="${POPULATION1}-${POPULATION2}" \
    'BEGIN { OFS = "\t" } { print pops, $0 }' "$file"
done

##########Functional commands intergenic regions

####intergenic regions

# For every file in the current directory whose name ends in "FST" (expected
# form: <pop1>.<pop2>.region.intergenic.FST), prefix each row with three
# tab-separated columns -- "<pop1>-<pop2>", <pop1>, <pop2> -- and append the
# result to ../popNames.fullPairwise.intergenic.FST.
#
# OFS is a tab, so the prefix is joined to the row by exactly one tab;
# printing `"\t", $0` would put a space in front of the first data column.
# Output is appended (>>): clear the target first if this is a re-run.
for file in *FST; do
  [[ -e "$file" ]] || continue   # glob matched nothing
  PAIR=${file%.region.intergenic.FST}
  IFS=. read -r POPULATION1 POPULATION2 _ <<< "$PAIR"
  awk -v pair="${POPULATION1}-${POPULATION2}" \
      -v pop1="$POPULATION1" -v pop2="$POPULATION2" \
      'BEGIN { OFS = "\t" } { print pair, pop1, pop2, $0 }' "$file" \
    >> ../popNames.fullPairwise.intergenic.FST
done

####SI regions

# For every file in the current directory whose name ends in "FST" (expected
# form: <pop1>.<pop2>.region.SI.FST), prefix each row with three
# tab-separated columns -- "<pop1>-<pop2>", <pop1>, <pop2> -- and append the
# result to ../popNames.fullPairwise.SI.FST.
#
# OFS is a tab, so the prefix is joined to the row by exactly one tab;
# printing `"\t", $0` would put a space in front of the first data column.
# Output is appended (>>): clear the target first if this is a re-run.
for file in *FST; do
  [[ -e "$file" ]] || continue   # glob matched nothing
  PAIR=${file%.region.SI.FST}
  IFS=. read -r POPULATION1 POPULATION2 _ <<< "$PAIR"
  awk -v pair="${POPULATION1}-${POPULATION2}" \
      -v pop1="$POPULATION1" -v pop2="$POPULATION2" \
      'BEGIN { OFS = "\t" } { print pair, pop1, pop2, $0 }' "$file" \
    >> ../popNames.fullPairwise.SI.FST
done

##############################Add column showing which regions the pairwise comparisons are between##########################

######Tests
# Attempt 1 (default whitespace field splitting). While reading the first
# file (NR==FNR) column 5 is stored in id[], keyed by column 1. For records
# of the second file whose column 2 is a key of id[], the value id[$1] is
# assigned to field 6 and printed, followed by the (now modified) record.
# NOTE(review): the guard tests $2 but the lookup uses $1.
awk 'NR==FNR { id[$1]=$5; next } ($2 in id){ print $6=id[$1], $0}' ../populations/selected_pops.txt popNames.fullPairwise.FST

# Attempt 2 (tab-separated input). Same lookup table; for records whose
# column 2 is a key, prints the record, a tab, the output separator (default:
# a space) and then id[$2].
awk 'BEGIN {FS = "\t"} NR==FNR { id[$1]=$5; next } ($2 in id){ print $0"\t", id[$2]}' ../populations/selected_pops.txt popNames.fullPairwise.FST

# Attempt 3 (tab-separated input; reads selected_pops_rename_UNIX.txt in the
# current directory instead). Prints only the records whose column 2 and
# column 3 map to the same id[] value, followed by both values and the
# literal "Within group".
awk 'BEGIN {FS = "\t"} NR==FNR { id[$1]=$5; next } {if(id[$2]==id[$3]) {print $0"\t", id[$2]"\t"id[$3]"\t","Within group"}}' selected_pops_rename_UNIX.txt popNames.fullPairwise.FST

######Functional

####intergenic regions

# Two-file awk join on tab-separated data.
#   File 1 (selected_pops_rename_UNIX.txt): column 5 is stored in id[],
#     keyed by column 1.
#   File 2 (popNames.fullPairwise.intergenic.FST): for each row, the id[]
#     values of columns 2 and 3 are looked up and three columns are appended:
#     both values, then a label -- "Within group" when they are equal,
#     otherwise the two values joined by "-" with the smaller one first.
awk '
  BEGIN { FS = OFS = "\t" }

  # First file: build the lookup table.
  NR == FNR { id[$1] = $5; next }

  # Second file: annotate each row.
  {
    left = id[$2]
    right = id[$3]
    if (left == right)
      label = "Within group"
    else if (left > right)
      label = right "-" left
    else if (left < right)
      label = left "-" right
    else
      next
    print $0, left, right, label
  }
' selected_pops_rename_UNIX.txt popNames.fullPairwise.intergenic.FST \
  > pairwisePopulations.intergenic.FST

####SI regions

# Two-file awk join on tab-separated data.
#   File 1 (selected_pops_rename_UNIX.txt): column 5 is stored in id[],
#     keyed by column 1.
#   File 2 (popNames.fullPairwise.SI.FST): for each row, the id[] values of
#     columns 2 and 3 are looked up and three columns are appended: both
#     values, then a label -- "Within group" when they are equal, otherwise
#     the two values joined by "-" with the smaller one first.
awk '
  BEGIN { FS = OFS = "\t" }

  # First file: build the lookup table.
  NR == FNR { id[$1] = $5; next }

  # Second file: annotate each row.
  {
    left = id[$2]
    right = id[$3]
    if (left == right)
      label = "Within group"
    else if (left > right)
      label = right "-" left
    else if (left < right)
      label = left "-" right
    else
      next
    print $0, left, right, label
  }
' selected_pops_rename_UNIX.txt popNames.fullPairwise.SI.FST \
  > pairwisePopulations.SI.FST