Skip to content

Commit

Permalink
Merge pull request #11 from JLSteenwyk/new_python_and_biopython_versions
Browse files Browse the repository at this point in the history
New python and biopython versions
  • Loading branch information
JLSteenwyk authored Dec 20, 2023
2 parents e70c4d7 + 8199239 commit a863f33
Show file tree
Hide file tree
Showing 31 changed files with 1,201 additions and 8 deletions.
7 changes: 6 additions & 1 deletion docs/usage/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ To report inparalogs and specify which was kept per SNAP-OG, use the -rih, \-\-r
argument. The resulting file, which will have the suffix ".inparalog_report.txt", will have three columns: |br|
- col 1 is the orthogroup file |br|
- col 2 is the inparalog that was kept |br|
- col 3 is/are the inparalog/s that were trimmed separated by a semi-colon ";" |br|
- col 3 lists the inparalog(s) that were trimmed, separated by a semicolon ";"

To generate this file, use the following command:

Expand Down Expand Up @@ -125,3 +125,8 @@ All options
+-------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
*For genome-scale analyses, we recommend changing the -o/\-\-occupancy parameter to be the same for all large gene families so that the minimum SNAP-OG occupancy is the same
for all SNAP-OGs.


.. |br| raw:: html

<br/>
17 changes: 12 additions & 5 deletions orthosnap/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,6 @@ def write_output_fasta_and_account_for_assigned_tips_single_copy_case(
write_summary_file_with_inparalog_handling(
inparalog_handling, fasta,
output_path, subgroup_counter,
assigned_tips
)
subgroup_counter += 1

Expand All @@ -387,7 +386,6 @@ def write_summary_file_with_inparalog_handling(
fasta: str,
output_path: str,
subgroup_count: int,
assigned_tips: list
):
res_arr = []

Expand All @@ -406,10 +404,19 @@ def write_summary_file_with_inparalog_handling(
f"{output_path}/{fasta_path_stripped}.orthosnap.{subgroup_count}.fa"
)

if res_arr:
for i in res_arr:
try:
if res_arr[0][1] in open(output_fasta_file_name).read():
if string_exact_match(f">{i[1]}", output_fasta_file_name):
with open(f"{output_path}{inparalog_report_output_name}", "a") as file:
file.writelines('\t'.join(i) + '\n' for i in res_arr)
file.writelines('\t'.join(i) + '\n')
except FileNotFoundError:
1


def string_exact_match(string, filename):
    """Return True if ``string`` appears as a whole token on any line of a file.

    The needle is matched literally. It must not be directly preceded or
    followed by a word character (letter, digit or underscore), so
    ``gene1`` does not match inside ``gene10``.

    Args:
        string: literal text to look for, e.g. a FASTA header such as
            ``">species0|gene0"``.
        filename: path of the text file to scan line by line.

    Returns:
        True as soon as one line contains the needle, otherwise False.

    Exceptions raised by ``open`` (for example ``FileNotFoundError``)
    propagate to the caller.
    """
    # Escape the needle: identifiers commonly contain "|", which an unescaped
    # pattern would treat as regex alternation and match only one half of.
    # Look-arounds replace "\b" because "\b" cannot match before a leading
    # non-word character such as ">" at the start of a line; for needles that
    # begin and end with word characters the two forms are equivalent.
    pattern = re.compile(r"(?<!\w)" + re.escape(string) + r"(?!\w)")
    with open(filename, "r") as f:
        return any(pattern.search(line.rstrip()) for line in f)
1 change: 0 additions & 1 deletion orthosnap/orthosnap.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def execute(
fasta,
output_path,
subgroup_counter,
assigned_tips,
)

write_output_stats(
Expand Down
2 changes: 1 addition & 1 deletion orthosnap/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.2.0"
__version__ = "1.3.0"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fake_orthologous_group_of_genes.faa.orthosnap.0 species0|gene0-duplicate_copy_1 species0|gene0-duplicate_copy_2;species0|gene0-duplicate_copy_0
fake_orthologous_group_of_genes.faa.orthosnap.1 species4|gene2-duplicate_copy_1 species4|gene2-duplicate_copy_0
fake_orthologous_group_of_genes.faa.orthosnap.1 species2|gene2-duplicate_copy_1 species2|gene2-duplicate_copy_0
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>species0|gene0-duplicate_copy_1
MFGAVAAGSEESPQAPRCISTRSSSFRVYLSAWNFGMSPERVTTEPLHSPDWDNDWLRQL
AGDIVAGSLSATIIAPITTVIDRSVVERLSSNRSILHTLRTHAICSILKPRKFYFSRPFF
IAWSLYAATYATANATDTSLEHLSKVTEKSTTASLVPTFSFLPTYVVNVCLGILKDIRFS
QIYGHPEGRLKQPPPIPRLAYMAFLFRDSITISSSFTLAPQVASLVPDWITADPHTKRTV
TQLALPALVQYVNTPFHMIALDVIARPQVATIAERSVTIRRGDLAEILNSPAYDYGQDVE
KKKNLDDTSPEDEDPFGNEEFAEVKYRTMGWWKTGILMVAENVSIGILSLPSAFATLGFV
PALIILIGISGISWYTAYILCQFKLRYPQVHSMGDAGEIIMGRFGRELLGIGQLLFLIFV
MASHVLTFTVLMNTITEHGTCTIVFGVIALIVSCVGALPRTMDKVYWMSIASFLSIVAAT
MATMIAVGVEYKGHIPLAVTTHLSFNEEFLAVSNLFFAYVGHASFFGFISEMDKPREFTK
SISVLQVIDTSLYIASAVVIYRYVGADVQSPALGSAGPLGKKIAYGLAIPTVLIAGIVNG
HVASKYVYVRVFRGTNHMHERTLLSIGSWVAIGLISWVVAWVIAESIPVFNNLLSLITAL
FGCWFAYGFPAIFWFTLNKGQWFASSRKIFLTLSNTFILAMAITLCGLGLYVSGDAISKD
SGSGVWTCANNAVTTTTTT
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
>species4|gene2-duplicate_copy_1
MAVSRDLEAPAVVNDPTADDAMVEKKEYADGTPANDPFGNEECGEVRYRVMSWWQCGTLM
VAENISLGILSLPSAVATLGIVPAVILLLGLSAISWYTGYIMGQFKLRFPQIHSMGDAGE
LLMGRFGRELFGIGQLLFLIFLMASHILTFTVVFNTITNHGTCTIVFGVVGLVVSFIGAL
PRTMGKVYWMSMASCISIVTATVVTMIAIGVQAPDHVHVDATTEVSFQDAFLAVTNIIFA
YIAHVAFFGFISEMHDPRDFPKSLTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGP
LMKKVAYGLAIPTVVIAGVVFGHVACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALGVWV
VAWVIAESIPVFNELLSLISSLFGSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFLI
LAFACALCGMGLYVSGKSIHDSSSKASWTCKNNATTTT
>species2|gene2-duplicate_copy_1
MMLWLKRRNMLMGRRQMTRLEMKNAERSNIVSCRGVMVAENISLGILSLSSAVATLGIVP
AVILLLGLSAISWYTGYIMGQFKLRFPQIHSMGDAGELLMGRFGRELFGIGQLLFLIFLM
ASHILTFSVVFNTITNHGTCTIVFGVVGLVVSFIGALPRTMGKVYWMSMASCISIVTATV
VTMIAIGVQAPDHVHVNVTTKVSFQDAFLAVTNIIFAYIAHVAFFGFISEMHDPRDFPKS
LTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGPLMKKVAYGLAIPTVVIAGVVFGH
VACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALGVWVVAWVIAESIPVFNELLSLISSLF
GSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFFILAFACALCGMGLYVSGKSIHDSS
SKASWTCKNNAT
>species1|gene2
MAVSRDLEAPAVVNDPTAYDATVEKKEYADGTPANDPFGNEECGEVKYRVMSWWQCGTLM
VAENISLGILSLPSAVATLGIVPAVILLLGLSAISWYTGYIMGQFKLRFPQVHSMGDAGE
LLMGRFGRELFGIGQLLFLIFLMASHILTFTVVFNTITNHGTCTIVFGVVGLVVSFIGAL
PRTMGKVYWMSMASCISIVTATVVTMIAIGVQAPEHVHVDATTEVSFQDAFLAVTNIIFA
YIAHVAFFGFISEMHDPRDFPKSLTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGP
VMKKVAYGLAIPTVVIAGVVFGHVACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALSVWV
VAWVIAESIPVFNELLSLISSLFGSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFLI
LAFACALCGMGLYVSGKSIHDSSSKASWTCKNNAT
>species3|gene7
MAPTTRDLEALTVHHDSDIMADDLAEKKVSANESPPENDPFGNEECGEVKYRVMKWWHCG
ILMIAENISLGILSLPSAVATLGIVPSIFLILGLSGISWYTGYVIGQFKLRYPQVHSMGD
AGEILFGRIGREILFFGQLLFCIFLMSSHILTFTVLFNTITGHGTCTIVFGVVGLVVSFI
GALPRTMGKVYWMSLASCTSITVATIVTMVAIAMQAPDHVQVDITTHPSFSTAFLSVTNI
VFAFIAHVAFFGFASEMEDPRDFPKSLAMLQVTDTTMYIVTAMVIYRYAGPDVASPALSS
AGPLMSKVAYGLAIPTVIIAGVVFGHVASKYIYVRVWRGSPQMHTNSLAAVGSWVAIALG
VWVIAWIIAESIPVFNDLLSLISSLFGSWFSYGLPAMFWLVMNRGQYTASPRKIFLTIVN
LVIFGIACAICGLGLYVSGKAIHDSSSSASWTCANNAST
>species0|gene1
MAPTTRDLEALAVHHDSDIMADDLAEKKVSANESPPENDPFGNEECGEVKYRVMKWWHCG
ILMIAENISLGILSLPSAVATLGIVPSIFLILGLSGISWYTGYVIGQFKLRYPQVHSMGD
AGEILFGRIGREILFFGQLLFCIFLMSSHILTFTVLFNTITGHGTCTIVFGVVGLVVSFI
GALPRTMGKVYWMSLASCTSITVATIVTMVAIAVQAPDHVQVDITTHPSFSTAFLSVTNI
VFAFIAHVAFFGFASEMEDPRDFPKSLAMLQVTDTTMYIVTAMVIYRYAGPDVASPALSS
AGPLMSKVAYGLAIPTVIIAGVVFGHVASKYIYVRVWRGSPQMHTNSLAAVGSWVAIALG
VWVIAWIIAESIPVFNDLLSLISSLFGSWFSYGLPAMFWLVMNRGQYTASPRKIFLTIVN
LVIFGIACAICGLGLYVSGKAIHDSSSSASWTCANNAST
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
>species2|gene5
MSPDTSDLDLETRPAVSLNRGEEYKEQPETPDEEPFGDEDDAEVRYRTLEWWFVSPVMLA
GGTSLGILTLPSAVATLGIVPGVILIVGIAILTVYTGYVMGQFKQRYPHVHSIADGGEVL
FGWVGREILGAGLLLCLVFVMGGHILTFTVMMNTLTDHGTCSVVFGVVGLLISLILSLPR
TFKRMSWLSVISFASIVGAVLVTMIALGVQRPPNVRVEVTRPTSLYRAFLAVTDIVFAYA
AHPAFFGFISEMKTPTDWPKTLCFVEIINTTLYTVTGVVIYRFAGQHVASPALGSTSPLM
AKVAYGTAIPTIVIAGVINGHIACKYIYVRVFRGTEHMHRRSLFAIGTWVVISVVLWTVA
WVIAEAVPEFNNLLSLITSLFCSWFSYGLCGAFWLFINKGLWFSSPRKTFLTIVNFTLLG
MGACLCGLGLYASGRAISEESAGRIFSCASTA
>species4|gene1
MSPDTSDLDLETRPAVSLNRGEGYKEQPETPDEEPFGNEEGAEVRYRTLEWWFVSPGSAE
GRQSRSDVACEQEMRDSHAGWGHIARHPNASLGCGDTGNRPVGLPGVILIVGIAILTVYT
GCVMGQFKQRYPHVHSIADGGEVLFGWIGREVLGAGLLLCLVFVMGGHILTFTVMMNTLT
DHGTCSVVFGVVGLLISLILSLPRTFKRMSWLSVISFASIVAAVLVTMIALGVQRPPNVK
VEVTRPTSLYRAFLAVTDIVFAYAAHPAFFGYISEMKTPTDWPKTLCFVEVINTTLYTVT
GVVIYRFAGQHVASPALGSSSPLMAKVAYGIAIPTIVIAGVINGHIACKYIYVRLFRGTE
RMHQRSLFSIGTWVAISVVLWTIAWVIAEAVPEFNNLLSLITSLFCSWFSYGLCGAFWLF
INQGLWFSSPRKTFLTIVNFTLLGMGACLCGLGLYASGRAISEESAGRSFSCASTA
>species1|gene0
MSPDTSDLDLGTRPAVSLNRGEGYKEQPETPDEEPFGDEEGAEVRYRTLEWWFVSPGSAE
GRQSRSDVACEQEMRDRGVILIVGIAILTVYTGCVMGQFKQRYPHVHSIADGGEVLFGWI
GREVLGTGLLLCLVFVMGGHILTFTVMMNTLTDHGTCSIVFGVVGLLISLILSLPRTFKR
MSWLSVISFASIVAAVLVTMIALGVQRPPNVKVEVTRPTSLYRAFLAVTDIVFAYAAHPA
FFGYISEMKTPTDWPKTLCFVEVINTTLYTVTGVVIYRFAGQHVASPALGSSSPLMAKVA
YGIAIPTIVIAGVINGHIACKYIYVRLFRGTEHMHQRSLFAIGTWVAISVVLWTIAWVIA
EAVPEFNNLLSLVLVFVCVSF
>species3|gene3
MSPPSAINNPGDPLAEQEKPVGARNTTGTEDPFSHDGVGGVKYRTLAWWQCAMIMVAETI
SLGILSLPSAVASLGLVAAVILILGLGALATYTGYTLGQFKLRYPHVHSMGDAGEVLMGR
IGREVLGTAQLLFLIFIMGSHLLTFTVMMNTLTDHGTCSIVFGVIGLAVSFAFTLPRTLK
KVSWFSISSFISIIAAVLITMIAIAIQKPGGGRVDAIVDNSFYKAFLAVTNIVFAYAGHV
AFFGFISEMRTPTDYPKTLYMLQGIDTSMYTISAVVIYRYGGRDVASPALGSTSPLMSKI
AYGIAIPTIVIAGVINGHVACKYIYVRLFRGTDRMHQRGLVSIGTWVMIGLVLWTLAWII
AEAIPVFNDLLSLITALFASWFTYGLSGIFWLFLNWGRYSSSRRKILLTGLNLLVVVVGG
CLCALGLYVSGKSIHDHPRSSSFSCANNA
>species0|gene8
MSPPSAINNPGDPLAEQEKPAGARNTTGTEDPFSHDGVGGVKYRTLAWWQCAMIMVAETI
SLGILSLPSAVASLGLVAAVILIIGLGALATYTGYTLGQFKLRYPHVHSMGDAGEVLMGR
IGREVLGTAQLLFLIFIMGSHLLTFTVMMNTLTDHGTCSIVFGVIGLAVSFAFTLPRTLK
KVSWFSISSFISIIAAVLITMIAIAIQKPGGGRVDAIVDNSFYKAFLAVTNIVFAYAGHV
AFFGFISEMRTPTDYPKTLYMLQGIDTSMYTISAVVIYRYGGRDVASPALGSTSPLMSKI
AYGIAIPTIVIAGVINGHVACKYIYVRLFRGTDRMHQRGLVSIGTWVIIGLVLWTLAWII
AEAIPVFNDLLSLITALFASWFTYGLSGIFWLFLNWGRYSSSRRKILLTGLNLLVVVVGG
CLCALGLYVSGKSIHDHPRSSSFSCANNA
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
>species3|gene4
MSTLDVKDIENGPARRVEEEGGMWENDMEKTPSVERDPFGNEAVGEVHYKTLDWWQSGML
MIAETVSLGVLSLPATVAEVGLIPAIILIVGMGIIATYSGYVIGQFRARYPFIHSMADAG
EVLCGRYGRMFTEFAQLVFFMFASGXHLVTFTVMMNTLTNHGTCSVVFGVVGLVLSFACS
LPRTMKNVSWLAVTSFLSIFTAVLITMIGVAVEHPNPPPMQLTRSTSFVKGFSAVTNIAF
AYCGHPAFFGFIAEMKEPKDFPKSLCMLQGFEIVFYTVASAVIYRYAGQNVTSPALGSAG
IVVRKVAYGIAIPTIVIAGVVLGHVAIKNVYVRLFRGTDVMHKRSALGIGAWIGLAAGYW
IIAWVIAEAIPVFSDLVSLVSALFASWFSFGLPGVFWLYMYWGNYFTSVRKTLLTLANLA
LFGIGATICVCGLWVSGLSISSDSSGSSFSCANNA
>species0|gene7
MSTLDVKDIENGPARRVEEEGGMWENDMEKTPSVERDPFGNEAVGEVHYKTLDWWQSGML
MIAETVSLGVLSLPATVAEVGLIPAIILIVGMGIIATYSGYVIGQFRARYPFIHSMADAG
EVLCGRYGRMFTEFAQLVFFMFASGSHLVTFTVMMNTLTNHGTCSVVFGVVGLVLSFACS
LPRTMKNVSWLAVTSFLSIFTAVLITMIGVAVEHPNPPPMQLTRSTSFVKGFSAVTNIAF
AYCGHPAFFGFIAEMKEPKDFPKSLCMLQGFEIVFYTVASAVIYRYAGQNVTSPALGSAG
IIVRKVAYGIAIPTIVIAGVVLGHVAIKNVYVRLFRGTDVMHKRSALGIGAWIGLAAGYW
IIAWVIAEAIPVFSDLVSLVSALFASWFSFGLPGVFWLYMYWGNYFTSVRKTLLTLANLA
LFGIGATICVCGLWVSGLSISSDSSGSSFSCANNA
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
>species2|gene4
MEAINANPPPYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPRIQNLGDA
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTVTEHGTCSIVFSVVGMVISMVL
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPGRIIEATVDTTLYSGFQAVSNIV
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEISLYLTAAVVIYYFVGKDVASPALISA
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHRRSKRSVGIWIGLGLTC
WVVAWIIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGQWFASPRKILLTILNI
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
>species4|gene0
MEAIKANPPAYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPKIQNLGDA
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTITEHGTCSIVFSVVGMVISMVL
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPDRIIEATVDTTLYSGFQAVSNIV
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEISLYLTAAVVIYYFVGKDVASPALISA
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHSRSKRSVGIWIGLGLTC
WVVAWVIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGQWFASPRKILLTILNI
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
>species1|gene4
MEAINANPPAYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPKIQNLGDA
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTITEHGTCSIVFSVVGMVISMVL
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPHRIIEATVDTTLYSGFQAVSNIV
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEICLYLTAAVVIYYFVGKDVASPALISA
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHSRSKRSVGIWIGLGLTC
WVVAWVIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGHWFASPRKILLTILNI
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
>species3|gene0
MQDYLDYLYPLIPIVHRPSFQQSLQQDRDREDSGFLGLVTAIAAVVIATMPSRFHFYRSA
TPPLRFTSRRDMVRHCYDKILRLRDSTYFDHINFQKFAISYLLYAAFRQLGDHNWSRMLD
VEATQIARLLNLHRISEYDGLNCIETQLRKKGFWLIFYGFVHNQLQNVLGERLSYLDPIL
LHSINPEDLMPLEVDDEMIFENEVLMPPSHTPCLVTGFILHSRVFWAAIRSTCPESPAEP
CPCVRARDAAVQVAYIQDRLHSLRFLLEDIPPLLRPWQPPDSQAIAHEGGSTGVTEMTQS
HFASMRANLHVTHLWLQSLLVDQLEAAQAHKSEPSLVSTNHVQPMVDAKALWLQREGLCR
QLFCILYSLPQINLEANGLHLAYKVRDIAAGLLVCPFHAAGPEAERATEYLRQSTDILSR
LDSSEGMVTMHLQTWIDTDRIKSS
>species0|gene5
MEAVHDSPPPYATEGIDEKKEDISQVEQNLKPGLEESDAFGNEEFAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGFVPAVILIVGLGILATYTGYNIGLFRERYPHIQNLADA
GEILMGPFGRELFGLGQFLFCIFVMGSHLLTFRVMMNTITDHGTCSIVFSVVGMIISMVL
SIPRTMKGMTWISFASFLSIFSAVMITMIGVGVEKHPGRIIEATVDTTLYTAFTAVSNIV
FAYCAHVAFFGLIAEMEKPKDFKKSLFMLQAFEISLYVTAACVIYYYVGKDVQSPALSSA
GPLLKKVAYGIAIPTIVGAGVVNGHIGLKYIYFRTCSKSGLIHSRSRRSVAVWIALGLAC
WLVAWIIAEAIPVFSDLNSLISALFASWFSYGLSGIYWLHLNYGQWFASPRKIALTVLNA
AIAVFGLVLCVLGLYASGTAIHNDANSNKIGWPIECWHNREPFRVRHSVRFLLPQALKHT
GKYRAIRTNCWQPAICNHPESFPFGHRLRPNVAMHTRAELATQACDICRKRKVKCNVTSS
STDVPSRCGRCARLDLPCTFLSPSRTRGPKKRSRTGSPAQEQPDWGTGGSRASGAVNYPT
DDVCDRRMFSCIMQDYLDYLYPLIPIVHRPSFQQSLQEDRDREDSGFLGLVTAIAAVVIA
TMPSRFHFYRSATPPLRFTSRRDMVRHCYDKILRLRDSTYFDHINFQKFAISYLLYAAFR
QLGDHNWSRMLDVEATQIARLLNLHRISEYDGLNCIETQLRKKGFWLIFYGFVHNQLQNV
LGERLSYLDPILLHSINPEDLMPLEVDDEMIFENEVLMPPSHTPCLVTGFILHSRVFWAA
IRSTCPESPAEPCPCVRARDAAVQVAYIQDRLHSLRFLLEDIPPLLRPWQPPDSQAIAHE
GGSTGVTEMTQSHFASMRANLHVTHLWLQSLLVDQLEAAQAHKSEPSLVSTNHVQPMVDA
KALWLQREGLCRQLFCILYSLPQINLEANGLHLAYKVRDIAAGLLVCPFHPAGPEAERAT
EYLRQSTDILSRLDSSEGMVTMHLQTWIDTDRIKSS
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
>species4|gene4
MMLEGIPPPPEPIEAKQNDQEKALDDGADLKPIDNTPYIDPFGDEQNAEVKYKTLKWWQC
GMFMIAESVSLGVLSLPATLAALGLVPAIILIVGLGILALYTGYTIGQFRQCYPHIHNLA
DAGEILMGRFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVGMLICM
VLSLPRTIKNLTYISFASFLSIFSAVMITMIGVAVQFKGGSNISITAETNLYHAFTGVTN
IVFAYCAHVAFFGLIAEMEDPKEFPKSLCMLQFFEIALYVTAAIVIYYYVGNDVVSPALG
SAGPLLKKVAYGIAIPTIVGAGVVNGHVGLKYIYVRIFRKTGRMHKRDLVSVGSWIAIGL
SCWIIAWIIAEGIPSFTNIVSLISSLFASWFSYGLPGVYWLHINWGRWFSSPRKICLTII
NLLVVGIGATMCGLGLYVSGKAIHDDSSNTSFTCANTAN
>species2|gene6
MMLDGVPQPPEPIEAKQNDQEKALDDGADLKPIDNTPYIDPFGDEQNAEVKYKTLKWWTC
AGNVECVSYGMGKRSSRLMIAESVSLGVLSLPATLASLGLVPAIILIVGLGILALYTGYT
IGQFRQCYPHIHNLADAGEILMGRFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHG
TCSIVFSIVGMLICMVLSLPRTIKNLTYISFASFLSIFSAVMITMIGVAVQFKGGSNISV
TAETNLYHAFTGVTNIVFAYCAHVAFFGLIAEMEDPKEFPKALCMLQFFEIALYVTAAIV
IYYYVGNDVVSPALGSAGPLLKKVAYGIAIPTIVGAGVVNGHVGLKYIYVRIFRKTNRMH
KRDLVSVGSWIAIGLSCWIIAWIIAEGIPSFTNIVSLISSLFASWFSYGLPGVYWLHINW
GRWFSSPRKICLTIVNLLIVCIGATMCGLGLYVSGKAIHDDSSNTSFTCANTAS
>species1|gene1
MMLEGVPPPSEPIEAKQKDQEKALDDGADLKPIDNTPYVDPFGDEQNAEVKYKTLKWWQC
GMFMIAESVSLGVLSLPATLAALGLVPAIILIVGLGILALYTGYTIGQFRQCYPHIHNLA
DAGEILMGRFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVGMLICM
VLSLPRTIKNLTYISFASFLSIFSAVMITMIGVAVQFKGGANISITTETNLYHAFTGVTN
IVFAYCAHVAFFGLIAEMEDPKEFPKSLCMLQFFEIALYVTAAIVIYYYVGNDVVSPALG
SAGPLLKKVAYGIAIPTIVGAGVVNGHVGLKYIYVRIFRKTGRMHKRDLVSVGSWIAIGL
SCWIIAWIIAEGIPSFTNIVSLISSLFASWFSYGLPGVYWLHINWGRWFSSPRKICLTII
NLLIVCIGATMCGLGLYVSGKAIHDDSSNTSFTCANTAN
>species3|gene8
MRLDGVAPPPDAVEPKSQREKDEDVEDLKAINNAPEVDAFGDEANAEVKYKTLKWWQCGM
FMIAESVSLGVLSLPATMTALGLVPSLILIIGLGILALYTGYVIGQFRERHPYIHNLADA
GEILMGSFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVAFVICLVL
SLPRTIKNLTYISTASFLSIFSAVMITMIGVGVQYKGGQNISITTETNLYTAFSGVTQIM
FAYCAHVAFFGLIAEMEEPKDFPKALCLLQGFEISLYVTAAIVIYYYVGNGVDSPALGSA
GPVLKKVAYGMAIPTIIGAGVVNGHVGLKYIYVRIFRKSGRMHKNDWVSVGSWIGIGVTC
WVIAWIIGEGIPSFSNLVSLISSLFASWFSFGLPGAYWLHMNYGQWWSSPRKCALTIINM
LIFAIGGAMCGLGLYASGKAIHDDSSRSSFSCANNA
>species0|gene4
MRLLNKVALVTGSSSGIGRAIALRYAREGAKVACADITPTARSPVPNELDITTHDAISQE
GGQAFFLQTDVGDASQMENAVLKTAQQFGRLDIMVNNAGVSLESRTPARIHETTNELYDT
TMRINTRSVFLGSKYAITQMLKQDPHPSGDRGWIINLSSILGIVAATENPSYCASKGAVS
NLTRQVALDYARDRIHANAICPGYTRTAIYEETTEYMHAAADLIRRHPFNGPGLPDDIAR
VAVVLASEDASWMTGAVVPVDGGYTARFSFASIGTLVAVTTAALLRPSICNQTALSFPVT
MRLDGVAPPPDAVEPKSQREKDEDVEDLKAIDNAPEVDAFGDEANAEVKYKTLKWWQCGM
FMIAESVSLGVLSLPATMTALGLVPSLILIIGLGILALYTGYVIGQFRERHPYIHNLADA
GEILMGSFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVAFVICLVL
SLPRTIKNLTYISTASFLSIFSAVMITMIGVGVQYKGGQNISITTETNLYTAFSGVTQIM
FAYCAHVAFFGLIAEMEEPKDFPKALCLLQGFEISLYVTAAIVIYYYVGNGVDSPALGSA
GPVLKKVAYGMAIPTIIGAGVVNGHVGLKYIYVRIFRKSGRMHKNDWVSVGSWIGIGVTC
WVIAWIIGEGIPSFSNLVSLISSLFASWFSFGLPGAYWLHMNYGQWWSSPRKCALTIINM
LIFAIGGAMCGLGLYASGKAIHDDSSRSSFSCANNA
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
>species4|gene6
MNAESQTQPQKPEDMDQKKEESMPPVRQDAFGDEEFAEVKYKVLKWWQGGLLMVAETISL
GILSLPAAVGTVGLAPGLAILISMGILASYNGYVIGQIKLRIPHISSMSDAGEVLLGPFG
RELLNAAQILLLIFIMASHILTFTVAFNVITGHATCSIVFGIVGAVISCLLSLPRTLEKV
SWLSLVSFVSIFVAVMVTMVSIGIIKPTSTWAVAKNTDLVTGFGGVTNMVFAYASHNSFF
TFIAELRDPREFPKALALLQSIDISLYIIAAVVIYYFAGDGVASPALGSAGPLISKIAYG
IALPTIIIAGVINGHIAAKAIYLRMFSGTDRIHKRDWVAVGSWIGIMAVLWTISWIIAEA
IPVFNDLIGLIAALFLSWFTFGLPGVFWLYMNKGMWFLSRRKIFLTVVNVSSVCIGLVVC
ALGLYASGVSIHQNPAGSVFSCGARS
>species2|gene1
MKAESQTQAQKPEDMDQKKEEPMPPVRQDAFGDEEFAEVKYKVLKWWQGGLLMVAETISL
GILSLPAAVGTVGLAPGLAILISMGILASYNGYVIGQIKLRFPHITSMSDAGEVLLGPFG
RELLNAAQILLLIFIMASHILTFTVAFNVMTGHATCSIVFGVVGAVISCLLSLPRTLEKV
SWLSLVSFVSIFAAVMVTMVSIGIIKPTSTWAVAKHTDLVTAFGGVTNMVFAYASHNSFF
TFIAELRDPREFPKALALLQSIDISLYVVAAVVIYYFAGDGVTSPALGSAGPLISKAAYG
IALPTIVIAGVINGHIAAKAIYLRMFSGTDRIHKRDWIAVGSWIGIMAVLWTISWIIAEA
IPVFNDLIGLIAALFLSWFTFGLPGVFWLYMNKGIWFLSRRKLFLTVVNVASVCIGLVVV
SIYILGSPTVLGLIDQCALGLYASGVSINHNPAGSVFSCGARS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
>species4|gene3
MIQIVNDPPVFDPENPQEKGIASRDASLAEGEKKYAATPAYRQDAFGDESNAEVKYKVMK
WWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVILLVSLGIVATYTGYVLGQFKLKYPWV
HNMGLAGEVVFGSWGREILGAAQMLFLVFIMASHILTFVIAMNTLTDHGTCSIVFGVGGM
IISFILSLPRTLAKMSWLSLVSFISIISAVIICMIGVIIKHPGGKVMATVDTDLVHGFSA
VTNIVFAFSGHAAYFGLMAELKDPRDFPKALMLLQSVDVCLYIIAAIVIYVYGGDAISSP
ALGSADPIVSKVAYGIALPTIIIAGVINGHVAIKYVYLRIFADKKERIHKRDWVAVSSWV
AIALSLWTVAWIIAEAIPVFSNLLSLITALFASWFTYGLSGIFWLYLNWGKYLSSPRKMF
LTIVNLFCLVFGAVLCGLGLYVSGKAIHDNPSSVSFSCANNA
>species2|gene3
MIQSVNDPPLSNPENLQEKGIASRDASLAEDEKKYAATLAYRQDAFGDESNAEVKYKVMK
WWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVILLVSLGIIATYTGYVLGQFKLQYPWV
HNMGLAGEVVFGSWGREILGAAQMLLLVFIMASHILTFVIAMNTLTDHGTCSIVFGVAGL
IVSFILSLPRTLAKMSWLSLVSFISIISAVIICMIGVIIKHPGGKVMATVDTDLVHGFSA
VTNIVFAFSGHAAYFGLMAELKDPRDFPKALMLLQSVDVCLYIIAAIVIYVYGGDEIASP
ALGSADPLISKVAYGIALPTIIIAGVINGHVAIKYVYLRIFANKKERIHKRDWVAVSSWV
AIALSLWTVAWIIAEAIPVFSNLLSLITALFASWFTYGLSGIFWLYLNRGQYLSSPRKMF
LTIVNLFCLVFGAVLCGLGLYVSGKAIHDNPSSMSFSCANNA
>species1|gene3
MTQIVNVPPVSDLENPQEKGTSHDASLAEDEKKYDATPAYRQDAFGDESNAEVKYKVMKW
CIISAVIICMIGVIIKHPGGKVMATVDTDLVHGFSAVTNIVFAFSGHAAYFGLMAELKDP
RDFPKALMLLQSVDVCLYIIAAIVIYVYGGDAISSPALGSADPIVSKVAYGIALPTIIIA
GVINGHVAIKYVYIRIFAGKKERIHKRDWVAVSSWVAIALSLWTVAWIIAEAIPVFSNLL
SLITALFASWFTYGLSGIFWLYLNWGKYLSSPRKMFLTVVNLFCLVFGAVLCGLGLYVSG
KAIHDNPSSASFSCANNA
>species3|gene5
MRSSEIYPAPPTAADQELWEEKEVSTKQSSLEIGENKDFALHQTQDAFGNEEFAEVKYKV
LKWWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVIILVCLGALATYTGYVIGQFKWRYP
HICSMADAGEVLAGRFGRELLGFAQIIFLVFIMASHLLTFTIAMNDLTNHGTCSIVFGVV
GLAISFVCTLPRTLEKMSWLSLISFISILSSVFITMIGVGISHPGKVIEATVKTDLIHGF
TAVANIVFAFSGHAAFFSLAAELKNPADYPKALMLLQSVDITLYLVAAIVIYCYGGSTVT
SPALGSASTVVSKVAYGIALPTIIIAGVINGHVSAKSVYVRIFRGTDHMHKRSWIAVGSW
TAIVLALWVLAWIIAEAIPVFNKLLSLVTALFASWFTFGLSAIFWFYMNHGQWFSSPKKV
ALSAVNLLALGVGCCLCGLGLYVSGKAIHDDPHHASFTCMSTV
>species0|gene6
MRSSEIYPANPTAADQELWEEKEVSTKQSSLEIGENKDFALHQTQDAFGNEEFAEVKYKV
LKWWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVIILVCLGALATYTGYVIGQFKWRYP
HICSMADAGEVLAGRFGRELLGFAQIIFLVFIMASHLLTFTIAMNDLTNHGTCSIVFGVV
GLAISFVCTLPRTLEKMSWLSLISFISILSSVFITMIGVGISHPGKVIEATVKTDLIHGF
TAVANIVFAFSGHAAFFSLAAELKNPADYPKALMLLQSVDITLYLVAAIVIYCYGGSTVT
SPALGSASTVVSKVAYGIALPTIIIAGVINGHVSAKSVYVRIFRGTDHMHKRSWIAVGSW
TAIVLALWVLAWIIAEAIPVFNKLLSLVTALFASWFTFGLSAIFWFYMNHGQWFSSPKKV
ALSAVNLLALGVGCCLCGLGLYVSGKAIHDDPHHASFTCMSTV
Loading

0 comments on commit a863f33

Please sign in to comment.