diff --git a/bin/driver/Driver.class b/bin/driver/Driver.class
index fa0c929..44c561f 100644
Binary files a/bin/driver/Driver.class and b/bin/driver/Driver.class differ
diff --git a/interaction/input/alle.js b/interaction/input/alle.js
deleted file mode 100644
index 361b5bd..0000000
--- a/interaction/input/alle.js
+++ /dev/null
@@ -1,4127 +0,0 @@
-{
-  "items" : [
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Dubois, Jean, Paul",
-      "title" : "Prof.",
-      "given-name" : "Jean-Paul",
-      "family-name" : "Dubois",
-      "home-page" : "https://prof.ti.bfh.ch/doj1/",
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.ti.bfh.ch/de/bachelor/informatik.html",
-      "since" : "2012",
-      "email" : "jean-paul.dubois@bfh.ch"
-    },
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "representative" : "yes",
-      "role" : ["Board member"],
-      "label" : "Dubuis, Eric",
-      "title" : "Prof.",
-      "given-name" : "Eric",
-      "family-name" : "Dubuis",
-      "home-page" : "https://prof.ti.bfh.ch/due1/",
-      "fields" : ["Electronic Voting", "Software Engineering"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/research_institute_for_security_in_the_information_society.html",
-      "email" : "eric.dubuis@bfh.ch"
-    },
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Haenni, Rolf",
-      "title" : "Prof.",
-      "given-name" : "Rolf",
-      "family-name" : "Haenni",
-      "home-page" : "https://prof.hti.bfh.ch/hnr1/",
-      "fields" : ["Privacy and Security"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/research_institute_for_security_in_the_information_society.html",
-      "email" : "rolf.haenni@bfh.ch",
-      "since" : "2005"
-    },
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Anrig, Bernhard",
-      "title" : "Prof.",
-      "given-name" : "Bernhard",
-      "family-name" : "Anrig",
-      "home-page" : "https://prof.ti.bfh.ch/arb1/",
-      "fields" : ["Privacy and Security"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/research_institute_for_security_in_the_information_society.html",
-      "email" : "bernhard.anrig@bfh.ch"
-    },
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Bangerter, Endre",
-      "title" : "Prof.",
-      "given-name" : "Endre",
-      "family-name" : "Bangerter",
-      "home-page" : "https://prof.ti.bfh.ch/bte1/",
-      "fields" : ["Privacy and Security"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/research_institute_for_security_in_the_information_society.html",
-      "email" : "endre.bangerter@bfh.ch"
-    },
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Fiedler, Ulrich",
-      "title" : "Prof.",
-      "given-name" : "Ulrich",
-      "family-name" : "Fiedler",
-      "home-page" : "https://prof.ti.bfh.ch/flu1/",
-      "fields" : ["Mobile Computing"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/research_institute_for_security_in_the_information_society.html",
-      "email" : "ulrich.fiedler@bfh.ch"
-    },
-    {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Gasenzer, Rolf",
-      "title" : "Prof.",
-      "given-name" : "Rolf",
-      "family-name" : "Gasenzer",
-      "home-page" : "https://prof.ti.bfh.ch/gzr1/",
-      "fields" : ["ICT based Management"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/institute_for_ict_based_management.html",
-      "email" : "rolf.gasenzer@bfh.ch"
-    },
-     {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Holm, Jürgen",
-      "title" : "Prof.",
-      "given-name" : "Jürgen",
-      "family-name" : "Holm",
-      "home-page" : "https://prof.ti.bfh.ch/hoj2/",
-      "fields" : ["Medizininformatik"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.ti.bfh.ch/de/bachelor/medizininformatik.html",
-      "email" : "juergen.holm@bfh.ch"
-    },
-     {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Künzler, Urs",
-      "title" : "Prof.",
-      "given-name" : "Urs",
-      "family-name" : "Künzler",
-      "home-page" : "https://prof.ti.bfh.ch/klu1/",
-      "fields" : ["Computer Perception and Virtual Reality"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/institut_fuer_human_centered_engineering/forschungsgruppen/huce_cpvrlab.html",
-      "email" : "urs.kuenzler@bfh.ch"
-    },
-     {
-      "file" : "bfh",
-      "org" : "Bern University of Applied Sciences and Arts - BFH",
-      "inst" : "BFH-TI",
-      "label" : "Sauter, Urs",
-      "title" : "Prof.",
-      "given-name" : "Urs",
-      "family-name" : "Sauter",
-      "home-page" : "https://prof.ti.bfh.ch/stu1/",
-      "fields" : ["ICT based management"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://forschung.ti.bfh.ch/de/institute/institute_for_ict_based_management.html",
-      "email" : "urs.sauter@bfh.ch"
-    },
-{
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Aberer, Karl",
-      "title" : "Prof.",
-      "given-name" : "Karl",
-      "family-name" : "Aberer",
-      "home-page" : "http://people.epfl.ch/134136",
-      "fields" : ["Distributed Information Systems"],
-      "group" : "Distributed Information Systems",
-      "group-page" : "http://lsirwww.epfl.ch/",
-      "email" : "karl.aberer@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Ailamaki, Anastasia",
-      "title" : "Prof.",
-      "given-name" : "Anastasia",
-      "family-name" : "Ailamaki",
-      "home-page" : "http://people.epfl.ch/anastasia.ailamaki",
-      "fields" : ["Data-Intensive Applications and Systems"],
-      "group" : "Data-Intensive Applications and Systems Laboratory",
-      "group-page" : "http://dias.epfl.ch/",
-      "email" : "anastasia.ailamaki@epfl.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Argyraki, Katerina",
-      "title" : "Prof.",
-      "given-name" : "Katerina",
-      "family-name" : "Argyraki",
-      "home-page" : "http://people.epfl.ch/katerina.argyraki",
-      "fields" : ["Network Architecture"],
-      "group" : "Network Architecture Lab",
-      "group-page" : "http://nal.epfl.ch/",
-      "email" : "katerina.argyraki@epfl.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Le Boudec, Jean, Yves",
-      "title" : "Prof.",
-      "given-name" : "Jean-Yves",
-      "family-name" : "Le Boudec",
-      "home-page" : "http://icapeople.epfl.ch/leboudec",
-      "fields" : ["Performance Evaluation"],
-      "group" : "Computer Communications and Applications Laboratory 2",
-      "group-page" : "http://icawww.epfl.ch/",
-      "email" : "Jean-Yves.Leboudec@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Bugnion, Edouard",
-      "title" : "Prof.",
-      "given-name" : "Edouard",
-      "family-name" : "Bugnion",
-      "home-page" : "http://people.epfl.ch/edouard.bugnion",
-      "fields" : ["Computer Systems"],
-      "group" : "Data Center Systems Laboratory",
-      "group-page" : "http://dcsl.epfl.ch/",
-      "email" : "edouard.bugnion@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Candea, George",
-      "title" : "Prof.",
-      "given-name" : "George",
-      "family-name" : "Candea",
-      "home-page" : "http://people.epfl.ch/george.candea",
-      "fields" : ["Dependable Systems"],
-      "group" : "Dependable Systems Laboratory",
-      "group-page" : "http://dslab.epfl.ch/",
-      "email" : "George.Candea@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "De Micheli, Giovanni",
-      "title" : "Prof.",
-      "given-name" : "Giovanni",
-      "family-name" : "De Micheli",
-      "home-page" : "http://people.epfl.ch/giovanni.demicheli",
-      "fields" : ["Integrated Systems"],
-      "group" : "Integrated Systems Laboratory ",
-      "group-page" : "http://si.epfl.ch/",
-      "email" : "giovanni.demicheli@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Dillenbourg, Pierre",
-      "title" : "Prof.",
-      "given-name" : "Pierre",
-      "family-name" : "Dillenbourg",
-      "home-page" : "http://people.epfl.ch/pierre.dillenbourg",
-      "fields" : ["CSCW"],
-      "group" : "CRAFT Pedagogical Research and Support",
-      "group-page" : "http://craft.epfl.ch/",
-      "email" : "pierre.dillenbourg@epfl.ch",
-      "since" : "2009"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "representative" : "yes",
-      "label" : "Falsafi, Babak",
-      "title" : "Prof.",
-      "given-name" : "Babak",
-      "family-name" : "Falsafi",
-      "home-page" : "http://people.epfl.ch/babak.falsafi",
-      "fields" : ["Parallel Systems"],
-      "group" : "Parallel Systems Architecture Lab",
-      "group-page" : "http://parsa.epfl.ch/",
-      "email" : "babak.falsafi@epfl.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Faltings, Boi",
-      "title" : "Prof.",
-      "given-name" : "Boi",
-      "family-name" : "Faltings",
-      "home-page" : "http://liawww.epfl.ch/People/faltings/",
-      "fields" : ["Artificial Intelligence"],
-      "group" : "Artificial Intelligence",
-      "group-page" : "http://liawww.epfl.ch/",
-      "email" : "Boi.Faltings@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Ford, Bryan Alexander",
-      "title" : "Prof.",
-      "given-name" : "Bryan",
-      "family-name" : "Ford",
-      "home-page" : "http://people.epfl.ch/bryan.ford",
-      "fields" : ["(tbd)"],
-      "group" : "Decentralized and Distributed Systems Laboratory",
-      "group-page" : "http://dedis.epfl.ch/",
-      "email" : "bryan.ford@epfl.ch",
-      "since" : "2015"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Fua, Pascal",
-      "title" : "Prof.",
-      "given-name" : "Pascal",
-      "family-name" : "Fua",
-      "home-page" : "http://cvlab.epfl.ch/~fua/",
-      "fields" : ["Computer Vision"],
-      "group" : "Computer Vision Laboratory ",
-      "group-page" : "http://cvlab.epfl.ch/",
-      "email" : "Pascal.Fua@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Gastpar, Michael",
-      "title" : "Prof.",
-      "given-name" : "Michael",
-      "family-name" : "Gastpar",
-      "home-page" : "http://people.epfl.ch/michael.gastpar",
-      "fields" : ["Information Theory"],
-      "group" : "Laboratory for Information & Networked Systems",
-      "group-page" : "http://linx.epfl.ch/",
-      "email" : "michael.gastpar@epfl.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Gerstner, Wulfram",
-      "title" : "Prof.",
-      "given-name" : "Wulfram",
-      "family-name" : "Gerstner",
-      "home-page" : "http://diwww.epfl.ch/~gerstner/",
-      "fields" : ["Computational Neuroscience"],
-      "group" : "Computational Neuroscience",
-      "group-page" : "http://lcn.epfl.ch/",
-      "email" : "wulfram.gerstner@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Grossglauser, Matthias",
-      "title" : "Prof.",
-      "given-name" : "Matthias",
-      "family-name" : "Grossglauser",
-      "home-page" : "http://icapeople.epfl.ch/grossglauser",
-      "fields" : ["Computer Communications and Applications"],
-      "group" : "Computer Communications and Applications Laboratory 4 ",
-      "group-page" : "http://icawww.epfl.ch/",
-      "email" : "Matthias.Grossglauser@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Guerraoui, Rachid",
-      "title" : "Prof.",
-      "given-name" : "Rachid",
-      "family-name" : "Guerraoui",
-      "home-page" : "http://lpdwww.epfl.ch/rachid/index.html",
-      "fields" : ["Distributed Programming"],
-      "group" : "Distributed Programming",
-      "group-page" : "http://lpd.epfl.ch/",
-      "email" : "rachid.guerraoui@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Hersch, Roger David",
-      "title" : "Prof.",
-      "given-name" : "Roger David",
-      "family-name" : "Hersch",
-      "home-page" : "http://diwww.epfl.ch/w3lsp/hersch/",
-      "fields" : ["Peripheral Systems"],
-      "group" : "Peripheral Systems",
-      "group-page" : "http://diwww.epfl.ch/w3lsp/",
-      "email" : "roger.hersch@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Huang, Jeffrey",
-      "title" : "Prof.",
-      "given-name" : "Jeffrey",
-      "family-name" : "Huang",
-      "home-page" : "http://people.epfl.ch/jeffrey.huang",
-      "fields" : ["Media and Design"],
-      "group" : "Media and Design Laboratory ",
-      "group-page" : "http://ldm.epfl.ch/",
-      "email" : "jeffrey.huang@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Hubaux, Jean, Pierre",
-      "title" : "Prof.",
-      "given-name" : "Jean-Pierre",
-      "family-name" : "Hubaux",
-      "home-page" : "http://people.epfl.ch/jean-pierre.hubaux",
-      "fields" : ["Computer Communications and Applications"],
-      "group" : "Computer Communications and Applications Laboratory 1 ",
-      "group-page" : "http://icawww.epfl.ch/",
-      "email" : "jean-pierre.hubaux@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Ienne, Paolo",
-      "title" : "Prof.",
-      "given-name" : "Paolo",
-      "family-name" : "Ienne",
-      "home-page" : "http://people.epfl.ch/paolo.ienne",
-      "fields" : ["Processor Architecture"],
-      "group" : "Processor Architecture",
-      "group-page" : "http://lap.epfl.ch/",
-      "email" : "Paolo.Ienne@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Jakob, Wenzel",
-      "title" : "Prof.",
-      "given-name" : "Wenzel",
-      "family-name" : "Jakob",
-      "home-page" : "http://mitsuba-renderer.org/wenzel",
-      "fields" : ["Image Synthesis Computer Graphics"],
-      "group" : "Realistic Graphics Laboratory",
-      "group-page" : "http://rgl.epfl.ch/",
-      "email" : "Wenzel.Jakob@epfl.ch",
-      "since" : "2016"
-    },
-    {     
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Kapralov, Mikhail",
-      "title" : "Prof.",
-      "given-name" : "Mikhail",
-      "family-name" : "Kapralov",
-      "home-page" : "http://theory.epfl.ch/kapralov",
-      "fields" : ["Theory of Computation"],
-      "group" : "Theory of Computation Laboratory 4",
-      "group-page" : "http://theory.epfl.ch/kapralov",
-      "email" : "michael.kapralov@epfl.ch",
-      "since" : "2016"
-    },
-    { 
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Koch, Christoph",
-      "title" : "Prof.",
-      "given-name" : "Christoph",
-      "family-name" : "Koch",
-      "home-page" : "http://people.epfl.ch/christoph.koch",
-      "fields" : ["Data Analysis"],
-      "group" : "Data Analysis Theory & Applications Lab",
-      "group-page" : "http://data.epfl.ch/",
-      "email" : "christoph.koch@epfl.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Kozyrakis, Christos",
-      "title" : "Prof.",
-      "given-name" : "Christos",
-      "family-name" : "Kozyrakis",
-      "home-page" : "(tbd)",
-      "fields" : ["(tbd)"],
-      "group" : "Systems, Architecture and Infrastructure Laboratory ",
-      "group-page" : "http://sail.epfl.ch/",
-      "email" : "Christos.Kozyrakis@epfl.ch",
-      "since" : "2015"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Kuncak, Viktor",
-      "title" : "Prof.",
-      "given-name" : "Viktor",
-      "family-name" : "Kuncak",
-      "home-page" : "http://people.epfl.ch/viktor.kuncak",
-      "fields" : ["Automated Reasoning"],
-      "group" : "Laboratory for Automated Reasoning and Analysis ",
-      "group-page" : "http://lara.epfl.ch/~kuncak/",
-      "email" : "Viktor.Kuncak@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Board member"],
-      "label" : "Larus, James",
-      "title" : "Prof.",
-      "given-name" : "James",
-      "family-name" : "Larus",
-      "home-page" : "http://people.epfl.ch/james.larus",
-      "fields" : ["(tbd)"],
-      "group" : "Very Large Scale Computing Laboratory",
-      "since" : "2014",
-      "email" : "james.larus@epfl.ch"
-    },    
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Lenstra, Arjen",
-      "title" : "Prof.",
-      "given-name" : "Arjen",
-      "family-name" : "Lenstra",
-      "home-page" : "http://people.epfl.ch/arjen.lenstra",
-      "fields" : ["Crypotologic Algorithms"],
-      "group" : "Laboratory for cryptologic algorithms ",
-      "group-page" : "http://people.epfl.ch/arjen.lenstra",
-      "email" : "Arjen.Lenstra@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Moret, Bernard",
-      "title" : "Prof.",
-      "given-name" : "Bernard",
-      "family-name" : "Moret",
-      "home-page" : "http://people.epfl.ch/bernard.moret",
-      "fields" : ["Computational Biology and Bioinformatics "],
-      "group" : "Laboratory for Computational Biology and Bioinformatics ",
-      "group-page" : "http://lcbb.epfl.ch/",
-      "email" : "Bernard.Moret@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Odersky, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Odersky",
-      "home-page" : "http://lampwww.epfl.ch/~odersky/",
-      "fields" : ["Programming Methods"],
-      "group" : "Programming Methods Laboratory",
-      "group-page" : "http://lampwww.epfl.ch/",
-      "email" : "martin.odersky@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Pauly, Mark",
-      "title" : "Prof.",
-      "given-name" : "Mark",
-      "family-name" : "Pauly",
-      "home-page" : "http://people.epfl.ch/mark.pauly",
-      "fields" : ["Computer Graphics"],
-      "group" : "Computer Graphics & Geometry Lab",
-      "group-page" : "http://lgg.epfl.ch/",
-      "email" : "mark.pauly@epfl.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Rimoldi, Bixio",
-      "title" : "Prof.",
-      "given-name" : "Bixio",
-      "family-name" : "Rimoldi",
-      "home-page" : "http://people.epfl.ch/113819",
-      "fields" : ["Mobile Communications"],
-      "group" : "Mobile Communications Laboratory",
-      "group-page" : "http://lcmwww.epfl.ch/",
-      "email" : "bixio.rimoldi@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Salathé, Marcel",
-      "title" : "Prof.",
-      "given-name" : "Marcel",
-      "family-name" : "Salathé",
-      "home-page" : "(tbd)",
-      "fields" : ["(tbd)"],
-      "group" : "(tbd)",
-      "group-page" : "(tbd)",
-      "email" : "Marcel.Salathe@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Sanchez, Eduardo",
-      "title" : "Prof.",
-      "given-name" : "Eduardo",
-      "family-name" : " Sanchez",
-      "home-page" : "http://people.epfl.ch/eduardo.sanchez",
-      "fields" : ["(tbd)"],
-      "group" : "Reconfigurable Digital Systems Group",
-      "group-page" : "http://rdsg.epfl.ch/",
-      "email" : "eduardo.sanchez@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Shokrollahi, Amin Mohammad",
-      "title" : "Prof.",
-      "given-name" : "Amin Mohammad",
-      "family-name" : "Shokrollahi",
-      "home-page" : "http://algo.epfl.ch/en/group/members/amin",
-      "fields" : ["Algorithmics"],
-      "group" : "Algorithmics Laboratory ",
-      "group-page" : "http://algo.epfl.ch/",
-      "email" : "amin.shokrollahi@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Sifakis, Joseph",
-      "title" : "Prof.",
-      "given-name" : "Joseph",
-      "family-name" : "Sifakis",
-      "home-page" : "http://people.epfl.ch/joseph.sifakis",
-      "fields" : ["Rigorous System Design"],
-      "group" : "Rigorous System Design Laboratory",
-      "group-page" : "https://people.epfl.ch/joseph.sifakis",
-      "email" : "joseph.sifakis@epfl.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Suesstrunk, Sabine",
-      "title" : "Prof.",
-      "given-name" : "Sabine",
-      "family-name" : "S&uuml;sstrunk",
-      "home-page" : "http://ivrl.epfl.ch/people/susstrunk",
-      "fields" : ["Audiovisual Communications"],
-      "group" : "Image and Visual Representation Laboratory",
-      "group-page" : "http://ivrl.epfl.ch/",
-      "email" : "sabine.susstrunk@epfl.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Svensson, Ola Nils Anders",
-      "title" : "Prof.",
-      "given-name" : "Ola",
-      "family-name" : "Svensson",
-      "home-page" : "http://people.epfl.ch/ola.svensson",
-      "fields" : ["(tbd)"],
-      "group" : "Theory of Computation Laboratory 2",
-      "group-page" : "http://theory.epfl.ch",
-      "email" : "ola.svensson@epfl.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Telatar, Emre",
-      "title" : "Prof.",
-      "given-name" : "Emre",
-      "family-name" : "Telatar",
-      "home-page" : "http://people.epfl.ch/131639",
-      "fields" : ["Information Theory"],
-      "group" : "Information Theory",
-      "group-page" : "http://lthiwww.epfl.ch/",
-      "email" : "emre.telatar@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Thiran, Patrick",
-      "title" : "Prof.",
-      "given-name" : "Patrick",
-      "family-name" : "Thiran",
-      "home-page" : "http://icapeople.epfl.ch/thiran",
-      "fields" : ["Computer Communications and Applications"],
-      "group" : "Computer Communications and Applications Laboratory 3",
-      "group-page" : "http://icawww.epfl.ch/",
-      "email" : "Patrick.Thiran@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Urbanke, Ruediger",
-      "title" : "Prof.",
-      "given-name" : "R&uuml;diger",
-      "family-name" : "Urbanke",
-      "home-page" : "http://lthcwww.epfl.ch/people/ruediger.php",
-      "fields" : ["Communication Theory"],
-      "group" : "Communication Theory Laboratory",
-      "group-page" : "http://lthcwww.epfl.ch/",
-      "email" : "rudiger.urbanke@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Vaudenay, Serge",
-      "title" : "Prof.",
-      "given-name" : "Serge",
-      "family-name" : "Vaudenay",
-      "home-page" : "http://lasecwww.epfl.ch/~vaudenay/",
-      "fields" : ["Security", "Cryptography"],
-      "group" : "Security and Cryptography",
-      "group-page" : "http://lasecwww.epfl.ch/",
-      "email" : "serge.vaudenay@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Vetterli, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Vetterli",
-      "home-page" : "http://lcavwww.epfl.ch/~vetterli/",
-      "fields" : ["Audiovisual Communications"],
-      "group" : "Audiovisual Communications Laboratory 1 ",
-      "group-page" : "http://lcavwww.epfl.ch/",
-      "email" : "martin.vetterli@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Vishnoi, Nisheeth",
-      "title" : "Prof.",
-      "given-name" : "Nisheeth",
-      "family-name" : "Vishnoi",
-      "home-page" : "http://theory.epfl.ch/vishnoi/",
-      "fields" : ["(tbd)"],
-      "group" : "Theory of Computation Laboratory 3 ",
-      "group-page" : "(tbd)",
-      "email" : "nisheeth.vishnoi@epfl.ch",
-      "since" : "2014"
-    },
-    { 
-     "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Wegmann, Alain",
-      "title" : "Prof.",
-      "given-name" : "Alain",
-      "family-name" : "Wegmann",
-      "home-page" : "http://lamswww.epfl.ch/people/wegmann",
-      "fields" : ["Systemic Modeling"],
-      "group" : "Systemic Modeling Laboratory",
-      "group-page" : "http://lamswww.epfl.ch/",
-      "email" : "alain.wegmann@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "label" : "Zwaenepoel, Willy",
-      "title" : "Prof.",
-      "given-name" : "Willy",
-      "family-name" : "Zwaenepoel",
-      "home-page" : "http://people.epfl.ch/willy.zwaenepoel",
-      "fields" : ["Operating Systems"],
-      "group" : "Operating Systems",
-      "group-page" : "http://labos.epfl.ch/",
-      "email" : "willy.zwaenepoel@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Bremaud, Pierre",
-      "title" : "Prof.",
-      "given-name" : "Pierre",
-      "family-name" : "Bremaud",
-      "home-page" : "http://lcavwww.epfl.ch/~bremaud/",
-      "email" : "pierre.bremaud@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Coray, Giovanni",
-      "title" : "Prof.",
-      "given-name" : "Giovanni",
-      "family-name" : "Coray",
-      "home-page" : "http://people.epfl.ch/giovanni.coray",
-      "email" : "giovanni.coray@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Hasler, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Hasler",
-      "home-page" : "http://people.epfl.ch/martin.hasler",
-      "email" : "martin.hasler@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],  
-      "label" : "Janson, Philippe",
-      "title" : "Prof.",
-      "given-name" : "Philippe",
-      "family-name" : "Janson",
-      "home-page" : "http://people.epfl.ch/philippe.janson",
-      "email" : "philippe.janson@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Kirrmann, Hubert",
-      "title" : "Prof.",
-      "given-name" : "Hubert",
-      "family-name" : "Kirrmann",
-      "home-page" : "http://people.epfl.ch/hubert.kirrmann",
-      "email" : "hubert.kirrmann@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Mange, Daniel",
-      "title" : "Prof.",
-      "given-name" : "Daniel",
-      "family-name" : "Mange",
-      "home-page" : "http://people.epfl.ch/daniel.mange",
-      "email" : "daniel.mange@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Nicoud, Jean-Daniel",
-      "title" : "Prof.",
-      "given-name" : "Jean-Daniel",
-      "family-name" : "Nicoud",
-      "home-page" : "http://people.epfl.ch/jean-daniel.nicoud",
-      "email" : "jean-daniel.nicoud@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Nussbaumer, Henri",
-      "title" : "Prof.",
-      "given-name" : "Henri ",
-      "family-name" : "Nussbaumer",
-      "home-page" : "http://people.epfl.ch/henri.nussbaumer",
-      "email" : "henri.nussbaumer@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"], 
-      "label" : "Petitpierre, Claude",
-      "title" : "Prof.",
-      "given-name" : "Claude",
-      "family-name" : "Petitpierre",
-      "home-page" : "http://people.epfl.ch/claude.petitpierre",
-      "email" : "claude.petitpierre@epfl.ch"
-    },
-    { 
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],      
-      "label" : "Schiper, André",
-      "title" : "Prof.",
-      "given-name" : "André",
-      "family-name" : "Schiper",
-      "home-page" : "http://lsrwww.epfl.ch",
-      "email" : "Andre.Schiper@epfl.ch"  
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Spaccapietra, Stefano",
-      "title" : "Prof.",
-      "given-name" : "Stefano",
-      "family-name" : "Spaccapietra",
-      "home-page" : "http://people.epfl.ch/stefano.spaccapietra",
-      "email" : "Stefano.Spaccapietra@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Thalmann, Daniel",
-      "title" : "Prof.",
-      "given-name" : "Daniel",
-      "family-name" : "Thalmann",
-      "home-page" : "http://people.epfl.ch/daniel.thalmann",
-      "email" : "daniel.thalmann@epfl.ch"
-    },
-    {
-      "file" : "epfl",
-      "org" : "Swiss Federal Institute of Technology Lausanne - EPFL",
-      "inst" : "IC",
-      "role" : ["Emeritus"],
-      "label" : "Strohmeier, Alfred",
-      "title" : "Prof.",
-      "given-name" : "Alfred",
-      "family-name" : "Strohmeier"
-    },
-{
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "representative": "yes",
-            "label": "Vogel, Manfred",
-            "title": "Prof. Dr.",
-            "given-name": "Manfred",
-            "family-name": "Vogel",
-            "home-page": "http://www.fhnw.ch/people/manfred-vogel",
-            "fields": [
-                "Social network analysis",
-                "Machine Learning",
-                "Information Retrieval"
-            ],
-            "email": "manfred.vogel@fhnw.ch",
-            "since": "2012"
-        },
-		{
-		     "file" : "fhnw",
-		     "org" : "FHNW",
-		     "inst" : "School of Education",
-			"role" : ["Delegate", "Pre-University Education"],
-		     "label" : "Repenning, Alexander",
-		     "title" : "Prof.",
-		     "given-name" : "Alexander",
-		     "family-name" : "Repenning",
-		     "home-page" : "http://www.fhnw.ch/people/alexander-repenning/",
-		     "fields" : ["Computer Science Education",
-			 	"Visual Programming Languages"],
-		     "group" : "Scalable Game Design",
-		     "group-page" : "http://www.scalablegamedesign.ch/",
-		     "email" : "alexander.repenning@fhnw.ch",
-		     "since" : "2014"
-		},
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Csillaghy, Andre",
-            "title": "Prof. Dr.",
-            "given-name": "Andr&eacute;",
-            "family-name": "Csillaghy",
-            "home-page": "http://soleil.i4ds.ch/~csillag/",
-            "fields": [
-                "Data analysis",
-                "Large Scientific Data",
-                "Data Management"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "andre.csillaghy@fhnw.ch",
-            "since": "Feb-03"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Luthiger, Juerg",
-            "title": "Prof.",
-            "given-name": "Juerg",
-            "family-name": "Luthiger",
-            "home-page": "http://www.fhnw.ch/people/juerg-luthiger",
-            "fields": [
-                "Mobile and Distributed Software Systems",
-                "Enterprise Application Architecture",
-                "Service-Oriented Architectures",
-                "Web Technologies"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "juerg.luthiger@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Gruntz, Dominik",
-            "title": "Prof. Dr.",
-            "given-name": "Dominik",
-            "family-name": "Gruntz",
-            "home-page": "http://www.fhnw.ch/people/dominik-gruntz",
-            "fields": [
-                "Mobile and Distributed Software Systems",
-                "Programming Languages",
-                "Patterns",
-                "Near Field Computing"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "dominik.gruntz@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Gwerder, Martin",
-            "title": "Prof.",
-            "given-name": "Martin",
-            "family-name": "Gwerder",
-            "home-page": "http://www.fhnw.ch/personen/martin-gwerder",
-            "fields": [
-                "Security",
-                "Virtualized Systems",
-                "Cloud Systems"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "martin.gwerder@fhnw.ch",
-            "since": "2016"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Hauser, Sarah",
-            "given-name": "Sarah",
-            "family-name": "Hauser",
-            "home-page": "http://www.fhnw.ch/personen/sarah-hauser",
-            "fields": [
-                "Software Engineering",
-                "Software Project Management",
-                "Information Security",
-                "Theoretical Computer Science"
-            ],
-            "group": "iCompetence",
-            "group-page": "http://www.fhnw.ch/technik/bachelor/i/ic",
-            "email": "sarah.hauser@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Oehninger, Markus",
-            "given-name": "Markus",
-            "family-name": "Oehninger",
-            "fields": [
-                "studentische Informatikprojekte",
-                "ERP-Informationssysteme"
-            ],
-            "email": "markus.oehninger@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Wirtschaftsinformatik",
-            "label": "Dornberger, Rolf",
-            "title": "Prof. Dr.-Ing.",
-            "given-name": "Rolf",
-            "family-name": "Dornberger",
-            "home-page": "http://www.fhnw.ch/personen/rolf-dornberger ",
-            "fields": [
-                "Computational Intelligence",
-                "Software Engineering",
-                "Innovation Management"
-            ],
-            "group": "Institute for Information Systems, School of Business",
-            "group-page": "http://www.fhnw.ch/iwi",
-            "email": "rolf.dornberger@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Weck, Wolfgang",
-            "title": "Dr.",
-            "given-name": "Wolfgang",
-            "family-name": "Weck",
-            "home-page": "http://www.fhnw.ch/personen/wolfgang-weck",
-            "fields": [
-                "Software Engineering",
-                "Software Architecture"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "wolfgang.weck@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Melchior, Martin",
-            "title": "Dr.",
-            "given-name": "Martin",
-            "family-name": "Melchior",
-            "home-page": "http://www.fhnw.ch/people/martin-melchior",
-            "fields": [
-                "Data analysis",
-                "Large Scientific Data",
-                "Data Management"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "martin.melchior@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Mueller, Ruedi",
-            "title": "",
-            "given-name": "Ruedi",
-            "family-name": "Mueller",
-            "home-page": "http://www.fhnw.ch/people/rued-mueller",
-            "fields": [
-                "Software Systems"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "reudi.mueller@fhnw.ch",
-            "since": "2012"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Denzler, Christoph",
-            "title": "Prof. Dr.",
-            "given-name": "Christoph",
-            "family-name": "Denzler",
-            "home-page": "http://www.fhnw.ch/people/christoph-denzler",
-            "fields": [
-                "Algorithms",
-                "Compiler and Architecture Design",
-                "Mobile and Distributed Software Systems",
-                "Software Architecture",
-                "Programming Languages"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "christoph.denzler@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Gysel, Peter",
-            "title": "Prof. Dr.",
-            "given-name": "Peter",
-            "family-name": "Gysel",
-            "home-page": "http://www.fhnw.ch/people/peter-gysel",
-            "fields": [
-                "Communications Technology",
-                "Communication Systems",
-                "Computer Networks",
-                "Security"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "peter.gysel@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Kropp, Martin",
-            "title": "Prof.",
-            "given-name": "Martin",
-            "family-name": "Kropp",
-            "home-page": "http://www.fhnw.ch/people/martin-kropp",
-            "fields": [
-                "Software Engineering",
-                "Software Evolution",
-                "SW Development Methodologies",
-                "Test Automation"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "martin.kropp@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Lubich, Hannes",
-            "title": "Prof. Dr.",
-            "given-name": "Hannes",
-            "family-name": "Lubich",
-            "home-page": "http://www.fhnw.ch/people/hannes-lubich",
-            "fields": [
-                "ICT System & Service Management",
-                "ICT Risk Management",
-                "Business Continuity Management"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "hannes.lubich@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Stamm, Christoph",
-            "title": "Prof. Dr.",
-            "given-name": "Christoph",
-            "family-name": "Stamm",
-            "home-page": "http://www.fhnw.ch/people/christoph-stamm",
-            "fields": [
-                "Algorithms",
-                "Computational Geometry",
-                "Computer Vision",
-                "Image Processing",
-                "Parallel Systems"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "christoph.stamm@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Felix, Simon",
-            "title": "",
-            "given-name": "Simon",
-            "family-name": "Felix",
-            "home-page": "http://www.fhnw.ch/personen/simon-felix/",
-            "fields": [
-                "Data mining",
-                "Machine learning",
-                "Algorithms",
-                "Computer vision",
-                "Image processing",
-                "Operating system research",
-                "Game development",
-                "Distributed systems"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "simon.felix@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Stefan, Arisona",
-            "title": "Prof. Dr.",
-            "given-name": "Stefan",
-            "family-name": "Arisona",
-            "home-page": "http://www.fhnw.ch/personen/stefan-arisona/profil",
-            "fields": [
-                "Computer Graphics",
-                "Game Design & Gamification",
-                "Virtual, Augmented and Mixed Reality",
-                "Visualization & Visual Analytics",
-                "Human-Computer Interaction",
-                "Computational Design & Computational Aesthetics"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "stefan.arisona@fhnw.ch",
-            "since": "2016"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Simon, Schubiger",
-            "title": "Prof. Dr.",
-            "given-name": "Simon",
-            "family-name": "Schubiger",
-            "home-page": "http://www.fhnw.ch/personen/simon-schubiger",
-            "fields": [
-                "Computer Graphics",
-                "Game Design",
-                "Audio/Video Processing"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "simon.schubiger@fhnw.ch",
-            "since": "2016"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Samuel, Fricker",
-            "title": "Prof. Dr.",
-            "given-name": "Samuel",
-            "family-name": "Fricker",
-            "home-page": "http://www.fhnw.ch/personen/samuel-fricker",
-            "fields": [
-                "Requirements Engineering",
-                "Project Management"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "samuel.fricker@fhnw.ch",
-            "since": "2016"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Norbert, Seyff",
-            "title": "Prof. Dr.",
-            "given-name": "Norbert",
-            "family-name": "Seyff",
-            "home-page": "http://www.fhnw.ch/personen/norbert-seyff",
-            "fields": [
-                "Requirements Engineering"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.i4ds.ch/",
-            "email": "norbert.seyff@fhnw.ch",
-            "since": "2016"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Wyss, Bernhard",
-            "title": "Prof.",
-            "given-name": "Bernhard",
-            "family-name": "Wyss",
-            "home-page": "http://www.fhnw.ch/people/bernhard-wyss",
-            "fields": [
-                "Database systems",
-                "Distributed Information Systems"
-            ],
-            "group": "Institute of Mobile and Distributed Systems",
-            "group-page": "http://www.fhnw.ch/technik/imvs",
-            "email": "bernhard.wyss@fhnw.ch",
-            "since": "2013"
-        },
-        {
-            "file": "fhnw",
-            "org": "FHNW",
-            "inst": "Informatik",
-            "label": "Agotai, Doris",
-            "title": "Dr. sc. ETH",
-            "given-name": "Doris",
-            "family-name": "Agotai",
-            "home-page": "http://www.fhnw.ch/personen/doris-agotai",
-            "fields": [
-                "Interface- and Interaction Design, Usability, UX",
-                "Virtual Reality"
-            ],
-            "group": "Institute of 4D Technologies",
-            "group-page": "http://www.fhnw.ch/technik/i4Ds",
-            "email": "doris.agotai@fhnw.ch",
-            "since": "2013"
-        },
-{
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "FHSG",
-      "representative" : "yes",
-      "label" : "Reimer, Ulrich",
-      "title" : "Prof. Dr.",
-      "given-name" : "Ulrich",
-      "family-name" : "Reimer",
-      "home-page" : "http://www.fhsg.ch/fhs.nsf/en/research-and-services-institutes-ipm-fhs-staff-contact-details-ulrich-reimer",
-      "fields" : ["Semantic Web", "Knowledge management", "Information retrieval", "Model-driven development", "Reference modeling", "e-Health"],
-      "group" : "Institute for Information and Process Management",
-      "group-page" : "http://www.fhsg.ch/ipm",
-      "email" : "ulrich.reimer@fhsg.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "FHSG",
-      "representative" : "no",
-      "label" : "Maier, Edith",
-      "title" : "Prof. Dr.",
-      "given-name" : "Edith",
-      "family-name" : "Maier",
-      "home-page" : "http://www.fhsg.ch/fhs.nsf/en/research-and-services-institutes-ipm-fhs-staff-contact-details-edith-maier",
-      "fields" : ["e-Health", "HCI", "Knowledge management", "Inter-cultural issues"],
-      "group" : "Institute for Information and Process Management",
-      "group-page" : "http://www.fhsg.ch/ipm",
-      "email" : "Edith.Maier@fhsg.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "FHSG",
-      "representative" : "no",
-      "label" : "Jaeschke, Peter",
-      "title" : "Prof. Dr.",
-      "given-name" : "Peter",
-      "family-name" : "Jaeschke",
-      "home-page" : "http://www.fhsg.ch/fhs.nsf/de/person?OpenDocument&person=peter-jaeschke&",
-      "fields" : ["Requirements Engineering", "Business process management", "IT Governance", "Information management"],
-      "group" : "Institute for Information and Process Management",
-      "group-page" : "http://www.fhsg.ch/ipm",
-      "email" : "peter.jaeschke@fhsg.ch",
-      "since" : "2013"
-    },
-    {
-        "file" : "fho",
-        "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-        "inst" : "HSR",
-        "representative" : "no",
-        "label" : "Stolze, Markus",
-        "title" : "Prof. Dr.",
-        "given-name" : "Markus",
-        "family-name" : "Stolze",
-        "home-page" : "http://wiki.hsr.ch/MarkusStolze/wiki.cgi?MarkusStolze",
-        "fields" : ["User Interface Technology", "Mobile Systems", "Public Displays", "Human Computer Interaction Design", "Recommender Systems"],
-        "group" : "Institute for Software",
-        "group-page" : "http://www.ifs.hsr.ch/",
-        "email" : "mstolze@hsr.ch",
-        "since" : "2008"
-    },
-    {
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "HTW Chur",
-      "representative" : "no",
-      "label" : "Studer, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Studer",
-      "home-page" : " http://www.htwchur.ch/martinstuder.html",
-      "fields" : ["Information Retrieval"],
-      "group" : "Institut für Informations- und Telekommunikationstechnik IKT ",
-      "group-page" : " www.fh-htwchur.ch/ikt",
-      "email" : "martin.studer@htwchur.ch",
-      "since" : "2006"
-    },
-    {
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "HSR",
-      "representative" : "no",
-      "label" : "Sommerlad, Peter",
-      "title" : "Prof.",
-      "given-name" : "Peter",
-      "family-name" : "Sommerlad",
-      "home-page" : "http://wiki.hsr.ch/PeterSommerlad/",
-      "fields" : ["Software Engineering", "Patterns", "Refactoring", "Test Automation", "C++"],
-      "group" : "Institute for Software",
-      "group-page" : "http://ifs.hsr.ch",
-      "email" : "peter.sommerlad@hsr.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "FHSG",
-      "representative" : "no",
-      "label" : "End, Rainer",
-      "title" : "Prof. Dr.",
-      "given-name" : "Rainer",
-      "family-name" : "Endl",
-      "home-page" : "http://www.fhsg.ch/fhs.nsf/de/person?OpenDocument&person=rainer-endl&",
-      "fields" : ["e-health", "Mobile Health", "Business process management", "IT Management", "Strategic Process Management"],
-      "group" : "Institute for Information and Process Management",
-      "group-page" : "http://www.fhsg.ch/ipm",
-      "email" : "rainer.endl@fhsg.ch",
-      "since" : "2014"
-    },
-    {
-      "file" : "fho",
-      "org" : "University of Applied Sciences of Eastern Switzerland - FHO",
-      "inst" : "FHSG",
-      "representative" : "no",
-      "label" : "Thiel, Christian",
-      "title" : "Prof. Dr.",
-      "given-name" : "Christian",
-      "family-name" : "Thiel",
-      "home-page" : "http://www.fhsg.ch/fhs.nsf/de/person?OpenDocument&person=christian-thiel&",
-      "fields" : ["IT Governance", "Information management", "Information Security", "Risk Management", "IT Compliance"],
-      "group" : "Institute for Information and Process Management",
-      "group-page" : "http://www.fhsg.ch/ipm",
-      "email" : "christian.thiel@fhsg.ch",
-      "since" : "2014"
-    },
-{
-      "file" : "hes-so",
-      "org" : "University of Applied Sciences and Arts Western Switzerland - HES-SO",
-      "inst" : "EIA-FR",
-      "label" : "Delley, Antoine",
-      "title" : "Prof.",
-      "given-name" : "Antoine",
-      "family-name" : "Delley",
-      "home-page" : "http://antoine.delley.home.hefr.ch/",
-      "fields" : ["IP Networks", "Security"],
-      "group" : "Ecole d'ing&eacute;nieurs et d'architectes de Fribourg / Hochschule f&uuml;r Technik und Architektur Freiburg",
-      "group-page" : "http://www.eia-fr.ch/",
-      "email" : "antoine.delley@hefr.ch"
-    },
-    {
-      "file" : "hes-so",
-      "org" : "University of Applied Sciences and Arts Western Switzerland - HES-SO",
-      "inst" : "EIA-FR",
-      "representative" : "yes",
-      "label" : "Kuonen, Pierre",
-      "title" : "Prof.",
-      "given-name" : "Pierre",
-      "family-name" : "Kuonen",
-      "home-page" : "http://pierre.kuonen.home.hefr.ch/",
-      "fields" : ["HPC", "Parallel and Distributed Computing"],
-      "email" : "pierre.kuonen@hefr.ch"
-    },
-    {
-      "file" : "hes-so",
-      "org" : "University of Applied Sciences and Arts Western Switzerland - HES-SO",
-      "inst" : "HEIG-VD",
-      "label" : "Robert, Stephan",
-      "title" : "Prof.",
-      "given-name" : "Stephan",
-      "family-name" : "Robert",
-      "home-page" : "http://www.stephan-robert.ch/",
-      "fields" : ["Communication Systems"],
-      "group" : "Ecole d'Ing&eacute;nieurs du Canton de Vaud (EIVD), Yverdon-les-Bains ",
-      "group-page" : "http://www.heig-vd.ch/",
-      "email" : "stephan.robert@eivd.ch",
-      "since" : "Dez 2002"
-    },
-    {
-      "file" : "hes-so",
-      "org" : "University of Applied Sciences and Arts Western Switzerland - HES-SO",
-      "inst" : "HEVs",
-      "label" : "Sciboz, Laurent",
-      "title" : "Prof.",
-      "given-name" : "Laurent",
-      "family-name" : "Sciboz",
-      "home-page" : "http://iig.hevs.ch/valais/laurent-sciboz.html",
-      "fields" : ["Modeling and Design", "Software Engineering"],
-      "group" : "L'institut Informatique de gestion",
-      "group-page" : "http://iig.hevs.ch/",
-      "email" : "laurent.sciboz@hevs.ch",
-      "since" : "Apr-10"
-    },
-{
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Bürgler, Josef",
-      "title" : "Prof.,Dr.",
-      "given-name" : "Josef",
-      "family-name" : "B&uuml;rgler",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=200017&id_teilschule=25621&row=0",
-      "fields" : ["Simulation", "High Performance Computing"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-ausbildung/t-ausbildung_bachelor/t-bachelor_informatik.htm",
-      "email" : "josef.buergler@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Diehl, Roger",
-      "title" : "Dozent",
-      "given-name" : "Roger",
-      "family-name" : "Diehl",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=202059&id_teilschule=25621&row=0",
-      "fields" : ["Concurrent Programming", "Multi Media Computing"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-ausbildung/t-ausbildung_bachelor/t-bachelor_informatik.htm",
-      "email" : "roger.diehl@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "representative" : "yes",
-      "label" : "Diethelm, Hansjoerg",
-      "title" : "Prof.",
-      "given-name" : "Hansj&ouml;rg",
-      "family-name" : "Diethelm",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=204152&id_teilschule=25621&row=0",
-      "fields" : ["Software Engineering", "Simulation"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-ausbildung/t-ausbildung_bachelor/t-bachelor_informatik.htm",
-      "email" : "hansjoerg.diethelm@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Hämmerli, Bernhard",
-      "title" : "Prof.,Dr.",
-      "given-name" : "Bernhard",
-      "family-name" : "H&auml;mmerli",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=200025&id_teilschule=25621&row=0",
-      "fields" : ["Information Security", "Computer Networks", "Green IT"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-ausbildung/t-ausbildung_bachelor/t-bachelor_informatik.htm",
-      "email" : "bernhard.haemmerli@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Klaper, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Klaper",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=204812&id_teilschule=25621&row=0",
-      "fields" : ["Computer Science Fundamentals", "Programming Languages", "Software Defined Radio"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-ausbildung/t-ausbildung_bachelor/t-bachelor_informatik.htm",
-      "email" : "martin.klaper@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Kurmann, Andreas",
-      "title" : "Prof.",
-      "given-name" : "Andreas",
-      "family-name" : "Kurmann",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=1076115&id_teilschule=25621&row=0",
-      "fields" : ["Project Management", "Modelling/Conception", "IT Operations"],
-      "group" : "Abteilung Informatik",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-ausbildung/t-ausbildung_bachelor/t-bachelor_informatik.htm",
-      "email" : "andreas.kurmann@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Arnold, Ruedi",
-      "title" : "Dr.",
-      "given-name" : "Ruedi",
-      "family-name" : "Arnold",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=1231634&id_teilschule=25621&row=3",
-      "fields" : ["Mobile Systems", "Education", "Social Media"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "ruedi.arnold@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Hofstetter, Joerg",
-      "title" : "Prof.",
-      "given-name" : "J&ouml;rg",
-      "family-name" : "Hofstetter",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=204803&id_teilschule=25621&row=2",
-      "fields" : ["Software Engineering", "Distributed Secure Software Systems"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "joerg.hofstetter@hslu.ch",
-      "since" : "2003"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Jud, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Jud",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=211463&id_teilschule=25621&row=1",
-      "fields" : ["Software Engineering"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "martin.jud@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Koehler, Jana",
-      "title" : " Prof.,Dr.",
-      "given-name" : "Jana",
-      "family-name" : "Koehler",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=1199923&id_teilschule=25621&row=0",
-      "fields" : ["Artificial Intelligence", "Business Process Management", "Enterprise Application Architecture"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "jana.koehler@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Koller, Thomas",
-      "title" : "Dr.",
-      "given-name" : "Thomas",
-      "family-name" : "Koller",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=208566&id_teilschule=25621&row=1",
-      "fields" : ["Visual Computing", "Software Engineering", "Image Processing"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "thomas.koller@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Meier, René",
-      "title" : "Prof.,Dr.",
-      "given-name" : "René",
-      "family-name" : "Meier",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=1231632&id_teilschule=25621&row=0",
-      "fields" : ["Mobile Systems", "Software Architecture", "International Transportation Systems"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "rene.meier@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Olnhoff, Thomas",
-      "title" : " Prof.,Dr.",
-      "given-name" : "Thomas",
-      "family-name" : "Olnhoff",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=204845&id_teilschule=25621&row=0",
-      "fields" : ["Data Management", "Data Analysis"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "thomas.olnhoff@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Portmann, Roland",
-      "title" : "Prof.",
-      "given-name" : "Roland",
-      "family-name" : "Portmann",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=200003&id_teilschule=25621&row=0",
-      "fields" : ["Information Security", "IT Infrastructure"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "roland.portmann@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Pouly, Marc",
-      "title" : "Dr.",
-      "given-name" : "Marc",
-      "family-name" : "Pouly",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=1231629&id_teilschule=25621&row=0",
-      "fields" : ["Artificial Intelligence", "Mobile Systems", "Information Security"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "marc.pouly@hslu.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "hslu",
-      "org" : "Lucerne University of Applied Sciences and Arts - HSLU",
-      "inst" : "HSLU T&A",
-      "label" : "Sollberger, Peter",
-      "title" : "Prof.",
-      "given-name" : "Peter",
-      "family-name" : "Sollberger",
-      "home-page" : "http://www.hslu.ch/hochschule-luzern/h-ueber-uns/h-person.htm?id_person=213499&id_teilschule=25621&row=0",
-      "fields" : ["Software Engineering", "Wireless Sensor Networks"],
-      "group" : "Distributed Secure Software Systems",
-      "group-page" : "http://www.hslu.ch/technik-architektur/t-forschung-entwicklung/t-forschung_entwicklung_informatik/t-forschung_entwicklung_informations_softwaresicherheit.htm",
-      "email" : "peter.sollberger@hslu.ch",
-      "since" : "2012"
-    },
-{
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "yes",
-               "label" : "Pedrazzini, Sandro",
-               "title" : "Prof Dr",
-               "given-name" : "Sandro",
-               "family-name" : "Pedrazzini",
-               "fields" : ["Software Engineering", "SW Development Methodologies"],
-               "email" : "sandro.pedrazzini@supsi.ch",
-               "since" : "2012"
-               },
-               
-               {
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "no",
-               "label" : "Ravano, Giambattista",
-               "title" : "Prof ",
-               "given-name" : "Giambattista",
-               "family-name" : "Ravano",
-               "fields" : ["Software Engineering"],
-               "email" : "giambattista.ravano@supsi.ch",
-               "since" : "2012"
-               },
-               
-               {
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "no",
-               "label" : "Gambardella, Luca",
-               "title" : "Prof ",
-               "given-name" : "Luca",
-               "family-name" : "Gambardella",
-               "fields" : ["Artificial Intelligence", "Optimization"],
-               "email" : "luca.gambardella@supsi.ch",
-               "since" : "2012"
-               },
-               
-               {
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "no",
-               "label" : "Zaffalon, Marco",
-               "title" : "Prof Dr.",
-               "given-name" : "Marco",
-               "family-name" : "Zaffalon",
-               "home-page" : "http://www.idsia.ch/~zaffalon/",
-               "fields" : ["Artificial Intelligence", "Data mining","Imprecise Probabilities"],
-               "email" : "marco.zaffalon@supsi.ch",
-               "since" : "2012"
-               },
-               
-               {
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "no",
-               "label" : "Sommaruga, Lorenzo",
-               "title" : "Prof Dr.",
-               "given-name" : "Lorenzo",
-               "family-name" : "Sommaruga",
-               "fields" : ["Web Systems"],
-               "email" : "lorenzo.sommaruga@supsi.ch",
-               "since" : "2012"
-               },
-               
-               {
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "no",
-               "label" : "Rizzoli, Andrea",
-               "title" : "Prof Dr.",
-               "given-name" : "Andrea Emilio",
-               "family-name" : "Rizzoli",
-               "home-page" : "http://www.idsia.ch/~andrea/",
-               "fields" : ["Artificial Intelligence", "Simulation","Environmental Systems"],
-               "email" : "andrea.rizzoli@supsi.ch",
-               "since" : "2012"
-               },
-               
-               {
-               "file" : "supsi",
-               "org" : "University of Applied Sciences and Arts of Southern Switzerland - SUPSI",
-               "inst" : "DTI",
-               "representative" : "no",
-               "label" : "Mastropietro, Roberto",
-               "title" : "Dott",
-               "given-name" : "Roberto",
-               "family-name" : "Mastropietro",
-               "fields" : ["ICT Architectures"],
-               "email" : "roberto.mastropietro@supsi.ch",
-               "since" : "2012"
-               },
-{
-      "file" : "unibas",
-      "org" : "University of Basel",
-      "inst" : "Departement Mathematik und Informatik",
-      "label" : "Burkhart, Helmar",
-      "title" : "Prof.",
-      "given-name" : "Helmar",
-      "family-name" : "Burkhart",
-      "home-page" : "http://informatik.unibas.ch/personen/burkhart_h.html",
-	"fields" : ["Parallel and Distributed Processing", "Web Technologies", "E-learning", "Software Development", "High Performance Computing"],
-      "group" : "High Performance and Web Computing",
-      "group-page" : "http://fgb.informatik.unibas.ch/index.html",
-      "email" : "helmar.burkhart@unibas.ch"
-    },
-    {
-      "file" : "unibas",
-      "org" : "University of Basel",
-      "inst" : "Departement Mathematik und Informatik",
-      "label" : "Helmert, Malte",
-      "title" : "Prof.",
-      "given-name" : "Malte",
-      "family-name" : "Helmert",
-      "home-page" : "http://ai.cs.unibas.ch/people/helmert/index.html",
-      "fields" : ["Artificial Intelligence", "Algorithms and tools for intelligent problem solving"],
-      "group" : "Artificial Intelligence",
-      "group-page" : "http://ci.cs.unibas.ch/",
-      "email" : "malte.helmert@unibas.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unibas",
-      "org" : "University of Basel",
-      "inst" : "Departement Mathematik und Informatik",
-      "label" : "Roth, Volker",
-      "title" : "Prof.",
-      "given-name" : "Volker",
-      "family-name" : "Roth",
-      "home-page" : "http://informatik.unibas.ch/personen/roth_volker/roth_v.html",
-      "fields" : ["Machine Learning", "Data Analysis", "Clustering", "Kernel-based algorithms", "Feature Selection and Data Fusion"],
-      "group" : "Biomedical Data Analysis Group",
-      "group-page" : "http://informatik.unibas.ch/personen/roth_volker/BiomedicalDataGroup.html",
-      "email" : "volker.roth@unibas.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "unibas",
-      "org" : "University of Basel",
-      "inst" : "Departement Mathematik und Informatik",
-      "label" : "Schuldt, Heiko",
-      "title" : "Prof.",
-      "given-name" : "Heiko",
-      "family-name" : "Schuldt",
-      "home-page" : "http://dbis.cs.unibas.ch/team/heiko-schuldt/dbis_staff_view",
-      "fields" : ["Cloud & Grid Computing", "Digital Libraries", "Services & Workflows", "e-Health", "e-Science"],
-      "group" : "Databases and Information Systems",
-      "group-page" : "http://dbis.cs.unibas.ch/",
-      "email" : "heiko.schuldt@unibas.ch",
-      "since" : "2006"
-    },
-    {
-      "file" : "unibas",
-      "org" : "University of Basel",
-      "inst" : "Deptartement Mathematik und Informatik",
-      "representative" : "yes",
-      "label" : "Tschudin, Christian",
-      "title" : "Prof.",
-      "given-name" : "Christian",
-      "family-name" : "Tschudin",
-      "home-page" : "http://cn.cs.unibas.ch/people/cft/",
-	"fields" : ["Mobile code", "Wireless networking", "Security", "Network Architecture"],
-      "group" : "Computer Networks",
-      "group-page" : "http://cn.cs.unibas.ch/",
-	"email" : "christian.tschudin@unibas.ch",
-	"since" : "2002"
-    },
-    {
-      "file" : "unibas",
-      "org" : "University of Basel",
-      "inst" : "Departement Mathematik und Informatik",
-      "label" : "Vetter, Thomas",
-      "title" : "Prof.",
-      "given-name" : "Thomas",
-      "family-name" : "Vetter",
-      "home-page" : "http://informatik.unibas.ch/personen/vetter_t.html",
-      "fields" : ["Automated Image Understanding"],
-      "group" : "Graphics and Vision Research Group",
-      "group-page" : "http://gravis.cs.unibas.ch/",
-	"email" : "thomas.vetter@unibas.ch",
-	"since" : "2002"
-    },
-{
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "role" : ["Emeritus"],
-      "label" : "Bieri, Hanspeter",
-      "title" : "Prof.",
-      "given-name" : "Hanspeter",
-      "family-name" : "Bieri",
-      "home-page" : "http://cgg.unibe.ch/staff/alumni/hanspeter-bieri/",
-      "email" : "bieri@inf.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "role" : ["Emeritus"],
-      "label" : "Bunke, Horst",
-      "title" : "Prof.",
-      "given-name" : "Horst",
-      "family-name" : "Bunke",
-      "home-page" : "http://www.inf.unibe.ch/~fkiwww/staff/bunke.html",
-      "email" : "bunke@inf.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "label" : "Braun, Torsten",
-      "title" : "Prof.",
-      "given-name" : "Torsten",
-      "family-name" : "Braun",
-      "home-page" : "http://www.inf.unibe.ch/~braun/",
-      "fields" : ["Wired/Wireless Networks", "Distributed Systems"],
-      "group" : "Communication and Distributed Systems",
-      "group-page" : "http://cds.unibe.ch/",
-      "email" : "braun@inf.unibe.ch"
-    },
-    {
-     "file" : "unibe",
-     "org" : "University of Bern",
-     "inst" : "INF",
-     "label" : "Favaro, Paolo",
-     "title" : "Prof.",
-     "given-name" : "Paolo",
-     "family-name" : "Favaro",
-     "home-page" : "http://home.eps.hw.ac.uk/~pf21/",
-     "fields" : ["Computer Vision, Machine Learning, Blind Deconvolution, Optimization Methods"],
-     "group" : "Computer Vision Group",
-     "email" : "paolo.favaro@inf.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "label" : "Jaeger, Gerhard",
-      "title" : "Prof.",
-      "given-name" : "Gerhard",
-      "family-name" : "J&auml;ger",
-      "home-page" : "http://www.inf.unibe.ch/ltg/staff/jaeger",
-      "fields" : ["Logic", "Theory"],
-      "group" : "Logic and Theory Group",
-      "group-page" : "http://www.ltg.unibe.ch",
-      "email" : "jaeger@inf.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "label" : "Strahm, Thomas",
-      "title" : "Prof.",
-      "given-name" : "Thomas",
-      "family-name" : "Strahm",
-      "home-page" : "http://www.inf.unibe.ch/~strahm/",
-      "fields" : ["Logic", "Theory"],
-      "group" : "Logic and Theory Group",
-      "group-page" : "http://www.ltg.unibe.ch",
-      "email" : "strahm@inf.unibe.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "label" : "Studer, Thomas",
-      "title" : "Prof.",
-      "given-name" : "Thomas",
-      "family-name" : "Studer",
-      "home-page" : "http://www.inf.unibe.ch/~tstuder/",
-      "fields" : ["Proof theory", "Modal logics", "Data privacy"],
-      "group" : "Logic and Theory Group",
-      "group-page" : "http://www.ltg.unibe.ch",
-      "email" : "tstuder@inf.unibe.ch",
-      "since" : "2013"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "role" : ["Board member"],
-      "representative" : "yes",
-      "label" : "Nierstrasz, Oscar",
-      "title" : "Prof.",
-      "given-name" : "Oscar",
-      "family-name" : "Nierstrasz",
-      "home-page" : "http://scg.unibe.ch/staff/oscar",
-      "fields" : ["Software Evolution", "Software Engineering", "Programming Languages"],
-      "group" : "Software Composition Group",
-      "group-page" : "http://scg.unibe.ch/",
-      "email" : "oscar@inf.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "label" : "Zwicker, Matthias",
-      "title" : "Prof.",
-      "given-name" : "Matthias",
-      "family-name" : "Zwicker",
-      "home-page" : "http://cgg.unibe.ch/staff/matthias-zwicker",
-      "fields" : ["Computer Graphics"],
-      "group" : "Computer Graphics Group",
-      "group-page" : "http://cgg.unibe.ch/",
-      "email" : "zwicker@inf.unibe.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "IWI",
-      "label" : "Portmann, Edy",
-      "title" : "Prof.",
-      "given-name" : "Edy",
-      "family-name" : "Portmann",
-      "home-page" : "http://www.iwi.unibe.ch/ueber_uns/personen/prof_dr_portmann_edy/index_ger.html",
-      "fields" : ["Information Management"],
-      "email" : "edy.portmann@iwi.unibe.ch",
-      "since" : "2013"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "IWI",
-      "label" : "Krancher, Oliver",
-      "title" : "Prof.",
-      "given-name" : "Oliver",
-      "family-name" : "Krancher",
-      "home-page" : "http://www.iwi.unibe.ch/ueber_uns/personen/prof_dr_krancher_oliver/index_ger.html",
-      "fields" : ["Information Engineering"],
-      "email" : "oliver.krancher@iwi.unibe.ch",
-      "since" : "2013"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "IWI",
-      "label" : "Dibbern, Jens",
-      "title" : "Prof.",
-      "given-name" : "Jens",
-      "family-name" : "Dibbern",
-      "home-page" : "http://www.iwi.unibe.ch/ueber_uns/personen/prof_dr_dibbern_jens/index_ger.html",
-      "fields" : ["Information Engineering"],
-      "email" : "jens.dibbern@iwi.unibe.ch",
-      "since" : "2009"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "IWI",
-      "role" : ["Emeritus"],
-      "label" : "Knolmayer, Gerhard",
-      "title" : "Prof.",
-      "given-name" : "Gerhard",
-      "family-name" : "Knolmayer",
-      "home-page" : "http://www.iwi.unibe.ch/ueber_uns/personen/prof_em_dr_knolmayer_gerhard/index_ger.html",
-      "fields" : ["Sourcing", "Supply Chain Management", "Compliance", "Usability"],
-      "email" : "gerhard.knolmayer@iwi.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "IWI",
-      "label" : "Myrach, Thomas",
-      "title" : "Prof.",
-      "given-name" : "Thomas",
-      "family-name" : "Myrach",
-      "home-page" : "http://www.iwi.unibe.ch/ueber_uns/personen/prof_dr_myrach_thomas/index_ger.html",
-      "fields" : ["Information Engineering"],
-      "email" : "thomas.myrach@iwi.unibe.ch"
-    },
-    {
-      "file" : "unibe",
-      "org" : "University of Bern",
-      "inst" : "INF",
-      "role" : ["Emeritus"],
-      "label" : "Mey, Hansjuerg",
-      "title" : "Prof.",
-      "given-name" : "Hansj&uuml;rg",
-      "family-name" : "Mey",
-      "email" : "h.mey@bluewin.ch"
-    },
-   {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "role" : ["Emeritus"],
-      "label" : "Haettenschwiler, Pius",
-      "title" : "Prof.",
-      "given-name" : "Pius",
-      "family-name" : "H&auml;ttenschwiler",
-      "home-page" : "http://diuf.unifr.ch/ds/pius.haettenschwiler/",
-      "email" : "pius.haettenschwiler@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "role" : ["Emeritus"],
-      "label" : "Kohlas, Juerg",
-      "title" : "Prof.",
-      "given-name" : "J&uuml;rg",
-      "family-name" : "Kohlas",
-      "home-page" : "http://diuf.unifr.ch/main/tns/juerg_kohlas",
-      "email" : "juerg.kohlas@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Donze, Laurent",
-      "title" : "Prof.",
-      "given-name" : "Laurent",
-      "family-name" : "Donz&eacute;",
-      "home-page" : "http://www.unifr.ch/stat/de/coll/prof/donze",
-      "email" : "laurent.donze@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Lalanne, Denis",
-      "title" : "Prof.",
-      "given-name" : "Denis",
-      "family-name" : "Lalanne",
-      "home-page" : "http://diuf.unifr.ch/people/lalanned/",
-      "fields" : ["Human-Computer Interaction"],
-      "group" : "Human-IST Center ",
-      "group-page" : "http://human-ist.unifr.ch",
-      "email" : "denis.lalanne@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Groeflin, Heinz",
-      "title" : "Prof.",
-      "given-name" : "Heinz",
-      "family-name" : "Gr&ouml;flin",
-      "home-page" : "http://www.unifr.ch/informatics/departement/professoren/groeflin/default_f.php",
-      "fields" : ["Decision Support"],
-      "group" : "Decision Support Group ",
-      "group-page" : "http://diuf.unifr.ch/ds/",
-      "email" : "heinz.groeflin@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Widmer, Marino",
-      "title" : "Prof.",
-      "given-name" : "Marino",
-      "family-name" : "Widmer",
-      "home-page" : "http://diuf.unifr.ch/ds/marino.widmer/",
-      "fields" : ["Simulation", "CIM"],
-      "group" : "Decision Support Group ",
-      "group-page" : "http://diuf.unifr.ch/ds/",
-      "email" : "marino.widmer@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Hirsbrunner, Beat",
-      "title" : "Prof.",
-      "given-name" : "B&eacute;at",
-      "family-name" : "Hirsbrunner",
-      "home-page" : "http://diuf.unifr.ch/people/hirsbrun/",
-      "fields" : ["Pervasive and Artificial Intelligence"],
-      "group" : "Pervasive and Artificial Intelligence",
-      "group-page" : "http://diuf.unifr.ch/pai/",
-      "email" : "beat.hirsbrunner@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Ingold, Rolf",
-      "title" : "Prof.",
-      "given-name" : "Rolf",
-      "family-name" : "Ingold",
-      "home-page" : "http://diuf.unifr.ch/diva/web/site/index.php/home-people/2-people/16-rolf-ingolds-home-page",
-      "fields" : ["Document Analysis", "Image Analysis", "Voice Analysis"],
-      "group" : "Document, Image and Voice Analysis",
-      "group-page" : "http://diuf.unifr.ch/diva/siteDIVA04/html/home.html",
-      "email" : "rolf.ingold@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Cudre, Mauroux, Philippe",
-      "title" : "Prof.",
-      "given-name" : "Philippe",
-      "family-name" : "Cudr&eacute;-Mauroux",
-      "home-page" : "http://diuf.unifr.ch/main/xi/",
-      "fields" : ["Exascale Information Management"],
-      "group" : "Exascale Information Management",
-      "group-page" : "http://diuf.unifr.ch/main/xi/",
-      "email" : "pcm@unifr.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Meier (UNIFR), Andreas",
-      "title" : "Prof.",
-      "given-name" : "Andreas",
-      "family-name" : "Meier",
-      "home-page" : "http://diuf.unifr.ch/is/andreas_meier",
-      "fields" : ["Information Systems"],
-      "group" : "Information Systems",
-      "group-page" : "http://diuf.unifr.ch/is/home.php",
-      "email" : "andreas.meier@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "representative" : "yes",
-      "label" : "Ultes, Nitsche, Ulrich",
-      "title" : "Prof.",
-      "given-name" : "Ulrich",
-      "family-name" : "Ultes-Nitsche",
-      "home-page" : "http://diuf.unifr.ch/people/uun/",
-      "fields" : ["Formal Methods", "Automata Theory", "Logic"],
-      "group" : "Telecommunications, Networks & Security",
-      "group-page" :"http://diuf.unifr.ch/people/uun/",
-      "email" : "uun@unifr.ch",
-      "since" : "2004"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "Dept. Informatics",
-      "label" : "Pasquier, Jacques",
-      "title" : "Prof.",
-      "given-name" : "Jacques",
-      "family-name" : "Pasquier",
-      "home-page" : "http://diuf.unifr.ch/people/pasquiej/",
-      "fields" : ["Software Engineering"],
-      "group" : "Software Engineering",
-      "group-page" : "http://diuf.unifr.ch/softeng/",
-      "email" : "jacques.pasquier@unifr.ch"
-    },
-    {
-      "file" : "unifr",
-      "org" : "University of Fribourg",
-      "inst" : "IIMT",
-      "label" : "Teufel, Stephanie",
-      "title" : "Prof.",
-      "given-name" : "Stephanie",
-      "family-name" : "Teufel",
-      "home-page" : "http://www.iimt.ch/index.php?id=140",
-      "fields" : ["Information management", "decision Support"],
-      "group" : "International Institute of Management in Technology",
-      "group-page" : "http://www.iimt.ch/",
-      "email" : "stephanie.teufel@unifr.ch"
-    },
-{
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "role" : ["Emeritus"],
-      "label" : "Harms, Juergen",
-      "title" : "Prof.",
-      "given-name" : "J&uuml;rgen",
-      "family-name" : "Harms",
-      "email" : "Juergen.Harms@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "role" : ["Emeritus"],
-      "label" : "Levrat, Bernard",
-      "title" : "Prof.",
-      "given-name" : "Bernard",
-      "family-name" : "Levrat",
-      "home-page" : "http://cui.unige.ch/~levrat/",
-      "email" : "Bernard.Levrat@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "role" : ["Emeritus"],
-      "label" : "Tsichritzis, Dennis",
-      "title" : "Prof.",
-      "given-name" : "Dennis",
-      "family-name" : "Tsichritzis",
-      "email" : "Dennis.Tsichritzis@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "role" : ["Emeritus"],
-      "label" : "Morel, Raymond",
-      "title" : "Prof.",
-      "given-name" : "Raymond",
-      "family-name" : "Morel",
-      "email" : "raymond.morel@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Rolim, Jose",
-      "title" : "Prof.",
-      "given-name" : "Jos&eacute;",
-      "family-name" : "Rolim",
-      "home-page" : "http://tcs.unige.ch/doku.php/user/rolim",
-      "fields" : ["Theoretical Computer Science"],
-      "group" : "Theoretical Computer Science",
-      "group-page" : "http://cui.unige.ch/tcs/index.html",
-      "email" : "Jose.Rolim@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Leone, Pierre",
-      "title" : "Prof.",
-      "given-name" : "Pierre",
-      "family-name" : "Leonne",
-      "home-page" : "http://tcs.unige.ch/doku.php/user/leone",
-      "fields" : ["Theoretical Computer Science", "Sensor Networks"],
-      "group" : "Theoretical Computer Science",
-      "group-page" : "http://cui.unige.ch/tcs/index.html",
-      "email" : "pierre.leone@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "role" : ["Emeritus"],
-      "label" : "Pellegrini, Christian",
-      "title" : "Prof.",
-      "given-name" : "Christian",
-      "family-name" : "Pellegrini",
-      "home-page" : "http://cui.unige.ch/AI-group/home.html",
-      "fields" : ["Artificial Intelligence"],
-      "group" : "Artificial Intelligence Group",
-      "group-page" : "http://cui.unige.ch/AI-group/home.html",
-      "email" : "Christian.Pellegrini@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Buchs, Didier",
-      "title" : "Prof.",
-      "given-name" : "Didier",
-      "family-name" : "Buchs",
-      "home-page" : "http://smv.unige.ch/tiki-index.php",
-      "fields" : ["Software Modelling", "Verification"],
-      "group" : "Software Modelling and Verification",
-      "group-page" : "http://smv.unige.ch/tiki-index.php",
-      "email" : "didier.buchs@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Pun, Thierry",
-      "title" : "Prof.",
-      "given-name" : "Thierry",
-      "family-name" : "Pun",
-      "home-page" : "http://cvml.unige.ch/doku.php/members/thierrypun",
-      "fields" : ["Computer Vision"],
-      "group" : "Computer Vision Group",
-      "group-page" : "http://cvml.unige.ch/",
-      "email" : "Thierry.Pun@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Voloshynovskiy, Sviatoslav",
-      "title" : "Prof.",
-      "given-name" : "Sviatoslav",
-      "family-name" : "Voloshynovskiy",
-      "home-page" : "http://cvml.unige.ch/doku.php/members/sviatoslavvoloshynovskiy",
-      "fields" : ["Computer Vision"],
-      "group" : "Computer Vision Group",
-      "group-page" : "http://cvml.unige.ch/",
-      "email" : "Svyatoslav.Voloshynovskyy@unige.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Koval, Oleksiy",
-      "title" : "Prof.",
-      "given-name" : "Oleksiy",
-      "family-name" : "Koval",
-      "home-page" : "http://cvml.unige.ch/doku.php/members/oleksiykoval",
-      "fields" : ["Computer Vision", "Stochastic Information Processing"],
-      "group" : "Computer Vision Group",
-      "group-page" : "http://cvml.unige.ch/",
-      "email" : "Oleksiy.Koval@unige.ch",
-      "since" : "2010"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Leonard, Michel",
-      "title" : "Prof.",
-      "given-name" : "Michel",
-      "family-name" : "L&eacute;onard",
-      "home-page" : "http://matis.unige.ch/Members/leonard",
-      "fields" : ["Databases"],
-      "group" : "Database Research Group",
-      "group-page" : "http://matis.unige.ch/",
-      "email" : "Michel.Leonard@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Magnenat, Thalmann, Nadia",
-      "title" : "Prof.",
-      "given-name" : "Nadia",
-      "family-name" : "Magnenat-Thalmann",
-      "home-page" : "http://www.miralab.unige.ch/",
-      "fields" : ["Computer Graphics"],
-      "group" : "MIRALab Computer Graphics & Animation Group",
-      "group-page" : "http://miralabwww.unige.ch/",
-      "email" : "Nadia.Thalmann@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Wehrli, Eric",
-      "title" : "Prof.",
-      "given-name" : "Eric",
-      "family-name" : "Wehrli",
-      "home-page" : "http://www.latl.unige.ch/personal/eric_f.html",
-      "fields" : ["Language Analysis"],
-      "group" : "Laboratory For Language Analysis And Technology",
-      "group-page" : "http://www.latl.unige.ch/",
-      "email" : "Eric.Wehrli@lettres.unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Merlo, Paola",
-      "title" : "Prof.",
-      "given-name" : "Merlo",
-      "family-name" : "Paola",
-      "home-page" : "http://www.latl.unige.ch/personal/paola.html",
-      "fields" : ["Language Analysis"],
-      "group" : "Laboratory For Language Analysis And Technology",
-      "group-page" : "http://www.latl.unige.ch/",
-      "email" : "Paola.Merlo@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Chopard, Bastien",
-      "title" : "Prof.",
-      "given-name" : "Bastien",
-      "family-name" : "Chopard",
-      "home-page" : "http://cui.unige.ch/~chopard/",
-      "fields" : ["Scientific and Parallel Computing"],
-      "group" : "Scientific and Parallel Computing Group",
-      "group-page" : "http://spc.unige.ch/",
-      "email" : "Bastien.Chopard@unige.ch"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Konstantas, Dimitri",
-      "title" : "Prof.",
-      "given-name" : "Dimitri",
-      "family-name" : "Konstantas",
-      "home-page" : "http://asg.unige.ch/index.php?cat=team&id=001&name=Dimitri%20Konstantas",
-      "fields" : ["Multimedia", "Mobile health", "e-Commerce"],
-      "group" : "Advanced Systems Group",
-      "group-page" : "http://asg.unige.ch/index.php",
-      "email" : "dimitri.konstantas@unige.ch",
-      "since" : "2004"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Falquet, Gilles",
-      "title" : "Prof.",
-      "given-name" : "Gilles",
-      "family-name" : "Falquet",
-      "home-page" : "http://cui.unige.ch/~falquet",
-      "fields" : ["Knowledge engineering"],
-      "group" : "Knowledge engineering",
-      "group-page" : "http://www.unige.ch/icle",
-      "email" : "gilles.falquet@unige.ch",
-      "since" : "2006"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Metral, Claudine",
-      "title" : "Prof.",
-      "given-name" : "Claudine",
-      "family-name" : "M&eacute;tral",
-      "home-page" : "http://www.unige.ch/icle",
-      "fields" : ["Knowledge engineering"],
-      "group" : "Knowledge engineering",
-      "group-page" : "http://www.unige.ch/icle",
-      "email" : "claudine.metral@unige.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "representative" : "yes",
-      "label" : "Di Marzo Serugendo, Giovanna",
-      "title" : "Prof.",
-      "given-name" : "Giovanna",
-      "family-name" : "Di Marzo Serugendo",
-      "home-page" : "http://iss.unige.ch/?q=users/giovanna-di-marzo-serugendo",
-      "fields" : ["Ecosystem of Services", "Swarm Intelligence", "Self-Organising Systems", "Autonomic Systems"],
-      "group" : "Institute of services science",
-      "group-page" : "http://iss.unige.ch",
-      "email" : "Giovanna.DiMarzo@unige.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Morin, Jean-Henry",
-      "title" : "Prof.",
-      "given-name" : "Jean-Henry",
-      "family-name" : "Morin",
-      "home-page" : "http://iss.unige.ch",
-      "fields" : ["Digital Rights", "Policies for Services"],
-      "group" : "Institute of services science",
-      "group-page" : "http://iss.unige.ch",
-      "email" : "Jean-Henry.Morin@unige.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Seigneur, Jean-Marc",
-      "title" : "Prof.",
-      "given-name" : "Jean-Marc",
-      "family-name" : "Seigneur",
-      "home-page" : "http://iss.unige.ch",
-      "fields" : ["Computational trust management", "Online reputation services", "Security and privacy of augmented human technologies"],
-      "group" : "Institute of services science",
-      "group-page" : "http://iss.unige.ch",
-      "email" : "Jean-Marc.Seigneur@unige.ch",
-      "since" : "2010"
-    },
-
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Ralyte, Jolita",
-      "title" : "Prof.",
-      "given-name" : "Jolita",
-      "family-name" : "Ralyte",
-      "home-page" : "http://iss.unige.ch",
-      "fields" : ["Method and requirement engineering"],
-      "group" : "Institute of services science",
-      "group-page" : "http://iss.unige.ch",
-      "email" : "Jolita.Ralyte@unige.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Moccozet, Laurent",
-      "title" : "Prof.",
-      "given-name" : "Laurent",
-      "family-name" : "Moccozet",
-      "home-page" : "http://iss.unige.ch",
-      "fields" : ["Educational and multimedia systems and services"],
-      "group" : "Institute of services science",
-      "group-page" : "http://iss.unige.ch",
-      "email" : "Laurent.Moccozet@unige.ch",
-      "since" : "2009"
-    },
-    {
-      "file" : "unige",
-      "org" : "University of Geneva",
-      "inst" : "CUI",
-      "label" : "Marchand-Maillet, Stephane",
-      "title" : "Prof.",
-      "given-name" : "Stephane",
-      "family-name" : "Marchand-Maillet",
-      "home-page" : "http://viper.unige.ch",
-      "fields" : ["Information Retrieval","Machine Learning", "Knowledge engineering","Data mining","Multimedia","Databases","e-Commerce", "Computer Vision"],
-      "group" : "Information Retrieval and Machine Learning",
-      "group-page" : "http://viper.unige.ch",
-      "email" : "Stephane.Marchand-Maillet@unige.ch",
-      "since" : "2003"
-    },
-{
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Duparc, Jacques",
-      "title" : "Prof.",
-      "given-name" : "Jacques",
-      "family-name" : "Duparc",
-      "home-page" : "http://www.hec.unil.ch/people/jduparc",
-      "fields" : ["Theoretical Computer Science", "Logic"],
-      "email" : "Jacques.Duparc@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Estier, Thibault",
-      "title" : "Prof.",
-      "given-name" : "Thibault",
-      "family-name" : "Estier",
-      "home-page" : "http://www.hec.unil.ch/people/testier",
-      "fields" : ["Information Systems Integration and Evolution"],
-      "email" : "Thibault.Estier@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Garbinato, Benoit",
-      "title" : "Prof.",
-      "given-name" : "Beno&icirc;t",
-      "family-name" : "Garbinato",
-      "home-page" : "http://www.hec.unil.ch/people/bgarbinato",
-      "fields" : ["Distributed Systems", "Networks"],
-      "group" : "Distributed Object Programming Laboratory (DOPLab)",
-      "email" : "Benoit.Garbinato@unil.ch",
-      "since" : "Jan.2003"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Ghernaouti, Helie, Solange",
-      "title" : "Prof.",
-      "given-name" : "Solange",
-      "family-name" : "Ghernaouti-H&eacute;lie",
-      "home-page" : "http://www.hec.unil.ch/people/sgh",
-      "fields" : ["Telecommunication", "Security"],
-      "group" : "Security in Digital Environments (SeDgE)",
-      "email" : "sgh@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "representative" : "yes",
-      "label" : "Legner, Christine",
-      "title" : "Prof.",
-      "given-name" : "Christine",
-      "family-name" : "Legner",
-      "home-page" : "http://www.hec.unil.ch/people/clegner",
-      "fields" : ["Service-Oriented Architectures"],
-      "email" : "Christine.Legner@unil.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Missonier, Stephanie",
-      "title" : "Prof.",
-      "given-name" : "St&eacute;phanie",
-      "family-name" : "Missonier",
-      "home-page" : "http://www.hec.unil.ch/people/smissonier",
-      "fields" : ["IT Governance", "Project Management"],
-      "email" : "Stephanie.Missonier@unil.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Pigneur, Yves",
-      "title" : "Prof.",
-      "given-name" : "Yves",
-      "family-name" : "Pigneur",
-      "home-page" : "http://www.hec.unil.ch/people/ypigneur",
-      "fields" : ["Information Systems", "Business Models"],
-      "email" : "Yves.Pigneur@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Tomassini, Marco",
-      "title" : "Prof.",
-      "given-name" : "Marco",
-      "family-name" : "Tomassini",
-      "home-page" : "http://www.hec.unil.ch/people/mtomassini",
-      "fields" : ["Bio-inspired computations and Machines"],
-      "email" : "Marco.Tomassini@unil.ch",
-      "since" : "2002"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Villa, Alessandro",
-      "title" : "Prof.",
-      "given-name" : "Alessandro",
-      "family-name" : "Villa",
-      "home-page" : "http://www.hec.unil.ch/people/avilla",
-      "fields" : ["Computational Neuroscience"],
-      "email" : "Alessandro.Villa@unil.ch",
-      "since" : "2011"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "label" : "Wentland, Maia",
-      "title" : "Prof.",
-      "given-name" : "Maia",
-      "family-name" : "Wentland",
-      "home-page" : "http://www.hec.unil.ch/people/mwf",
-      "fields" : ["Knowledge Management", "e-Learning"],
-      "email" : "mwf@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "role" : ["Emeritus"],
-      "label" : "Bonzon, Pierre",
-      "title" : "Prof.",
-      "given-name" : "Pierre",
-      "family-name" : "Bonzon",
-      "home-page" : "http://www.hec.unil.ch/people/pbonzon",
-      "fields" : ["Artificial Intelligence", "Agent-based Systems"],
-      "email" : "Pierre.Bonzon@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "role" : ["Emeritus"],
-      "label" : "Munari, Silvio",
-      "title" : "Prof.",
-      "given-name" : "Silvio",
-      "family-name" : "Munari",
-      "home-page" : "http://www.hec.unil.ch/people/smunari",
-      "fields" : ["Business Information Systems", "Information Systems Modeling"],
-      "email" : "Silvio.Munari@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "role" : ["Emeritus"],
-      "label" : "Grize, Fran&ccedil;ois",
-      "title" : "Prof.",
-      "given-name" : "Fran&ccedil;ois",
-      "family-name" : "Grize",
-      "home-page" : "http://www.hec.unil.ch/people/fgrize",
-      "fields" : ["Human-Machine Interface"],
-      "email" : "Francois.Grize@unil.ch"
-    },
-    {
-      "file" : "unil",
-      "org" : "University of Lausanne",
-      "inst" : "ISI",
-      "role" : ["Emeritus"],
-      "label" : "Probst, Andr&eacute;-Ren&eacute;",
-      "title" : "Prof.",
-      "given-name" : "Andr&eacute;-Ren&eacute;",
-      "family-name" : "Probst",
-      "home-page" : "http://www.hec.unil.ch/people/aprobst",
-      "fields" : ["Business Information Systems", "Knowledge Management"],
-      "email" : "Andre-Rene.Probst@unil.ch"
-    },
-{
-      "file" : "unine",
-      "org" : "University of Neuchatel",
-      "inst" : "IIUN",
-      "role" : ["Emeritus"],
-      "label" : "Erard, Pierre, Jean",
-      "title" : "Prof.",
-      "given-name" : "Pierre-Jean",
-      "family-name" : "Erard",
-      "email" : "pierre-jean.erard@unine.ch"
-    },
-    {
-      "file" : "unine",
-      "org" : "University of Neuchatel",
-      "inst" : "IIUN",
-      "role" : ["Emeritus"],
-      "label" : "Naegeli, Hans, Heinrich",
-      "title" : "Prof.",
-      "given-name" : "Hans-Heinrich",
-      "family-name" : "N&auml;geli",
-      "email" : "hans.naegeli@unine.ch"
-    },
-    {
-      "file" : "unine",
-      "org" : "University of Neuchatel",
-      "inst" : "IIUN",
-      "representative" : "yes",
-      "label" : "Kropf, Peter",
-      "title" : "Prof.",
-      "given-name" : "Peter",
-      "family-name" : "Kropf",
-      "home-page" : "http://members.unine.ch/peter.kropf/index.html",
-      "fields" : ["Distributed systems"],
-      "group" : "Distributed systems",
-      "group-page" : "http://iiun.unine.ch",
-      "email" : "peter.kropf@unine.ch"
-    },
-    {
-      "file" : "unine",
-      "org" : "University of Neuchatel",
-      "inst" : "IIUN",
-      "label" : "Felber, Pascal",
-      "title" : "Prof.",
-      "given-name" : "Pascal",
-      "family-name" : "Felber",
-      "home-page" : "http://members.unine.ch/pascal.felber/index.html",
-      "group-page" : "http://iiun.unine.ch",
-      "fields" : ["Distributed systems"],
-      "email" : "pascal.felber@unine.ch"
-    },
-    {
-      "file" : "unine",
-      "org" : "University of Neuchatel",
-      "inst" : "IIUN",
-      "label" : "Savoy, Jacques",
-      "title" : "Prof.",
-      "given-name" : "Jacques",
-      "family-name" : "Savoy",
-      "home-page" : "http://www2.unine.ch/jacques.savoy",
-      "fields" : ["Information Retrieval"],
-      "group" : "Information Retrieval",
-      "group-page" : "http://iiun.unine.ch",
-      "email" : "jacques.savoy@unine.ch"
-    },
-    {
-      "file" : "unine",
-      "org" : "University of Neuchatel",
-      "inst" : "IMI",
-      "label" : "Stoffel, Kilian",
-      "title" : "Prof.",
-      "given-name" : "Kilian",
-      "family-name" : "Stoffel",
-      "home-page" : "http://www3.unine.ch/members/kilian.stoffel",
-      "fields" : ["Knowledge Information &amp; Data Processing"],
-      "group" : "Knowledge Information &amp; Data Processing",
-      "group-page" : "http://www.unine.ch/imi/",
-      "email" : "Kilian.Stoffel@unine.ch"
-    },
-{
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "label" : "Leimeister, Jan Marco",
-      "title" : "Prof.",
-      "given-name" : "Jan Marco",
-      "family-name" : "Leimeister",
-      "home-page" : "http://www.iwi.unisg.ch/nc/ueber-uns/team/details/?tx_smemployeelist_pi1%5BempID%5D=1095",
-      "fields" : ["crowd sourcing", "service engineering", "collaboration engineering"],
-      "email" : "janmarco.leimeister@unisg.ch"
-    },    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "label" : "Back, Andrea",
-      "title" : "Prof.",
-      "given-name" : "Andrea",
-      "family-name" : "Back",
-      "home-page" : "http://verdi.unisg.ch/org/iwi/iwi_web.nsf/wwwTeamGer/BackAndrea.htm",
-      "fields" : ["E-learning", "Mobile business", "Social Software"],
-      "email" : "andrea.back@unisg.ch"
-    },
-    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "label" : "Brenner, Walter",
-      "title" : "Prof.",
-      "given-name" : "Walter",
-      "family-name" : "Brenner",
-      "home-page" : "http://verdi.unisg.ch/org/iwi/iwi_web.nsf/wwwTeamGer/BrennerWalter.htm",
-      "fields" : ["Integrated Information Management"],
-      "email" : "walter.brenner@unisg.ch"
-    },
-    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "role" : ["Board member"],
-      "representative" : "yes",
-      "label" : "Jung, Reinhard",
-      "title" : "Prof.",
-      "given-name" : "Reinhard",
-      "family-name" : "Jung",
-      "home-page" : "http://web.iwi.unisg.ch/org/iwi/IWI_Web_2.nsf/wwwPubMemberGer/JungReinhard.htm",
-      "fields" : ["Business Engineering", "Social Media", "Business Value of IT"],
-      "email" : "Reinhard.Jung@unisg.ch",
-      "since" : "2009"
-    },
-    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "role" : ["Emeritus"],
-      "label" : "Oesterle, Hubert",
-      "title" : "Prof.",
-      "given-name" : "Hubert",
-      "family-name" : "&Ouml;sterle",
-      "home-page" : "http://verdi.unisg.ch/org/iwi/iwi_web.nsf/wwwTeamGer/OesterleHubert.htm",
-      "fields" : ["Corporate Data Quality", "Independent Living"],
-      "email" : "hubert.oesterle@unisg.ch"
-    },
-    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "label" : "Winter, Robert",
-      "title" : "Prof.",
-      "given-name" : "Robert",
-      "family-name" : "Winter",
-      "home-page" : "http://verdi.unisg.ch/org/iwi/iwi_web.nsf/wwwTeamGer/WinterRobert.htm",
-      "fields" : ["Business IT", "Business Engineering"],
-      "email" : "robert.winter@unisg.ch"
-    },
-    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "label" : "Fleisch, Elgar",
-      "title" : "Prof.",
-      "given-name" : "Elgar",
-      "family-name" : "Fleisch",
-      "home-page" : "http://www.im.ethz.ch/people/efleisch",
-      "fields" : ["Ubiquitous Computing"],
-      "group" : "Technology management",
-      "group-page" : "http://www.item.unisg.ch/",
-      "email" : "elgar.fleisch@unisg.ch"
-    },
-    {
-      "file" : "unisg",
-      "org" : "University of St Gallen",
-      "inst" : "IWI",
-      "role" : ["Emeritus"],
-      "label" : "Schmid, Beat",
-      "title" : "Prof.",
-      "given-name" : "Beat",
-      "family-name" : "Schmid",
-      "home-page" : "http://www.mcm.unisg.ch/content/view/33/164/lang,de/",
-      "email" : "beat.schmid@unisg.ch"
-    },
- {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Alippi, Cesare",
-      "title" : "Prof.",
-      "given-name" : "Cesare",
-      "family-name" : "Alippi",
-      "home-page" : "http://search.usi.ch/people/e45d870d6c9d9871b8cf6493e73c0558/Alippi-Cesare",
-      "fields" : ["Adaptation and Learning mechanisms", "Intelligence for cyber-physical and embedded systems"],
-      "email" : "cesare.alippi@usi.ch",
-      "since" : "2015"
-    },
-	{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Bavota, Gabriele",
-      "title" : "Prof.",
-      "given-name" : "Gabriele",
-      "family-name" : "Bavota",
-      "home-page" : "http://www.inf.usi.ch/faculty/bavota",
-      "fields" : ["Software Engineering"],
-      "email" : "gabriele.bavota@usi.ch",
-      "since" : "2016"
-    },
-	{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Binder, Walter",
-      "title" : "Prof.",
-      "given-name" : "Walter",
-      "family-name" : "Binder",
-      "home-page" : "http://www.inf.usi.ch/faculty/binder",
-      "fields" : ["Dynamic program analysis"],
-      "email" : "walter.binder@usi.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Bronstein, Michael",
-      "title" : "Prof.",
-      "given-name" : "Michael",
-      "family-name" : "Bronstein",
-      "home-page" : "http://www.inf.usi.ch/bronstein/",
-      "fields" : ["Machine Learning", "Computer Vision"],
-      "email" : "michael.bronstein@usi.ch",
-      "since" : "2010"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Carzaniga, Antonio",
-      "title" : "Prof.",
-      "given-name" : "Antonio",
-      "family-name" : "Carzaniga",
-      "home-page" : "http://www.inf.usi.ch/carzaniga",
-      "fields" : ["Distributed Computing"],
-      "email" : "antonio.carzaniga@usi.ch",
-      "since" : "2004"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Crestani, Fabio",
-      "title" : "Prof.",
-      "given-name" : "Fabio",
-      "family-name" : "Crestani",
-      "home-page" : "http://www.inf.usi.ch/faculty/crestani",
-      "fields" : ["Information Retrieval"],
-      "email" : "fabio.crestani@usi.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Hauswirth, Matthias",
-      "title" : "Prof.",
-      "given-name" : "Matthias",
-      "family-name" : "Hauswirth",
-      "home-page" : "http://www.inf.usi.ch/faculty/hauswirth",
-      "fields" : ["Programming Languages", "Software Engineering"],
-      "email" : "matthias.hauswirth@usi.ch",
-      "since" : "2005"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Horenko, Illia",
-      "title" : "Prof.",
-      "given-name" : "Illia",
-      "family-name" : "Horenko",
-      "home-page" : "https://www.ics.usi.ch/index.php/people-detail-page/illia-horenko",
-      "fields" : ["Computational Time Series Analysis"],
-      "email" : "illia.horenko@usi.ch",
-      "since" : "2010"
-    },
-{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Hormann, Kai",
-      "title" : "Prof.",
-      "given-name" : "Kai",
-      "family-name" : "Hormann",
-      "home-page" : "http://www.inf.usi.ch/faculty/hormann",
-      "fields" : ["Geometric Modeling"],
-      "email" : "kai.hormann@usi.ch",
-      "since" : "2009"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Jazayeri, Mehdi",
-      "title" : "Prof.",
-      "given-name" : "Mehdi",
-      "family-name" : "Jazayeri",
-      "home-page" : "http://www.inf.usi.ch/faculty/jazayeri",
-      "fields" : ["Software Engineering"],
-      "email" : "mehdi.jazayeri@usi.ch",
-      "since" : "2004"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Krause, Rolf",
-      "title" : "Prof.",
-      "given-name" : "Rolf",
-      "family-name" : "Krause",
-      "home-page" : "http://www.ics.inf.usi.ch/people/prof-rolf-krause.html",
-      "fields" : ["Scientific Computing"],
-      "email" : "rolf.krause@usi.ch",
-      "since" : "2009"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Langheinrich, Marc",
-      "title" : "Prof.",
-      "given-name" : "Marc",
-      "family-name" : "Langheinrich",
-      "home-page" : "http://www.inf.usi.ch/faculty/langheinrich",
-      "fields" : ["Ubiquitous Computing"],
-      "email" : "marc.langheinrich@usi.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Lanza, Michele",
-      "title" : "Prof.",
-      "given-name" : "Michele",
-      "family-name" : "Lanza",
-      "home-page" : "http://www.inf.usi.ch/faculty/lanza",
-      "fields" : ["Software Engineering"],
-      "email" : "michele.lanza@usi.ch",
-      "since" : "2004"
-    },
-	{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Limongelli, Vittorio",
-      "title" : "Prof.",
-      "given-name" : "Vittorio",
-      "family-name" : "Limongelli",
-      "home-page" : "http://www.inf.usi.ch/faculty/limongelli",
-      "fields" : ["tbd"],
-      "email" : "vittorio.limongelli@usi.ch",
-      "since" : "2015"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Nystrom, Nate",
-      "title" : "Prof.",
-      "given-name" : "Nate",
-      "family-name" : "Nystrom",
-      "home-page" : "http://www.inf.usi.ch/nystrom",
-      "fields" : ["Programming Languages"],
-      "email" : "nate.nystrom@usi.ch",
-      "since" : "2010"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Papadopoulou, Evanthia",
-      "title" : "Prof.",
-      "given-name" : "Evanthia",
-      "family-name" : "Papadopoulou",
-      "home-page" : "http://www.inf.usi.ch/faculty/papadopoulou",
-      "fields" : ["Algorithms", "Computational Geometry"],
-      "email" : "evanthia.papadopoulou@usi.ch",
-      "since" : "2008"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Parrinello, Michele",
-      "title" : "Prof.",
-      "given-name" : "Michele",
-      "family-name" : "Parrinello",
-      "home-page" : "http://www.rgp.ethz.ch/",
-      "fields" : ["Computational Science"],
-      "email" : "parrinello@phys.chem.ethz.ch",
-      "since" : "2010"
-    },{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Pautasso, Cesare",
-      "title" : "Prof.",
-      "given-name" : "Cesare",
-      "family-name" : "Pautasso",
-      "home-page" : "http://www.inf.usi.ch/faculty/pautasso",
-      "fields" : ["Software Composition for Distributed Systems"],
-      "email" : "cesare.pautasso@usi.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Pedone, Fernando",
-      "title" : "Prof.",
-      "given-name" : "Fernando",
-      "family-name" : "Pedone",
-      "home-page" : "http://www.inf.usi.ch/faculty/pedone",
-      "fields" : ["Dependable Distributed Systems"],
-      "email" : "fernando.pedone@usi.ch",
-      "since" : "2004"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "representative" : "yes",
-      "label" : "Pezze, Mauro",
-      "title" : "Prof.",
-      "given-name" : "Mauro",
-      "family-name" : "Pezz&egrave;",
-      "home-page" : "http://www.inf.usi.ch/faculty/pezze",
-      "fields" : ["Software Engineering"],
-      "email" : "mauro.pezze@usi.ch",
-      "since" : "2007"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Pivkin, Igor",
-      "title" : "Prof.",
-      "given-name" : "Igor",
-      "family-name" : "Pivkin",
-      "home-page" : "https://www.ics.usi.ch/index.php/people-detail-page/igor-pivkin",
-      "fields" : ["Applied Mathematics"],
-      "email" : "igor.pivkin@usi.ch",
-      "since" : "2010"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Pozzi, Laura",
-      "title" : "Prof.",
-      "given-name" : "Laura",
-      "family-name" : "Pozzi",
-      "home-page" : "http://www.inf.usi.ch/faculty/pozzi",
-      "fields" : ["Compiler and Architecture Design", "Embedded Systems"],
-      "email" : "laura.pozzi@usi.ch",
-      "since" : "2005"
-    },
-		{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Santini, Silvia",
-      "title" : "Prof.",
-      "given-name" : "Silvia",
-      "family-name" : "Santini",
-      "home-page" : "http://www.inf.usi.ch/faculty/santini",
-	  "fields" : ["Cyber-physical systems", "Mobile sensing"],
-      "group-page" : "http://search.usi.ch/faculties/3/Faculty-of-Informatics/people",
-      "email" : "silvia.santini@usi.ch",
-      "since" : "2016"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Schenk, Olaf",
-      "title" : "Prof.",
-      "given-name" : "Olaf",
-      "family-name" : "Schenk",
-      "home-page" : "https://www.ics.usi.ch/index.php/people-detail-page/olaf-schenk",
-      "fields" : ["Computational mathematics, scientific computing and high-performance computing"],
-      "email" : "olaf.schenk@usi.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Schmidhuber, Juergen",
-      "title" : "Prof.",
-      "given-name" : "J&uuml;rgen",
-      "family-name" : "Schmidhuber",
-      "home-page" : "http://www.idsia.ch/~juergen/",
-      "fields" : ["Computer Vision"],
-      "email" : "juergen@idsia.ch",
-      "since" : "2009"
-    },
-	 {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Sharygina, Natasha",
-      "title" : "Prof.",
-      "given-name" : "Natasha",
-      "family-name" : "Sharygina",
-      "home-page" : "http://www.inf.usi.ch/faculty/sharygina",
-      "fields" : ["Software and Hardware verification"],
-      "email" : "natasha.sharygina@usi.ch",
-      "since" : "2005"
-    },
-   	{
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Soulé, Robert",
-      "title" : "Prof.",
-      "given-name" : "Robert",
-      "family-name" : "Soulé",
-      "home-page" : "http://www.inf.usi.ch/faculty/soule",
-      "fields" : ["systems and applied programming languages"],
-      "email" : "robert.soule@usi.ch",
-      "since" : "2014"
-    },
-    {
-      "file" : "usi",
-      "org" : "University of Lugano - USI",
-      "inst" : "Faculty of Informatics",
-      "label" : "Wolf, Stefan",
-      "title" : "Prof.",
-      "given-name" : "Stefan",
-      "family-name" : "Wolf",
-      "home-page" : "http://search.usi.ch/people/eefbe656c9dfacf0e1a1e15bf8893bcb/Wolf-Stefan",
-      "fields" : ["Cryptography", "Information Theory"],
-      "email" : "stefan.wolf@usi.ch",
-      "since" : "2011"
-    },
-{
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "role" : ["Emeritus"],
-     "label" : "Bauknecht, Kurt",
-     "title" : "Prof.",
-     "given-name" : "Kurt",
-     "family-name" : "Bauknecht",
-     "email" : "baukn@ifi.uzh.ch",
-     "since" : "2003"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "role" : ["Emeritus"],
-     "label" : "Richter, Lutz",
-     "title" : "Prof.",
-     "given-name" : "Lutz",
-     "family-name" : "Richter",
-     "home-page" : "http://www.ifi.uzh.ch/staff/richter/",
-     "email" : "richter@ifi.uzh.ch"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "role" : ["Emeritus"],
-     "label" : "Schauer, Helmut",
-     "title" : "Prof.",
-     "given-name" : "Helmut",
-     "family-name" : "Schauer",
-     "home-page" : "http://www.ifi.uzh.ch/staff/schauer/",
-     "email" : "schauer@ifi.uzh.ch",
-     "since" : "2009"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "role" : ["Emeritus"],
-     "label" : "Stucki, Peter",
-     "title" : "Prof.",
-     "given-name" : "Peter",
-     "family-name" : "Stucki",
-     "home-page" : "http://www.ifi.uzh.ch/mml/crew/stucki.php4",
-     "email" : "stucki@ifi.uzh.ch",
-     "since" : "2003"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "role" : ["Emeritus"],
-     "label" : "Pfeifer, Rolf",
-     "title" : "Prof.",
-     "given-name" : "Rolf",
-     "family-name" : "Pfeifer",
-     "home-page" : "http://www.ifi.uzh.ch/staff/pfeifer/",
-     "fields" : ["Artificial Intelligence"],
-     "group" : "Artificial Intelligence",
-     "group-page" : "http://www.ifi.uzh.ch/groups/ailab/",
-     "email" : "pfeifer@ifi.uzh.ch",
-     "since" : "2014"
-   },
-    {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "CL",
-     "role" : ["Emeritus"],
-     "label" : "Hess, Michael",
-     "title" : "Prof.",
-     "given-name" : "Michael",
-     "family-name" : "Hess",
-     "home-page" : "http://www.cl.uzh.ch/people/team/hess.html",
-     "fields" : ["Computational Linguistics"],
-     "group" : "Computational Linguistics",
-     "group-page" : "http://www.cl.uzh.ch",
-     "email" : "hess@cl.uzh.ch",
-     "since" : "2015"
-   },
- {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "representative" : "yes",
-     "role" : ["Board member", "President"],
-     "label" : "Bernstein, Abraham",
-     "title" : "Prof.",
-     "given-name" : "Abraham",
-     "family-name" : "Bernstein",
-     "home-page" : "http://www.ifi.uzh.ch/ddis/people/bernstein/",
-     "fields" : ["Semantic Web", "Artificial IntelligenceI", "Data Mining", "HCI", "CSCW", "Crowd Computing"],
-     "group" : "Dynamic and Distributed Information Systems",
-     "group-page" : "http://www.ifi.uzh.ch/ddis/",
-     "email" : "bernstein@ifi.uzh.ch",
-     "since" : "2002"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Boehlen, Michael",
-     "title" : "Prof.",
-     "given-name" : "Michael",
-     "family-name" : "B&ouml;hlen",
-     "home-page" : "http://www.ifi.uzh.ch/dbtg/Staff/Boehlen/",
-     "fields" : ["Database Technology"],
-     "group" : "Database Technology",
-     "group-page" : "http://www.ifi.uzh.ch/dbtg/",
-     "email" : "boehlen@ifi.uzh.ch",
-     "since" : "2009"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Fritz, Thomas",
-     "title" : "Prof.",
-     "given-name" : "Thomas",
-     "family-name" : "Fritz",
-     "home-page" : "http://seal.ifi.uzh.ch/fritz/",
-     "fields" : ["Software Quality", "Software Engineering"],
-     "group" : "Software Quality",
-     "group-page" : "http://seal.ifi.uzh.ch/",
-     "email" : "fritz@ifi.uzh.ch",
-     "since" : "2011"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Gall, Harald",
-     "title" : "Prof.",
-     "given-name" : "Harald",
-     "family-name" : "Gall",
-     "home-page" : "http://seal.ifi.uzh.ch/hg.html",
-     "fields" : ["Software Engineering"],
-     "group" : "Software Engineering",
-     "group-page" : "http://seal.ifi.uzh.ch/",
-     "email" : "gall@ifi.uzh.ch",
-     "since" : "2004"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Glinz, Martin",
-     "title" : "Prof.",
-     "given-name" : "Martin",
-     "family-name" : "Glinz",
-     "home-page" : "http://www.ifi.uzh.ch/rerg/people/glinz.html",
-     "fields" : ["Requirements Engineering", "Software Engineering", "Software Quality"],
-     "group" : "Requirements Engineering",
-     "group-page" : "http://www.ifi.uzh.ch/req/",
-     "email" : "glinz@ifi.uzh.ch",
-     "since" : "1993"
-    },
-    {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "CL",
-     "label" : "Volk, Martin",
-     "title" : "Prof.",
-     "given-name" : "Martin",
-     "family-name" : "Volk",
-     "home-page" : "http://www.cl.uzh.ch/people/team/volk.html",
-     "fields" : ["Computational Linguistics"],
-     "email" : "volk@cl.uzh.ch",
-     "since" : "2008"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Hilty, Lorenz",
-     "title" : "Prof.",
-     "given-name" : "Lorenz",
-     "family-name" : "Hilty",
-     "home-page" : "http://www.ifi.uzh.ch/isr/people/hilty.html",
-     "fields" : ["Informatics and Sustainability", "Environmental Informatics", "Green ICT", "Modelling and Simulation"],
-    "group" : "Informatics and Sustainability Research",
-     "group-page" : "http://www.ifi.uzh.ch/isr/",
-
-     "email" : "hilty@ifi.uzh.ch",
-     "since" : "2010"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Hu, Daning",
-     "title" : "Prof.",
-     "given-name" : "Daning",
-     "family-name" : "Hu",
-     "home-page" : "http://www.ifi.uzh.ch/bi/people/hu.html",
-     "fields" : ["Business Intelligence Research Group"],
-     "group" : "Business Intelligence Research Group",
-     "group-page" : "http://www.ifi.uzh.ch/bi.html",
-     "email" : "hdaning@ifi.uzh.ch",
-     "since" : "2011"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Huang, Elaine M.",
-     "title" : "Prof.",
-     "given-name" : "Elaine",
-     "family-name" : "Huang",
-     "home-page" : "http://www.ifi.uzh.ch/zpac/people/huang.html",
-     "fields" : ["People and Computing", "Human Computer Interaction", "HCI", "CSCW", "Ubiquitous Computing"],
-     "group" : "People and Computing",
-     "group-page" : "http://www.ifi.uzh.ch/zpac.html",
-     "email" : "huang@ifi.uzh.ch",
-     "since" : "2010"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Pajarola, Renato",
-     "title" : "Prof.",
-     "given-name" : "Renato",
-     "family-name" : "Pajarola",
-     "home-page" : "http://www.ifi.uzh.ch/vmml/people/current-staff/pajarola.html",
-     "fields" : ["Scientific Visualization", "3D Computer Graphics", "Interactive Rendering", "Geometric Modeling", "Scalable High Performance Visualization", "Physically Based Simulation"],
-     "group" : "Visualization and MultiMedia Lab",
-     "group-page" : "http://www.ifi.uzh.ch/vmml/",
-     "email" : "pajarola@acm.org",
-     "since" : "2005"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Scaramuzza, Davide",
-     "title" : "Prof.",
-     "given-name" : "Davide",
-     "family-name" : "Scaramuzza",
-     "home-page" : "https://sites.google.com/site/scarabotix/",
-     "fields" : ["Human-Oriented Robotics"],
-     "group" : "Human-Oriented Robotics",
-     "group-page" : "https://sites.google.com/site/scarabotix/",
-     "email" : "davide.scaramuzza@ieee.org",
-     "since" : "2012"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Seuken, Sven",
-     "title" : "Prof.",
-     "given-name" : "Sven",
-     "family-name" : "Seuken",
-     "home-page" : "http://www.ifi.uzh.ch/ce/people/seuken.html",
-     "fields" : ["Computation and Economics"],
-     "group" : "Computation and Economics",
-     "group-page" : "http://www.ifi.uzh.ch/ce.html",
-     "email" : "seuken@ifi.uzh.ch",
-     "since" : "2011"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Schwabe, Gerhard",
-     "title" : "Prof.",
-     "given-name" : "Gerhard",
-     "family-name" : "Schwabe",
-     "home-page" : "http://www.ifi.uzh.ch/im/",
-     "fields" : ["Information Management"],
-     "group" : "Information Management",
-     "group-page" : "http://www.ifi.uzh.ch/im/",
-     "email" : "schwabe@ifi.uzh.ch"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Stiller, Burkhard",
-     "title" : "Prof.",
-     "given-name" : "Burkhard",
-     "family-name" : "Stiller",
-     "home-page" : "http://www.csg.uzh.ch/",
-     "fields" : ["Communication systems"],
-     "group" : "Communication systems",
-     "group-page" : "http://www.csg.uzh.ch/",
-     "email" : "stiller@ifi.uzh.ch",
-     "since" : "2004"
-   },
-   {
-     "file" : "uzh",
-     "org" : "University of Zurich - UZH",
-     "inst" : "IfI",
-     "label" : "Wacharamanotham, Chat",
-     "title" : "Prof.",
-     "given-name" : "Chat",
-     "family-name" : "Wacharamanotham",
-     "home-page" : "http://www.ifi.uzh.ch/zpac/people/chat.html",
-     "fields" : ["People and Computing", "Human Computer Interaction", "Interaction Design"],
-     "group" : "People and Computing",
-     "group-page" : "http://www.ifi.uzh.ch/zpac.html",
-     "email" : "chat@ifi.uzh.ch",
-     "since" : "2016"
-   },
- {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "role" : ["Emeritus"],
-      "label" : "Bosshard, Alexander",
-      "given-name" : "Alexander",
-      "family-name" : "Bosshard",
-      "home-page" : "http://www.zhaw.ch/fileadmin/php_includes/popup/person-detail.php?kurzz=bsha",
-      "email" : "bsha@zhaw.ch",
-      "since" : "2012"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Ackermann, Philipp",
-      "title" : "Prof.",
-      "given-name" : "Philipp",
-      "family-name" : "Ackermann",
-      "group" : "School of Engineering",
-      "email" : "philipp.ackermann@zhaw.ch",
-	  "home-page" : "https://www.zhaw.ch/en/about-us/person/acke/"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Aders, Arnold",
-      "title" : "Prof.",
-      "given-name" : "Arnold",
-      "family-name" : "Aders",
-      "group" : "School of Engineering",
-      "email" : "arnold.aders@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Baudinot, Gerold",
-      "title" : "Prof.",
-      "given-name" : "Gerold",
-      "family-name" : "Baudinot",
-      "home-page" : "http://www.zhaw.ch/en/engineering/institute-of-applied-information-technology.html",
-      "group" : "Institute of Applied Information Technology",
-      "email" : "baug@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Bohnert, Thomas Michael ",
-      "title" : "Prof.",
-      "given-name" : "Thomas Michael ",
-      "family-name" : "Bohnert",
-      "home-page" : "http://tmb.nginet.de/",
-      "group" : "Institute of Applied Information Technology",
-      "email" : "thomasmichael.bohnert@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Braschler, Martin",
-      "title" : "Prof.",
-      "given-name" : "Martin",
-      "family-name" : "Braschler",
-      "group" : "School of Engineering",
-      "email" : "martin.braschler@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Cieliebak, Mark",
-      "title" : "Dr.",
-      "given-name" : "Mark",
-      "family-name" : "Cieliebak",
-      "group" : "School of Engineering",
-      "email" : "mark.cieliebak@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Darvishy, Alireza",
-      "title" : "Prof.",
-      "given-name" : "Alireza",
-      "family-name" : "Darvishy",
-      "group" : "School of Engineering",
-      "email" : "alireza.darvishy@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Hutter, Hans-Peter",
-      "title" : "Prof.",
-      "given-name" : "Hans-Peter",
-      "family-name" : "Hutter",
-      "group" : "School of Engineering",
-      "email" : "hans-peter.hutter@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Meier, Andreas",
-      "given-name" : "Andreas",
-      "family-name" : "Meier",
-      "group" : "School of Engineering",
-      "email" : "andreas.meier@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Rege, Karl",
-      "title" : "Prof.",
-      "given-name" : "Karl",
-      "family-name" : "Rege",
-      "home-page" : "http://waikiki.zhaw.ch/~rege/",
-      "group" : "School of Engineering",
-      "email" : "karl.rege@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Rennhard, Marc",
-      "title" : "Prof.",
-      "given-name" : "Marc",
-      "family-name" : "Rennhard",
-      "group" : "School of Engineering",
-      "email" : "marc.rennhard@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Tellenbach, Bernhard",
-      "given-name" : "Bernhard",
-      "family-name" : "Tellenbach",
-      "group" : "School of Engineering",
-      "email" : "bernhard.tellenbach@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Thaler, Markus",
-      "title" : "Prof.",
-      "given-name" : "Markus",
-      "family-name" : "Thaler",
-      "group" : "School of Engineering",
-      "email" : "markus.thaler@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Marti, Christof",
-      "given-name" : "Christof",
-      "family-name" : "Marti",
-      "group" : "School of Engineering",
-      "email" : "christof.marti@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Doran, Hans",
-      "title" : "Prof.",
-      "given-name" : "Hans",
-      "family-name" : "Doran",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "hans.doran@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Gelke, Hans-Joachim",
-      "title" : "Prof.",
-      "given-name" : "Hans-Joachim",
-      "family-name" : "Gelke",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "hans.gelke@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Gruber, Juan-Mario",
-      "title" : "Prof.",
-      "given-name" : "Juan-Mario",
-      "family-name" : "Gruber",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "jean-mario.gruber@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Hauser, Kurt",
-      "given-name" : "Kurt",
-      "family-name" : "Hauser",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "kurt.hauser@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Meli, Marcel",
-      "title" : "Prof.",
-      "given-name" : "Marcel",
-      "family-name" : "Meli",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "marcel.meli@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Müller, Thomas",
-      "title" : "Prof.",
-      "given-name" : "Thomas",
-      "family-name" : "Müller",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "thomas.mueller@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Rosenthal, Matthias",
-      "title" : "Dr.",
-      "given-name" : "Matthias",
-      "family-name" : "Rosenthal",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "matthias.rosenthal.gruber@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Rüst, Andreas",
-      "title" : "Prof.",
-      "given-name" : "Andreas",
-      "family-name" : "Rüst",
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "andreas.ruest@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Weibel, Hans",
-      "title" : "Prof.",
-      "given-name" : "Hans",
-      "family-name" : "Weibel",
-      "home-page" : "https://home.zhaw.ch/~wlan/",
-      "fields" : ["Cyber Physical Systems", "Communications Technology"],
-      "group" : "Institute of Embedded Systems",
-      "group-page" : "http://www.ines.zhaw.ch/",
-      "email" : "hans.weibel@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "representative" : "yes",
-      "label" : "Stern, Olaf",
-      "title" : "Prof.",
-      "given-name" : "Olaf",
-      "family-name" : "Stern",
-      "group" : "School of Engineering",
-      "email" : "olaf.stern@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Knaack, Reto",
-      "title" : "Dr.",
-      "given-name" : "Reto",
-      "family-name" : "Knaack",
-      "group" : "School of Engineering",
-      "email" : "reto.knaack@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Bachmann, Matthias",
-      "given-name" : "Matthias",
-      "family-name" : "Bachmann",
-      "group" : "School of Engineering",
-      "email" : "matthias.bachmann@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "de Spindler, Alexandre",
-      "title" : "Prof.",
-      "given-name" : "Alexandre",
-      "family-name" : "de Spindler",
-      "group" : "School of Management and Law",
-      "group-page" : "http://www.zwi.zhaw.ch/",
-      "email" : "alexandre.despindler@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Keller, Thomas",
-      "title" : "Prof.",
-      "given-name" : "Thomas",
-      "family-name" : "Keller",
-      "group" : "School of Management and Law",
-      "group-page" : "http://www.zwi.zhaw.ch/",
-      "email" : "th.keller@zhaw.ch"
-    },
-    {
-      "file" : "zfh",
-      "org" : "University of Applied Sciences of Zurich - ZFH",
-      "inst" : "ZHAW",
-      "label" : "Zeman, Jan",
-      "title" : "Prof.",
-      "given-name" : "Jan",
-      "family-name" : "Zeman",
-      "home-page" : "https://home.zhaw.ch/~zema/",
-      "group" : "School of Engineering",
-      "email" : "jan.zeman@zhaw.ch"
-    }   
-  ]
-}
\ No newline at end of file
diff --git a/interaction/input/caltech_hp.bib b/interaction/input/caltech_hp.bib
deleted file mode 100644
index 00b4f7a..0000000
--- a/interaction/input/caltech_hp.bib
+++ /dev/null
@@ -1,11090 +0,0 @@
-@article{Marsh:2001rw,
-	Abstract = {Accurate data on the three-dimensional architecture of the Golgi is prerequisite for evaluating the mechanisms of transit through this organelle. Here we detail the structure of the Golgi ribbon within part of an insulin-secreting cell in three dimensions at approximately 6 nm resolution. Rapid freezing, freeze-substitution and electron tomography were employed. The Golgi in this region is composed of seven cisternae. The cis-most element is structurally intermediate between the endoplasmic reticulum (ER)-Golgi intermediate compartment (ERGIC) and the cis-most cisterna characterized in three dimensions at high resolution in a normal rat kidney cell [Ladinsky, Mastronarde, McIntosh, Howell and Staehelin (1999) J. Cell Biol. 144, 1135-1149]. There are three trans-cisternae that demonstrate morphological and functional variation. The membrane surface areas and volumes of these elements decrease from cis to trans. The two trans-most cisternae are dissociated from the stack and are fragmented by tubulation. ER closely adheres to and inserts between individual trans-cisternae. Many of the 2119 small, clathrin-negative vesicles that are in close proximity to the Golgi fill the region where trans-cisternae have moved out of register with the ribbon. These data provide evidence that cisternal progression/maturation, trafficking via membrane tubules and vesicle-mediated transport act in concert in the same region of the Golgi ribbon, and suggest an important role for the ER in regulating membrane dynamics at the trans-Golgi.
-},
-	Author = {Marsh,  B J and Mastronarde,  D N and McIntosh,  J R and Howell,  K E},
-	Date-Added = {2008-05-21 18:40:43 -0700},
-	Date-Modified = {2008-05-21 18:40:58 -0700},
-	Eprint = {http://www.biochemsoctrans.org/bst/029/0461/0290461.pdf},
-	Journal = {Biochem Soc Trans},
-	Number = {Pt 4},
-	Pages = {461-467},
-	Title = {Structural evidence for multiple transport mechanisms through the Golgi in the pancreatic beta-cell line, HIT-T15},
-	Url = {http://www.biochemsoctrans.org/bst/029/bst0290461.htm},
-	Volume = {29},
-	Year = {2001},
-	Bdsk-Url-1 = {http://www.biochemsoctrans.org/bst/029/bst0290461.htm}}
-
-@article{White:2000vn,
-	Abstract = {This paper is concerned with filtering of hidden Markov processes (HMPs) which possess (or approximately possess) the property of lumpability. This property is a generalization of the property of lumpability of a Markov chain which has been previously addressed by others. In essence, the property of lumpability means that there is a partition of the (atomic) states of the Markov chain into aggregated sets which act in a similar manner as far as the state dynamics and observation statistics are concerned. We prove necessary and sufficient conditions on the HMP for exact lumpability to hold. For a particular class of hidden Markov models (HMMs), namely finite output alphabet models, conditions for lumpability of all HMPs representable by a specified HMM are given. The corresponding optimal filter algorithms for the aggregated states are then derived. The paper also describes an approach to efficient suboptimal filtering for HMPs which are approximately lumpable. By this we mean that the HMM generating the process may be approximated by a lumpable HMM. This approach involves directly finding a lumped HMM which approximates the original HMM well, in a matrix norm sense. An alternative approach for model reduction based on approximating a given HMM;I by an exactly lumpable HMM is also derived. This method is based on the alternating convex projections algorithm. Some simulation examples are presented which illustrate the performance of the suboptimal filtering algorithms.},
-	Author = {White, L. B. and Mahony, R. and Brushe, G. D.},
-	Date = {DEC},
-	Date-Added = {2008-03-31 10:42:14 -0700},
-	Date-Modified = {2008-03-31 10:42:14 -0700},
-	Isi = {ISI:000166570300007},
-	Issn = {0018-9286},
-	Journal = {IEEE TRANSACTIONS ON AUTOMATIC CONTROL},
-	Month = {Dec},
-	Number = {12},
-	Pages = {2297--2306},
-	Publication-Type = {J},
-	Title = {Lumpable hidden Markov models - Model reduction and reduced complexity filtering},
-	Volume = {45},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAJJsDcIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAkmja8QWb/BQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVdoaXRlAAAQAAgAAMFxjUkAAAARAAgAAMQW0mAAAAABABgCSbA3AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpXaGl0ZToyMDAwLnBkZgAOABIACAAyADAAMAAwAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1doaXRlLzIwMDAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2hpdGUvMjAwMC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Etchebest:2007kx,
-	Abstract = {Abstract~~Protein sequence world is considerably larger than structure world. In consequence, numerous non-related sequences may adopt similar 3D folds and different kinds of amino acids may thus be found in similar 3D structures. By grouping together the 20 amino acids into a smaller number of representative residues with similar features, sequence world simplification may be achieved. This clustering hence defines a reduced amino acid alphabet (reduced AAA). Numerous works have shown that protein 3D structures are composed of a limited number of building blocks, defining a structural alphabet. We previously identified such an alphabet composed of 16 representative structural motifs (5-residues length) called Protein Blocks (PBs). This alphabet permits to translate the structure (3D) in sequence of PBs (1D). Based on these two concepts, reduced AAA and PBs, we analyzed the distributions of the different kinds of amino acids and their equivalences in the structural context. Different reduced sets were considered. Recurrent amino acid associations were found in all the local structures while other were specific of some local structures (PBs) (e.g Cysteine, Histidine, Threonine and Serine for the {\^I}$\pm$-helix Ncap). Some similar associations are found in other reduced AAAs, e.g Ile with Val, or hydrophobic aromatic residues Trp with Phe and Tyr. We put into evidence interesting alternative associations. This highlights the dependence on the information considered (sequence or structure). This approach, equivalent to a substitution matrix, could be useful for designing protein sequence with different features (for instance adaptation to environment) while preserving mainly the 3D fold. },
-	Author = {Etchebest, C. and Benros, C. and Bornot, A. and Camproux, A. -C. and de Brevern, A.},
-	Date-Added = {2008-03-31 10:37:52 -0700},
-	Date-Modified = {2008-03-31 10:37:52 -0700},
-	Journal = {European Biophysics Journal},
-	M3 = {10.1007/s00249-007-0188-5},
-	Number = {8},
-	Pages = {1059--1069},
-	Title = {A reduced amino acid alphabet for understanding and designing protein adaptation to mutation},
-	Ty = {JOUR},
-	Url = {http://dx.doi.org/10.1007/s00249-007-0188-5},
-	Volume = {36},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAJJoy4IMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAkmi68QWb5BQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUV0Y2hlYmVzdAAAEAAIAADBcY1JAAAAEQAIAADEFtIAAAAAAQAYAkmjLgBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RXRjaGViZXN0OjIwMDcucGRmAA4AEgAIADIAMAAwADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRXRjaGViZXN0LzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRXRjaGViZXN0LzIwMDcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1007/s00249-007-0188-5}}
-
-@article{nemenman-2002-14,
-	Author = {Ilya Nemenman and Fariel Shafee and William Bialek},
-	Date-Added = {2008-03-31 10:35:41 -0700},
-	Date-Modified = {2008-03-31 10:35:41 -0700},
-	Journal = {ADVANCES IN NEURAL INFORMATION PROCESSING SYSTEMS},
-	Title = {Entropy and inference, revisited},
-	Url = {http://www.citebase.org/abstract?id=oai:arXiv.org:physics/0108025},
-	Volume = {14},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAIdefIJMjAwMmEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAh15V8QNta4AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACE5lbWVubWFuABAACAAAwXGNSQAAABEACAAAxA4YHgAAAAEAGAIdefIARmvPAEZq1QBGahsARmRoAECJQwACAERoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOk5lbWVubWFuOjIwMDJhLnBkZgAOABQACQAyADAAMAAyAGEALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTmVtZW5tYW4vMjAwMmEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTmVtZW5tYW4vMjAwMmEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://www.citebase.org/abstract?id=oai:arXiv.org:physics/0108025}}
-
-@article{Arnold:1999dp,
-	Author = {Arnold, F H},
-	Date-Added = {2008-03-28 14:31:06 -0700},
-	Date-Modified = {2008-05-29 12:11:20 -0700},
-	Journal = {Engineering and Science},
-	Number = {1--2},
-	Pages = {41--50},
-	Title = {Unnatural Selection: Molecular Sex for Fun and Profit},
-	Volume = {LXII},
-	Year = {1999}}
-
-@article{Wong:2005nx,
-	Abstract = {Over 2000 proteins in the Ensembl human genome database have been linked with disease information from OMIM. In comparison with all human proteins, we find that disease-associated proteins tend to have less designable folds in terms of their SCOP family counts, suggesting that they are intrinsically less robust to mutation and environmental stress. Disease proteins also tend to have isoelectric points closer to neutrality and more alternating hydrophilic-hydrophobic amino acid stretches compared with the average human protein. These results suggest that protein aggregation is a significant phenomenon associated with diseases. Another finding in this work is that many disease proteins are highly sequence similar to other disease proteins, suggesting that gene duplication has contributed to the expansion of disease-prone protein families.},
-	Address = {Institute for Bioinformatics, GSF, National Research Center for Environment and Health, Ingolstadter Landstrasse 1, D-85764 Neuherberg, Germany.},
-	Au = {Wong, P and Fritz, A and Frishman, D},
-	Author = {Wong, Philip and Fritz, Andreas and Frishman, Dmitrij},
-	Da = {20050928},
-	Date-Added = {2008-03-28 11:30:22 -0700},
-	Date-Modified = {2008-03-28 11:31:03 -0700},
-	Dcom = {20051115},
-	Dep = {20050909},
-	Doi = {10.1093/protein/gzi056},
-	Edat = {2005/09/13 09:00},
-	Group = {Designability},
-	Issn = {1741-0126 (Print)},
-	Jid = {101186484},
-	Journal = {Protein Eng Des Sel},
-	Jt = {Protein engineering, design \& selection : PEDS},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Amino Acid Substitution; Databases, Genetic; *Disease; *Drug Design; *Gene Duplication; *Genome, Human; Humans; Hydrophobicity; Isoelectric Point; Protein Folding; Protein Structure, Quaternary; Protein Structure, Tertiary; Proteins/*chemistry/*genetics},
-	Mhda = {2005/11/16 09:00},
-	Number = {10},
-	Own = {NLM},
-	Pages = {503--508},
-	Phst = {2005/09/09 {$[$}aheadofprint{$]$}},
-	Pii = {gzi056},
-	Pl = {England},
-	Pmid = {16155116},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Protein Eng Des Sel. 2005 Oct;18(10):503-8. Epub 2005 Sep 9.},
-	Stat = {MEDLINE},
-	Title = {Designability, aggregation propensity and duplication of disease-associated proteins},
-	Volume = {18},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbU0IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjjxbsQShzYAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdvbmcAEAAIAADBcY1JAAAAEQAIAADEEummAAAAAQAYAEZtTQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V29uZzoyMDA1LnBkZgAADgASAAgAMgAwADAANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Xb25nLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dvbmcvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/protein/gzi056}}
-
-@article{Wong:2006wd,
-	Abstract = {Fold designability has been estimated by the number of families contained in that fold. Here, we show that among orthologous proteins, sequence divergence is higher for folds with greater numbers of families. Folds with greater numbers of families also tend to have families that appear more often in the proteome and greater promiscuity (the number of unique "partner" folds that the fold is found with within the same protein). We also find that many disease-related proteins have folds with relatively few families. In particular, a number of these proteins are associated with diseases occurring at high frequency. These results suggest that family counts reflect how certain structures are distributed in nature and is an important characteristic associated with many human diseases.},
-	Author = {Wong, Philip AND Frishman, Dmitrij},
-	Date-Added = {2008-03-28 11:24:50 -0700},
-	Date-Modified = {2008-03-28 11:27:33 -0700},
-	Doi = {10.1371/journal.pcbi.0020040},
-	Group = {Designability},
-	Journal = {PLoS Comput Biol},
-	Month = {May},
-	Number = {5},
-	Pages = {e40},
-	Publisher = {Public Library of Science},
-	Title = {Fold Designability, Distribution, and Disease },
-	Url = {http://dx.doi.org/10.1371%2Fjournal.pcbi.0020040},
-	Volume = {2},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbU0JMjAwNmEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjjkjsQSheIAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdvbmcAEAAIAADBcY1JAAAAEQAIAADEEuhSAAAAAQAYAEZtTQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAQGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V29uZzoyMDA2YS5wZGYADgAUAAkAMgAwADAANgBhAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dvbmcvMjAwNmEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV29uZy8yMDA2YS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pcbi.0020040},
-	Bdsk-Url-2 = {http://dx.doi.org/10.1371/journal.pcbi.0020040}}
-
-@article{Wingreen:2004rr,
-	Abstract = { Only about 1000 qualitatively different protein folds are believed to exist in nature. Here, we review theoretical studies which suggest that some folds are intrinsically more designable than others, i.e. are lowest energy states of an unusually large number of sequences. The sequences associated with these folds are also found to be unusually thermally stable. The connection between highly designable structures and highly stable sequences is generally known as the `designability principle'. The designability principle may help explain the small number of natural folds, and may also guide the design of new folds.},
-	Author = {Wingreen, Ned S. and Li, Hao and Tang, Chao},
-	Date-Added = {2008-03-28 11:17:30 -0700},
-	Date-Modified = {2008-03-28 11:17:42 -0700},
-	Group = {Designability},
-	Journal = {Polymer},
-	Keywords = {Protein folding; Lattice models; Off-lattice models},
-	Number = {2},
-	Pages = {699--705},
-	Title = {Designability and thermal stability of protein structures},
-	Title1 = {Conformational Protein Conformations},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6TXW-4B0WK9F-1/2/10e1752ef50fce0b4d2baefa6d528ef6},
-	Volume = {45},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAI441cIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjjjJMQShAAAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFdpbmdyZWVuABAACAAAwXGNSQAAABEACAAAxBLmcAAAAAEAGAI441cARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOldpbmdyZWVuOjIwMDQucGRmAAAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dpbmdyZWVuLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1dpbmdyZWVuLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6TXW-4B0WK9F-1/2/10e1752ef50fce0b4d2baefa6d528ef6}}
-
-@article{Emberly:2002rm,
-	Abstract = {A typical protein structure is a compact packing of connected alpha-helices and/or beta-strands. We have developed a method for generating the ensemble of compact structures a given set of helices and strands can form. The method is tested on structures composed of four alpha-helices connected by short turns. All such natural four-helix bundles that are connected by short turns seen in nature are reproduced to closer than 3.6 A per residue within the ensemble. Because structures with no natural counterpart may be targets for ab initio structure design, the designability of each structure in the ensemble-defined as the number of sequences with that structure as their lowest-energy state-is evaluated using a hydrophobic energy. For the case of four alpha-helices, a small set of highly designable structures emerges, most of which have an analog among the known four-helix fold families; however, several packings and topologies with no analogs in protein database are identified.},
-	Address = {NEC Research Institute, 4 Independence Way, Princeton, NJ 08540, USA.},
-	Au = {Emberly, EG and Wingreen, NS and Tang, C},
-	Author = {Emberly, Eldon G and Wingreen, Ned S and Tang, Chao},
-	Da = {20020821},
-	Date-Added = {2008-03-28 11:05:07 -0700},
-	Date-Modified = {2008-03-28 11:16:49 -0700},
-	Dcom = {20020927},
-	Dep = {20020812},
-	Doi = {10.1073/pnas.162105999},
-	Edat = {2002/08/15 10:00},
-	Group = {Designability},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Language = {eng},
-	Lr = {20030103},
-	Mh = {Amino Acid Sequence; Databases, Protein; Drug Design; Models, Molecular; *Protein Structure, Secondary; *Protein Structure, Tertiary; Proteins/*chemistry},
-	Mhda = {2002/09/28 04:00},
-	Number = {17},
-	Own = {NLM},
-	Pages = {11163--11168},
-	Phst = {2002/08/12 {$[$}aheadofprint{$]$}},
-	Pii = {162105999},
-	Pl = {United States},
-	Pmc = {PMC123227},
-	Pmid = {12177419},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 2002 Aug 20;99(17):11163-8. Epub 2002 Aug 12.},
-	Stat = {MEDLINE},
-	Title = {Designability of alpha-helical proteins},
-	Volume = {99},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDMIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjjfX8QSgPYAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0VtYmVybHkAABAACAAAwXGNSQAAABEACAAAxBLjZgAAAAEAGABGbDMARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkVtYmVybHk6MjAwMi5wZGYADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9FbWJlcmx5LzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRW1iZXJseS8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.162105999}}
-
-@article{Needleman:1970bv,
-	Au = {Needleman, SB and Wunsch, CD},
-	Author = {Needleman, S B and Wunsch, C D},
-	Da = {19700729},
-	Date-Added = {2008-03-27 09:19:38 -0700},
-	Date-Modified = {2008-05-29 11:56:19 -0700},
-	Dcom = {19700729},
-	Edat = {1970/03/01},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Language = {eng},
-	Lr = {20001218},
-	Mh = {*Amino Acid Sequence; Computers; Hemoglobins; Methods; Muramidase; Myoglobin; Probability; Ribonucleases},
-	Mhda = {1970/03/01 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {443--453},
-	Pii = {0022-2836(70)90057-4},
-	Pl = {ENGLAND},
-	Pmid = {5420325},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Hemoglobins); 0 (Myoglobin); EC 3.1.- (Ribonucleases); EC 3.2.1.17 (Muramidase)},
-	Sb = {IM},
-	So = {J Mol Biol. 1970 Mar;48(3):443-53.},
-	Stat = {MEDLINE},
-	Title = {A general method applicable to the search for similarities in the amino acid sequence of two proteins},
-	Volume = {48},
-	Year = {1970},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAIvam4IMTk3MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAi9qV8QRFycAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACU5lZWRsZW1hbgAAEAAIAADBcY1JAAAAEQAIAADEEXmXAAAAAQAYAi9qbgBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TmVlZGxlbWFuOjE5NzAucGRmAA4AEgAIADEAOQA3ADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTmVlZGxlbWFuLzE5NzAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTmVlZGxlbWFuLzE5NzAucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Rost:1999la,
-	Abstract = {Sequence alignments unambiguously distinguish between protein pairs of similar and non-similar structure when the pairwise sequence identity is high (>40% for long alignments). The signal gets blurred in the twilight zone of 20-35% sequence identity. Here, more than a million sequence alignments were analysed between protein pairs of known structures to re-define a line distinguishing between true and false positives for low levels of similarity. Four results stood out. (i) The transition from the safe zone of sequence alignment into the twilight zone is described by an explosion of false negatives. More than 95% of all pairs detected in the twilight zone had different structures. More precisely, above a cut-off roughly corresponding to 30% sequence identity, 90% of the pairs were homologous; below 25% less than 10% were. (ii) Whether or not sequence homology implied structural identity depended crucially on the alignment length. For example, if 10 residues were similar in an alignment of length 16 (>60%), structural similarity could not be inferred. (iii) The 'more similar than identical' rule (discarding all pairs for which percentage similarity was lower than percentage identity) reduced false positives significantly. (iv) Using intermediate sequences for finding links between more distant families was almost as successful: pairs were predicted to be homologous when the respective sequence families had proteins in common. All findings are applicable to automatic database searches.},
-	Address = {EMBL, Heidelberg, Germany.},
-	Au = {Rost, B},
-	Author = {Rost, B},
-	Da = {19990603},
-	Date-Added = {2008-03-27 08:20:13 -0700},
-	Date-Modified = {2008-05-29 12:02:11 -0700},
-	Dcom = {19990603},
-	Edat = {1999/04/09},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Computer Simulation; Databases, Factual; False Positive Reactions; Models, Statistical; Sequence Alignment/*methods; *Sequence Homology, Amino Acid},
-	Mhda = {1999/04/09 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {85--94},
-	Pl = {ENGLAND},
-	Pmid = {10195279},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Protein Eng. 1999 Feb;12(2):85-94. },
-	Stat = {MEDLINE},
-	Title = {Twilight zone of protein sequence alignments},
-	Volume = {12},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABr/eAIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlRzsA27O0AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFJvc3QAEAAIAADBcY1JAAAAEQAIAADAN11tAAAAAQAYAGv94ABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Um9zdDoxOTk5LnBkZgAADgASAAgAMQA5ADkAOQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Sb3N0LzE5OTkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1Jvc3QvMTk5OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Benson:1999dq,
-	Abstract = {The unusual bacteriophage PRD1 features a membrane beneath its icosahedral protein coat. The crystal structure of the major coat protein, P3, at 1.85 A resolution reveals a molecule with three interlocking subunits, each with two eight-stranded viral jelly rolls normal to the viral capsid, and putative membrane-interacting regions. Surprisingly, the P3 molecule closely resembles hexon, the equivalent protein in human adenovirus. Both viruses also have similar overall architecture, with identical capsid lattices and attachment proteins at their vertices. Although these two dsDNA viruses infect hosts from very different kingdoms, their striking similarities, from major coat protein through capsid architecture, strongly suggest their evolutionary relationship.},
-	Address = {The Wistar Institute, Philadelphia, Pennsylvania 19104, USA.},
-	Au = {Benson, SD and Bamford, JK and Bamford, DH and Burnett, RM},
-	Author = {Benson, S D and Bamford, J K and Bamford, D H and Burnett, R M},
-	Da = {19991015},
-	Date-Added = {2008-03-27 07:36:49 -0700},
-	Date-Modified = {2008-03-27 07:36:59 -0700},
-	Dcom = {19991015},
-	Edat = {1999/09/28},
-	Issn = {0092-8674 (Print)},
-	Jid = {0413066},
-	Journal = {Cell},
-	Jt = {Cell},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Adenoviruses, Human/chemistry; Amino Acid Sequence; Capsid/*chemistry; *Capsid Proteins; Crystallization; Crystallography, X-Ray; Evolution, Molecular; Models, Molecular; Molecular Sequence Data; Protein Conformation; Synchrotrons; Tectiviridae/*chemistry},
-	Mhda = {1999/09/28 00:01},
-	Number = {6},
-	Own = {NLM},
-	Pages = {825--833},
-	Pii = {S0092-8674(00)81516-0},
-	Pl = {UNITED STATES},
-	Pmid = {10499799},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Capsid Proteins); 0 (hexon capsid protein, Adenovirus); 0 (protein P3, bacteriophage PRD1)},
-	Sb = {IM; S},
-	Si = {PDB/1CJD; PDB/R1CJDSF},
-	So = {Cell. 1999 Sep 17;98(6):825-33. },
-	Stat = {MEDLINE},
-	Title = {Viral evolution revealed by bacteriophage PRD1 and human adenovirus coat protein structures},
-	Volume = {98},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+4IMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAi9DusQQ/5sAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkJlbnNvbgAQAAgAAMFxjUkAAAARAAgAAMQRYgsAAAABABgARmvuAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCZW5zb246MTk5OS5wZGYAAA4AEgAIADEAOQA5ADkALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQmVuc29uLzE5OTkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0JlbnNvbi8xOTk5LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Nandhagopal:2002mb,
-	Abstract = {Paramecium bursaria Chlorella virus type 1 (PBCV-1) is a very large, icosahedral virus containing an internal membrane enclosed within a glycoprotein coat consisting of pseudohexagonal arrays of trimeric capsomers. Each capsomer is composed of three molecules of the major capsid protein, Vp54, the 2.0-A resolution structure of which is reported here. Four N-linked and two O-linked glycosylation sites were identified. The N-linked sites are associated with nonstandard amino acid motifs as a result of glycosylation by virus-encoded enzymes. Each monomer of the trimeric structure consists of two eight-stranded, antiparallel beta-barrel, "jelly-roll" domains related by a pseudo-sixfold rotation. The fold of the monomer and the pseudo-sixfold symmetry of the capsomer resembles that of the major coat proteins in the double-stranded DNA bacteriophage PRD1 and the double-stranded DNA human adenoviruses, as well as the viral proteins VP2-VP3 of picornaviruses. The structural similarities among these diverse groups of viruses, whose hosts include bacteria, unicellular eukaryotes, plants, and mammals, make it probable that their capsid proteins have evolved from a common ancestor that had already acquired a pseudo-sixfold organization. The trimeric capsid protein structure was used to produce a quasi-atomic model of the 1,900-A diameter PBCV-1 outer shell, based on fitting of the Vp54 crystal structure into a three-dimensional cryoelectron microscopy image reconstruction of the virus.},
-	Address = {Department of Biological Sciences, Purdue University, West Lafayette, IN 47907, USA.},
-	Au = {Nandhagopal, N and Simpson, AA and Gurnon, JR and Yan, X and Baker, TS and Graves, MV and Van Etten, JL and Rossmann, MG},
-	Author = {Nandhagopal, N and Simpson, A A and Gurnon, J R and Yan, X and Baker, T S and Graves, M V and Van Etten, J L and Rossmann, M G},
-	Da = {20021113},
-	Date-Added = {2008-03-27 07:16:06 -0700},
-	Date-Modified = {2008-05-30 02:46:22 -0700},
-	Dcom = {20030115},
-	Dep = {20021031},
-	Edat = {2002/11/02 04:00},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Capsid/*chemistry/ultrastructure; Cryoelectron Microscopy; DNA Viruses/*chemistry; Evolution, Molecular; Glycosylation; Models, Molecular; Protein Conformation; Protein Subunits/chemistry},
-	Mhda = {2003/01/16 04:00},
-	Number = {23},
-	Own = {NLM},
-	Pages = {14758--14763},
-	Phst = {2002/10/31 {$[$}aheadofprint{$]$}},
-	Pii = {232580699},
-	Pl = {United States},
-	Pmc = {PMC137492},
-	Pmid = {12411581},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Protein Subunits)},
-	Sb = {IM},
-	Si = {PDB/1J5Q; PDB/1M3Y; PDB/1M4X},
-	So = {Proc Natl Acad Sci U S A. 2002 Nov 12;99(23):14758-63. Epub 2002 Oct 31.},
-	Stat = {MEDLINE},
-	Title = {The structure and evolution of the major capsid protein of a large, lipid-containing {DNA} virus},
-	Volume = {99},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAInwdIIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAifBtcQQT5QAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC05hbmRoYWdvcGFsAAAQAAgAAMFxjUkAAAARAAgAAMQQsgQAAAABABgCJ8HSAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpOYW5kaGFnb3BhbDoyMDAyLnBkZgAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL05hbmRoYWdvcGFsLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTmFuZGhhZ29wYWwvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.232580699}}
-
-@article{nemenman-2002-14,
-	Author = {Ilya Nemenman and Fariel Shafee and William Bialek},
-	Date-Added = {2008-03-25 16:58:29 -0700},
-	Date-Modified = {2008-03-25 16:59:36 -0700},
-	Group = {Next},
-	Journal = {ADVANCES IN NEURAL INFORMATION PROCESSING SYSTEMS},
-	Title = {Entropy and inference, revisited},
-	Url = {http://www.citebase.org/abstract?id=oai:arXiv.org:physics/0108025},
-	Volume = {14},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAIdefIIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAh15zsQO3+tQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACE5lbWVubWFuABAACAAAwXGNSQAAABEACAAAxA9CWwAAAAEAGAIdefIARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOk5lbWVubWFuOjIwMDIucGRmAAAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL05lbWVubWFuLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL05lbWVubWFuLzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.citebase.org/abstract?id=oai:arXiv.org:physics/0108025}}
-
-@article{Wicker:2008hb,
-	Abstract = {Dirichlet distributions are natural choices to analyse data described by frequencies or proportions since they are the simplest known distributions for such data apart from the uniform distribution. They are often used whenever proportions are involved, for example, in text-mining, image analysis, biology or as a prior of a multinomial distribution in Bayesian statistics. As the Dirichlet distribution belongs to the exponential family, its parameters can be easily inferred by maximum likelihood. Parameter estimation is usually performed with the Newton-Raphson algorithm after an initialisation step using either the moments or Ronning's methods. However this initialisation can result in parameters that lie outside the admissible region. A simple and very efficient alternative based on a maximum likelihood approximation is presented. The advantages of the presented method compared to two other methods are demonstrated on synthetic data sets as well as for a practical biological problem: the clustering of protein sequences based on their amino acid compositions. (c) 2007 Elsevier B.V All rights reserved.},
-	Af = {Wicker, Nicolas Muller, Jean Kalathur, Ravi Kiran Reddy Poch, Olivier},
-	Author = {Wicker, N. and Muller, J. and Kalathur, R. K. R. and Poch, O.},
-	Date = {JAN 1},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000253669700005},
-	Issn = {0167-9473},
-	Journal = {COMPUTATIONAL STATISTICS & DATA ANALYSIS},
-	Month = {Jan},
-	Number = {3},
-	Pages = {1315--1322},
-	Publication-Type = {J},
-	Title = {A maximum likelihood approximation method for Dirichlet's parameter estimation},
-	Volume = {52},
-	Year = {2008}}
-
-@article{Bernardes:2007vf,
-	Abstract = {Background: Remote homology detection is a challenging problem in Bioinformatics. Arguably, profile Hidden Markov Models (pHMMs) are one of the most successful approaches in addressing this important problem. pHMM packages present a relatively small computational cost, and perform particularly well at recognizing remote homologies. This raises the question of whether structural alignments could impact the performance of pHMMs trained from proteins in the Twilight Zone, as structural alignments are often more accurate than sequence alignments at identifying motifs and functional residues. Next, we assess the impact of using structural alignments in pHMM performance. Results: We used the SCOP database to perform our experiments. Structural alignments were obtained using the 3DCOFFEE and MAMMOTH-mult tools; sequence alignments were obtained using CLUSTALW, TCOFFEE, MAFFT and PROBCONS. We performed leave-one-family-out cross-validation over super-families. Performance was evaluated through ROC curves and paired two tailed t-test. Conclusion: We observed that pHMMs derived from structural alignments performed significantly better than pHMMs derived from sequence alignment in low-identity regions, mainly below 20%. We believe this is because structural alignment tools are better at focusing on the important patterns that are more often conserved through evolution, resulting in higher quality pHMMs. On the other hand, sensitivity of these tools is still quite low for these low-identity regions. Our results suggest a number of possible directions for improvements in this area.},
-	Af = {Bernardes, Juliana S. Davila, Alberto Mr Costa, Vitor S. Zaverucha, Gerson},
-	Author = {Bernardes, J. S. and Davila, A. M. and Costa, V. S. and Zaverucha, G.},
-	Date = {NOV 9},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Di = {ARTN 435},
-	Isi = {ISI:000253595000001},
-	Issn = {1471-2105},
-	Journal = {BMC BIOINFORMATICS},
-	Month = {Nov},
-	Pages = {435},
-	Publication-Type = {J},
-	Title = {Improving model construction of profile HMMs for remote homology detection through structural alignment},
-	Volume = {8},
-	Year = {2007}}
-
-@article{Brown:2007uk,
-	Abstract = {Function prediction by homology is widely used to provide preliminary functional annotations for genes for which experimental evidence of function is unavailable or limited. This approach has been shown to be prone to systematic error, including percolation of annotation errors through sequence databases. Phylogenomic analysis avoids these errors in function prediction but has been difficult to automate for high- throughput application. To address this limitation, we present a computationally efficient pipeline for phylogenomic classification of proteins. This pipeline uses the SCI- PHY ( Subfamily Classification in Phylogenomics) algorithm for automatic subfamily identification, followed by subfamily hidden Markov model ( HMM) construction. A simple and computationally efficient scoring scheme using family and subfamily HMMs enables classification of novel sequences to protein families and subfamilies. Sequences representing entirely novel subfamilies are differentiated from those that can be classified to subfamilies in the input training set using logistic regression. Subfamily HMM parameters are estimated using an information- sharing protocol, enabling subfamilies containing even a single sequence to benefit from conservation patterns defining the family as a whole or in related subfamilies. SCI- PHY subfamilies correspond closely to functional subtypes defined by experts and to conserved clades found by phylogenetic analysis. Extensive comparisons of subfamily and family HMM performances show that subfamily HMMs dramatically improve the separation between homologous and non- homologous proteins in sequence database searches. Subfamily HMMs also provide extremely high specificity of classification and can be used to predict entirely novel subtypes. The SCI- PHY Web server at http:// phylogenomics. berkeley. edu/ SCI- PHY/ allows users to upload a multiple sequence alignment for subfamily identification and subfamily HMM construction. Biologists wishing to provide their own subfamily definitions can do so. Source code is available on the Web page. The Berkeley Phylogenomics Group PhyloFacts resource contains precalculated subfamily predictions and subfamily HMMs for more than 40,000 protein families and domains at http:// phylogenomics. berkeley. edu/ phylofacts/.},
-	Af = {Brown, Duncan P. Krishnamurthy, Nandini Sjoelander, Kimmen},
-	Author = {Brown, D. P. and Krishnamurthy, N. and Sjolander, K.},
-	Date = {AUG},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000249767100012},
-	Issn = {1553-734X},
-	Journal = {PLOS COMPUTATIONAL BIOLOGY},
-	Month = {Aug},
-	Number = {8},
-	Pages = {1526--1538},
-	Publication-Type = {J},
-	Title = {Automated protein subfamily identification and classification},
-	Volume = {3},
-	Year = {2007}}
-
-@article{Krishnamurthy:2007by,
-	Abstract = {Background: Function prediction by transfer of annotation from the top database hit in a homology search has been shown to be prone to systematic error. Phylogenomic analysis reduces these errors by inferring protein function within the evolutionary context of the entire family. However, accuracy of function prediction for multi-domain proteins depends on all members having the same overall domain structure. By contrast, most common homolog detection methods are optimized for retrieving local homologs, and do not address this requirement. Results: We present FlowerPower, a novel clustering algorithm designed for the identification of global homologs as a precursor to structural phylogenomic analysis. Similar to methods such as PSIBLAST, FlowerPower employs an iterative approach to clustering sequences. However, rather than using a single HMM or profile to expand the cluster, FlowerPower identifies subfamilies using the SCI-PHY algorithm and then selects and aligns new homologs using subfamily hidden Markov models. FlowerPower is shown to outperform BLAST, PSI-BLAST and the UCSC SAM-Target 2K methods at discrimination between proteins in the same domain architecture class and those having different overall domain structures. Conclusion: Structural phylogenomic analysis enables biologists to avoid the systematic errors associated with annotation transfer; clustering sequences based on sharing the same domain architecture is a critical first step in this process. FlowerPower is shown to consistently identify homologous sequences having the same domain architecture as the query.},
-	Af = {Krishnamurthy, Nandini Brown, Duncan Sjoelander, Kimmen},
-	Author = {Krishnamurthy, N. and Brown, D. and Sjolander, K.},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Di = {ARTN S12},
-	Isi = {ISI:000249891000012},
-	Issn = {1471-2148},
-	Journal = {BMC EVOLUTIONARY BIOLOGY},
-	Pages = {S12},
-	Publication-Type = {J},
-	Supplement = {Suppl. 1},
-	Title = {FlowerPower: clustering proteins into domain architecture classes for phylogenomic inference of protein function},
-	Volume = {7},
-	Year = {2007}}
-
-@article{Won:2006eq,
-	Abstract = {A genetic algorithm (GA) is proposed for finding the structure of hidden Markov Models (HMMs) used for biological sequence analysis. The GA is designed to preserve biologically meaningful building blocks. The search through the space of HMM structures is combined with optimization of the emission and transition probabilities using the classic Baum-Welch algorithm. The system is tested on the problem of finding the promoter and coding region of C jejuni. The resulting HMM has a superior discrimination ability to a handcrafted model that has been published in the literature.},
-	Author = {Won, K. J. and Prugel-Bennett, A. and Krogh, A.},
-	Date = {FEB},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000235725500004},
-	Issn = {1089-778X},
-	Journal = {IEEE TRANSACTIONS ON EVOLUTIONARY COMPUTATION},
-	Month = {Feb},
-	Number = {1},
-	Pages = {39--49},
-	Publication-Type = {J},
-	Title = {Evolving the structure of hidden Markov Models},
-	Volume = {10},
-	Year = {2006}}
-
-@article{Olsen:2005fq,
-	Abstract = {Sequence divergence among orthologous proteins was characterized with 34 amino acid replacement matrices, sequence context analysis, and a phylogenetic tree. The model was trained on very large datasets of aligned protein sequences drawn from 15 organisms including protists, plants, Dictyostelium, fungi, and animals. Comparative tests with models currently used in phylogeny, i.e., with JTT + Gamma +/- F and WAG + Gamma +/- F, made on a test dataset of 380 multiple alignments containing protein sequences from all five of the major taxonomic groups mentioned, indicate that our model should be preferred over the JTT + Gamma +/- F and WAG + Gamma +/- F models on datasets similar to the test dataset. The strong performance of our model of orthologous protein sequence divergence can be attributed to its ability to better approximate amino acid equilibrium frequencies to compositions found in alignment columns.},
-	Author = {Olsen, R. and Loomis, W. F.},
-	Date = {NOV},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000232980100010},
-	Issn = {0022-2844},
-	Journal = {JOURNAL OF MOLECULAR EVOLUTION},
-	Month = {Nov},
-	Number = {5},
-	Pages = {659--665},
-	Publication-Type = {J},
-	Title = {A collection of amino acid replacement matrices derived from clusters of orthologs},
-	Volume = {61},
-	Year = {2005}}
-
-@article{Wistrand:2005oj,
-	Abstract = {Background: Profile hidden Markov model (HMM) techniques are among the most powerful methods for protein homology detection. Yet, the critical features for successful modelling are not fully known. In the present work we approached this by using two of the most popular HMM packages: SAM and HMMER. The programs' abilities to build models and score sequences were compared on a SCOP/Pfam based test set. The comparison was done separately for local and global HMM scoring. Results: Using default settings, SAM was overall more sensitive. SAM's model estimation was superior, while HMMER's model scoring was more accurate. Critical features for model building were then analysed by comparing the two packages' algorithmic choices and parameters. The weighting between prior probabilities and multiple alignment counts held the primary explanation why SAM's model building was superior. Our analysis suggests that HMMER gives too much weight to the sequence counts. SAM's emission prior probabilities were also shown to be more sensitive. The relative sequence weighting schemes are different in the two packages but performed equivalently. Conclusion: SAM model estimation was more sensitive, while HMMER model scoring was more accurate. By combining the best algorithmic features from both packages the accuracy was substantially improved compared to their default performance.},
-	Author = {Wistrand, M. and Sonnhammer, E. L. L.},
-	Date = {APR 15},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Di = {ARTN 99},
-	Isi = {ISI:000229027100001},
-	Issn = {1471-2105},
-	Journal = {BMC BIOINFORMATICS},
-	Month = {Apr},
-	Pages = {99},
-	Publication-Type = {J},
-	Title = {Improved profile HMM performance by assessment of critical algorithmic features in SAM and HMMER},
-	Volume = {6},
-	Year = {2005}}
-
-@article{Wistrand:2004wt,
-	Abstract = {Profile hidden Markov models (HMMs) are used to model protein families and for detecting evolutionary relationships between proteins. Such a profile HMM is typically constructed from a multiple alignment of a set of related sequences. Transition probability parameters in an HMM are used to model insertions and deletions in the alignment. We show here that taking into account unrelated sequences when estimating the transition probability parameters helps to construct more discriminative models for the global/local alignment mode. After normal HMM training, a simple heuristic is employed that adjusts the transition probabilities between match and delete states according to observed transitions in the training set relative to the unrelated (noise) set. The method is called adaptive transition probabilities (ATP) and is based on the HMMER package implementation. It was benchmarked in two remote homology tests based on the Pfam and the SCOP classifications. Compared to the HMMER default procedure, the rate of misclassification was reduced significantly in both tests and across all levels of error rate. (C) 2004 Elsevier Ltd. All rights reserved.},
-	Author = {Wistrand, M. and Sonnhammer, E. L. L.},
-	Date = {MAY 7},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000221160400018},
-	Issn = {0022-2836},
-	Journal = {JOURNAL OF MOLECULAR BIOLOGY},
-	Month = {May},
-	Number = {4},
-	Pages = {847--854},
-	Publication-Type = {J},
-	Title = {Improving profile HMM discrimination by adapting transition probabilities},
-	Volume = {338},
-	Year = {2004}}
-
-@article{Sadreyev:2003wo,
-	Abstract = {We present a novel method for the comparison of multiple protein alignments with assessment of statistical significance (COMPASS). The method derives numerical profiles from alignments, constructs optimal local profile-profile alignments and analytically estimates E-values for the detected similarities. The scoring system and E-value calculation are based on a generalization of the PSI-BLAST approach to profile-sequence comparison, which is adapted for the profile-profile case. Tested along with existing methods for profile-sequence (PSI-BLAST) and profile-profile (prof_sim) comparison, COMPASS shows increased abilities for sensitive and selective detection of remote sequence similarities, as well as improved quality of local alignments. The method allows prediction of relationships between protein families in the PFAM database beyond the range of conventional methods. Two predicted relations with high significance are similarities between various Rossmann-type folds and between various helix-turn-helix-containing families. The potential value of COMPASS for structure/function predictions is illustrated by the detection of an intricate homology between the DNA-binding domain of the CTF/NFI family and the MH1 domain of the Smad family. (C) 2003 Elsevier Science Ltd. All rights reserved.},
-	Author = {Sadreyev, R. and Grishin, N.},
-	Date = {FEB 7},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000180997600027},
-	Issn = {0022-2836},
-	Journal = {JOURNAL OF MOLECULAR BIOLOGY},
-	Month = {Feb},
-	Number = {1},
-	Pages = {317--336},
-	Publication-Type = {J},
-	Title = {COMPASS: A tool for comparison of multiple protein alignments with assessment of statistical significance},
-	Volume = {326},
-	Year = {2003}}
-
-@article{Head-Gordon:2001si,
-	Abstract = {The goal of computational biology in the early twenty-first century is to link the various genome sequencing projects to a high-throughput effort in complete structural and functional annotation of whole genomes or biological pathways. If is, in fact, a logical extension of the genome effort to systematically elaborate DNA (deoxyribonucleic acid) sequences into full three-dimensional structures through to functional analysis of cellular networks. The first level of the biological hierarchy is comparative analysis of the rapidly emerging genomic data at the sequence level. However, knowing only the sequence of DNA does not always tell us about the structure or function of the genes, nor does it fell us about the combined action of their protein products, which is the essence of higher order biological function. Complete annotation will include the determination of structure and function of proteins, and a move from analysis of these individual macromolecules to their complex interactions that make up the processes of cellular decisions. This paper represents an effort by a research community to define the hard computational biology problems of the future, to define what mixture of basic research directions and practical algorithmic approaches will be required to achieve our goals, and to outline the directions that will likely be taken in the postgenomic era.},
-	Author = {Head-Gordon, T. and Wooley, J. C.},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000169186500003},
-	Issn = {0018-8670},
-	Journal = {IBM SYSTEMS JOURNAL},
-	Number = {2},
-	Pages = {265--296},
-	Publication-Type = {J},
-	Title = {Computational challenges in structural and functional genomics},
-	Volume = {40},
-	Year = {2001}}
-
-@article{Ng:2001le,
-	Abstract = {Many missense substitutions are identified in single nucleotide polymorphism (SNP) data and large-scale random mutagenesis projects. Each amino acid substitution potentially affects protein Function. We have constructed a tool that uses sequence homology to predict whether a substitution affects protein function. SIFT, which sorts intolerant from tolerant substitutions, classifies substitutions as tolerated or deleterious. A higher proportion of substitutions predicted to be deleterious by SIFT gives an affected phenotype than substitutions predicted to be deleterious by substitution scoring matrices in three test cases, Using SIFT before mutagenesis studies could reduce the number of functional assays required and yield a higher proportion of affected phenotypes. SIFT may be used to identify plausible disease candidates among the SNPs that cause missense substitutions.},
-	Author = {Ng, P. C. and Henikoff, S.},
-	Date = {MAY},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000168501600021},
-	Issn = {1088-9051},
-	Journal = {GENOME RESEARCH},
-	Month = {May},
-	Number = {5},
-	Pages = {863--874},
-	Publication-Type = {J},
-	Title = {Predicting deleterious amino acid substitutions},
-	Volume = {11},
-	Year = {2001}}
-
-@misc{Henikoff:2000ez,
-	Author = {Henikoff, S. and Henikoff, J. G.},
-	Booktitle = {ADVANCES IN PROTEIN CHEMISTRY},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:10:40 -0700},
-	Isi = {ISI:000087752000004},
-	Issn = {0065-3233},
-	Journal = {ADVANCES IN PROTEIN CHEMISTRY, VOL 54},
-	Pages = {73--97},
-	Publication-Type = {S},
-	Title = {Amino acid substitution matrices},
-	Volume = {54},
-	Year = {2000}}
-
-@misc{Das:2000tx,
-	Author = {Das, S. and Smith, T. F.},
-	Booktitle = {ADVANCES IN PROTEIN CHEMISTRY},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000087752000007},
-	Issn = {0065-3233},
-	Journal = {ADVANCES IN PROTEIN CHEMISTRY, VOL 54},
-	Pages = {159--183},
-	Publication-Type = {S},
-	Title = {Identifying nature's protein lego set},
-	Volume = {54},
-	Year = {2000}}
-
-@misc{Koonin:2000wj,
-	Author = {Koonin, E. V. and Wolf, Y. I. and Aravind, L.},
-	Booktitle = {ADVANCES IN PROTEIN CHEMISTRY},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000087752000009},
-	Issn = {0065-3233},
-	Journal = {ADVANCES IN PROTEIN CHEMISTRY, VOL 54},
-	Pages = {245--275},
-	Publication-Type = {S},
-	Title = {Protein fold recognition using sequence profiles and its application in structural genomics},
-	Volume = {54},
-	Year = {2000}}
-
-@article{Karplus:1998lk,
-	Abstract = {Motivation: A new hidden Markov model method (SAM-T98) for finding remote homologs of protein sequences is described and evaluated. The method begins with a simple target sequence and iteratively builds a hidden Markov model (HMM) from the sequence and homologs found using die HMM for database search. SAM-T98 is also used to construct model libraries automatically, from sequences in structural databases. Methods: We evaluate the SAM-T98 method with foul datasets. Three of the test sets are fold-recognition tests, where the correct answers are determined by structural similarity. The fourth uses a curated database. The method is compared against WU-BLASTP and against DOUBLE-BLAST, a two-step method similar to ISS, but using BLAST instead of FASTA. Results: SAM-T98 had the fewest errors in all tests- dramatically so for the fold-recognition tests. At the minimum-error point on the SCOP (Structural Classification of Proteins)-domains test, SAM-T98 got 880 flue positives and 68 false positives, DOUBLE-BLAST got 533 true positives with 71 false positives, ann WU-BLASTP got 353 true positives with 24 false positives. The method is optimized to recognize superfamilies, and would require parameter adjustment to be used to find family or fold relationships, One key to the performance of the HMM method is a new score-normalization technique that compares the score to the score with a reversed model rather than to a uniform null model.},
-	Author = {Karplus, K. and Barrett, C. and Hughey, R.},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000078291700004},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Number = {10},
-	Pages = {846--856},
-	Publication-Type = {J},
-	Title = {Hidden Markov models for detecting remote protein homologies},
-	Volume = {14},
-	Year = {1998}}
-
-@article{Eddy:1998ng,
-	Abstract = {The recent literature on profile hidden Markov model (profile HMM) methods and software is reviewed. Profile HMMs turn a multiple sequence alignment into a position-specific scoring system suitable for searching databases for remotely homologous sequences. Profile HMM analyses complement standard pail-wise comparison methods for large-scale sequence analysis. Several software implementations and two large libraries of profile HMMs of common protein domains are available. HMM methods peformed comparably to threading methods in the CASP2 structure prediction exercise.},
-	Author = {Eddy, S. R.},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000077489900002},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Number = {9},
-	Pages = {755--763},
-	Publication-Type = {J},
-	Title = {Profile hidden Markov models},
-	Volume = {14},
-	Year = {1998}}
-
-@article{Park:1998bq,
-	Abstract = {The sequences of related proteins can diverge beyond the point where their relationship can be recognised by pairwise sequence comparisons. In attempts to overcome this limitation, methods have been developed that use as a query, not a single sequence, but sets of related sequences or a representation of the characteristics shared by related sequences. Here we describe an assessment of three of these methods: the SAM-T98 implementation of a hidden Markov model procedure; PSI-BLAST; and the intermediate sequence search (ISS) procedure. We determined the extent to which these procedures can detect evolutionary relationships between the members of the sequence database PDBD40-J. This database, derived from the structural classification of proteins (SCOP), contains the sequences of proteins of known structure whose sequence identities with each other are 40% or less. The evolutionary relationships that exist between those that have low sequence identities were found by the examination of their structural details and, in many cases, their functional features. For nine false positive predictions out of a possible 432,680, i.e. at a false positive rate of about 1/50,000, SAM-T98 found 35% of the true homologous relationships in PDBD40-J, whilst PSI-BLAST found 30% and ISS found 25%. Overall, this is about twice the number of PDBD40-J relations that can be detected by the pairwise comparison procedures FASTA (17%) and GAP-BLAST (15%). For distantly related sequences in PDBD40-J, those pairs whose sequence identity is less than 30%, SAM-T98 and PSI-BLAST detect three times the number of relationships found by the pairwise methods. (C) 1998 Academic Press.},
-	Author = {Park, J. and Karplus, K. and Barrett, C. and Hughey, R. and Haussler, D. and Hubbard, T. and Chothia, C.},
-	Date = {DEC 11},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000077467300031},
-	Issn = {0022-2836},
-	Journal = {JOURNAL OF MOLECULAR BIOLOGY},
-	Month = {Dec},
-	Number = {4},
-	Pages = {1201--1210},
-	Publication-Type = {J},
-	Title = {Sequence comparisons using multiple sequences detect three times as many remote homologues as pairwise methods},
-	Volume = {284},
-	Year = {1998}}
-
-@article{Tarnas:1998qm,
-	Abstract = {Motivation: Complete forward-backward (Baum-Welch) hidden Markov model training cannot take advantage off the linear space, divide-and-conquer sequence alignment algorithms because of the examination of all possible paths rather than the single best path. Results: This paper discusses the implementation and performance of checkpoint-based reduced space sequence alignment in the SAM hidden Markov modeling package. Implementation of the checkpoint algorithm reduced memory usage from O(mn) to O(m root n) with only a 10% slowdown for small m and n, and vast speed-up for the larger values, such as m = n = 2000, that cause excessive paying on a 96 Mbyte workstation. The results are applicable to other types of dynamic programming.},
-	Author = {Tarnas, C. and Hughey, R.},
-	Date-Added = {2008-03-24 12:09:54 -0700},
-	Date-Modified = {2008-03-24 12:09:54 -0700},
-	Isi = {ISI:000075133400004},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Number = {5},
-	Pages = {401--406},
-	Publication-Type = {J},
-	Title = {Reduced space hidden Markov model training},
-	Volume = {14},
-	Year = {1998}}
-
-@article{Sjolander:1996qq,
-	Abstract = {We present a method for condensing the information in multiple alignments of proteins into a mixture of Dirichlet densities over amino acid distributions. Dirichlet mixture densities are designed to be combined with observed amino acid frequencies to form estimates of expected amino acid probabilities at each position in a profile, hidden Markov model or other statistical model. These estimates give a statistical model greater generalization capacity, so that remotely related family members can be more reliably recognized by the model. This paper corrects the previously published formula for estimating these expected probabilities, and contains complete derivations of the Dirichlet mixture formulas, methods for optimizing the mixtures to match particular databases, and suggestions for efficient implementation.},
-	Address = {Baskin Center for Computer Engineering and Information Sciences, University of California at Santa Cruz 95064, USA. kimmen@cse.ucsc.edu},
-	Au = {Sjolander, K and Karplus, K and Brown, M and Hughey, R and Krogh, A and Mian, IS and Haussler, D},
-	Author = {Sj\"{o}lander, K and Karplus, K and Brown, M and Hughey, R and Krogh, A and Mian, I S and Haussler, D},
-	Da = {19970310},
-	Date-Added = {2008-03-22 10:42:51 -0700},
-	Date-Modified = {2008-05-29 12:32:37 -0700},
-	Dcom = {19970310},
-	Edat = {1996/08/01},
-	Gr = {GM17129/GM/United States NIGMS},
-	Group = {Next},
-	Jid = {8511758},
-	Journal = {Comput Appl Biosci},
-	Jt = {Computer applications in the biosciences : CABIOS},
-	Language = {eng},
-	Lr = {20071115},
-	Mh = {Algorithms; Bayes Theorem; Databases, Factual; Evaluation Studies as Topic; *Models, Statistical; Monte Carlo Method; Probability Theory; Proteins/*genetics; Sequence Alignment/*methods/statistics \& numerical data; Sequence Homology, Amino Acid},
-	Mhda = {1996/08/01 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {327--345},
-	Pl = {ENGLAND},
-	Pmid = {8902360},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Comput Appl Biosci. 1996 Aug;12(4):327-45. },
-	Stat = {MEDLINE},
-	Title = {Dirichlet mixtures: a method for improved detection of weak but significant protein sequence homology},
-	Volume = {12},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAIEoG8IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAhTtVsQKk5VQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVNqb2xhbmRlcgAAEAAIAADBcY1JAAAAEQAIAADECvYFAAAAAQAYAgSgbwBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U2pvbGFuZGVyOjE5OTYucGRmAA4AEgAIADEAOQA5ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvU2pvbGFuZGVyLzE5OTYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU2pvbGFuZGVyLzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Karplus:1997ph,
-	Abstract = {We discuss how methods based on hidden Markov models performed in the fold-recognition section of the CASP2 experiment. Hidden Markov models were built for a representative set of just over 1,000 structures from the Protein Data Bank (PDB). Each CASP2 target sequence was scored against this library of HMMs. In addition, an HMM was built for each of the target sequences and all of the sequences in PDB were scored against that target model, with a good score on both methods indicating a high probability that the target sequence is homologous to the structure. The method worked well in comparison to other methods used at CASP2 for targets of moderate difficulty, where the closest structure in PDB could be aligned to the target with at least 15% residue identity.},
-	Address = {Department, Jack Baskin School of Engineering, University of California, Santa Cruz 95064, USA. karplus@cse.ucsc.edu},
-	Au = {Karplus, K and Sjolander, K and Barrett, C and Cline, M and Haussler, D and Hughey, R and Holm, L and Sander, C},
-	Author = {Karplus, K and Sjolander, K and Barrett, C and Cline, M and Haussler, D and Hughey, R and Holm, L and Sander, C},
-	Da = {19980324},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:14:44 -0700},
-	Dcom = {19980324},
-	Edat = {1997/01/01 00:00},
-	Gr = {GM17129/GM/United States NIGMS},
-	Group = {Next},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Language = {eng},
-	Lr = {20071114},
-	Mh = {*Markov Chains; *Models, Molecular; Protein Folding; Proteins/*chemistry; Reproducibility of Results; Sequence Alignment},
-	Mhda = {1998/03/05 00:01},
-	Own = {NLM},
-	Pages = {134--139},
-	Pl = {UNITED STATES},
-	Pmid = {9485505},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 1997;Suppl 1:134-9. },
-	Stat = {MEDLINE},
-	Title = {Predicting protein structure using hidden Markov models},
-	Volume = {Suppl 1},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTOL8IMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM4/8P0ablQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0thcnBsdXMAABAACAAAwXGNSQAAABEACAAAw/TaOQAAAAEAGAGTOL8ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkthcnBsdXM6MTk5Ny5wZGYADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LYXJwbHVzLzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FycGx1cy8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Karplus:1999cs,
-	Abstract = {This paper presents results of blind predictions submitted to the CASP3 protein structure prediction experiment. We made predictions using the SAM-T98 method, an iterative hidden Markov model-based method for constructing protein family profiles. The method is purely sequence-based, using no structural information, and yet was able to predict structures as well as all but five of the structure-based methods in CASP3.},
-	Address = {John Baskin School of Engineering, University of California, Santa Cruz 95064, USA. karplus@cse.ucsc.edu},
-	Au = {Karplus, K and Barrett, C and Cline, M and Diekhans, M and Grate, L and Hughey, R},
-	Author = {Karplus, K and Barrett, C and Cline, M and Diekhans, M and Grate, L and Hughey, R},
-	Da = {19991109},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:14:44 -0700},
-	Dcom = {19991109},
-	Edat = {1999/10/20},
-	Group = {Next},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Algorithms; Amino Acid Sequence; Markov Chains; Molecular Sequence Data; Protein Structure, Secondary; Proteins/*chemistry; Sequence Alignment},
-	Mhda = {1999/10/20 00:01},
-	Own = {NLM},
-	Pages = {121--125},
-	Pl = {UNITED STATES},
-	Pmid = {10526360},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 1999;Suppl 3:121-5. },
-	Stat = {MEDLINE},
-	Title = {Predicting protein structure using only sequence information},
-	Volume = {Suppl 3},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTOL8IMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM43sP0aZFQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0thcnBsdXMAABAACAAAwXGNSQAAABEACAAAw/TaEQAAAAEAGAGTOL8ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkthcnBsdXM6MTk5OS5wZGYADgASAAgAMQA5ADkAOQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LYXJwbHVzLzE5OTkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FycGx1cy8xOTk5LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Karplus:2001pi,
-	Abstract = {This article presents results of blind predictions submitted to the CASP4 protein structure prediction experiment. We made two sets of predictions: one using the fully automated SAM-T99 server and one using the improved SAM-T2K method with human intervention. Both methods use iterative hidden Markov model-based methods for constructing protein family profiles, using only sequence information. Although the SAM-T99 method is purely sequence based, the SAM-T2K method uses the predicted secondary structure of the target sequence and the known secondary structure of the templates to improve fold recognition and alignment. In this article, we try to determine what aspects of the SAM-T2K method were responsible for its significantly better performance in the CASP4 experiment in the hopes of producing a better automatic prediction server. The use of secondary structure prediction seems to be the most valuable single improvement, though the combined total of various human interventions is probably at least as important.},
-	Address = {Computer Engineering Department, University of California, Santa Cruz, 95064, USA. karplus@soe.ucsc.edu},
-	Au = {Karplus, K and Karchin, R and Barrett, C and Tu, S and Cline, M and Diekhans, M and Grate, L and Casper, J and Hughey, R},
-	Author = {Karplus, K and Karchin, R and Barrett, C and Tu, S and Cline, M and Diekhans, M and Grate, L and Casper, J and Hughey, R},
-	Ci = {Copyright 2002 Wiley-Liss, Inc.},
-	Da = {20020208},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:14:44 -0700},
-	Dcom = {20020827},
-	Edat = {2002/02/09 10:00},
-	Group = {Next},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Adenosine Triphosphatases/chemistry; Bacterial Proteins/chemistry; Computer Simulation; *DNA-Binding Proteins; Endodeoxyribonucleases/chemistry; Escherichia coli Proteins/chemistry; Lyases/chemistry; *Models, Molecular; MutS DNA Mismatch-Binding Protein; Neural Networks (Computer); *Protein Conformation; Protein Structure, Tertiary; Repressor Proteins/chemistry; Research Design; Sequence Alignment; Sequence Analysis, Protein},
-	Mhda = {2002/08/28 10:01},
-	Own = {NLM},
-	Pages = {86--91},
-	Pii = {10.1002/prot.10021},
-	Pl = {United States},
-	Pmid = {11835485},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (DNA-Binding Proteins); 0 (Escherichia coli Proteins); 0 (FadR protein, Bacteria); 0 (Repressor Proteins); EC 3.1.- (Endodeoxyribonucleases); EC 3.6.1.- (Adenosine Triphosphatases); EC 3.6.1.3 (MutS DNA Mismatch-Binding Protein); EC 3.6.1.3 (MutS protein, E coli); EC 4.- (Lyases); EC 4.99.1- (magnesium chelatase)},
-	Sb = {IM},
-	So = {Proteins. 2001;Suppl 5:86-91. },
-	Stat = {MEDLINE},
-	Title = {What is the value added by human intervention in protein structure prediction?},
-	Volume = {Suppl 5},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTOL8IMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM4r8P0aV5QREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0thcnBsdXMAABAACAAAwXGNSQAAABEACAAAw/TZ3gAAAAEAGAGTOL8ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkthcnBsdXM6MjAwMS5wZGYADgASAAgAMgAwADAAMQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LYXJwbHVzLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FycGx1cy8yMDAxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Cline:2002jt,
-	Abstract = {MOTIVATION: Protein sequence alignments have a myriad of applications in bioinformatics, including secondary and tertiary structure prediction, homology modeling, and phylogeny. Unfortunately, all alignment methods make mistakes, and mistakes in alignments often yield mistakes in their application. Thus, a method to identify and remove suspect alignment positions could benefit many areas in protein sequence analysis. RESULTS: We tested four predictors of alignment position reliability, including near-optimal alignment information, column score, and secondary structural information. We validated each predictor against a large library of alignments, removing positions predicted as unreliable. Near-optimal alignment information was the best predictor, removing 70% of the substantially-misaligned positions and 58% of the over-aligned positions, while retaining 86% of those aligned accurately.},
-	Address = {Center for Biomolecular Science and Engineering, Jack Baskin School of Engineering, University of California, Santa Cruz, CA 95064, USA. cline@soe.ucsc.edu},
-	Au = {Cline, M and Hughey, R and Karplus, K},
-	Author = {Cline, Melissa and Hughey, Richard and Karplus, Kevin},
-	Da = {20020215},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:14:44 -0700},
-	Dcom = {20020722},
-	Edat = {2002/02/16 10:00},
-	Group = {Next},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Algorithms; Computational Biology; Neural Networks (Computer); Proteins/*genetics; Sequence Alignment/*statistics \& numerical data; Software},
-	Mhda = {2002/07/23 10:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {306--314},
-	Pl = {England},
-	Pmid = {11847078},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Bioinformatics. 2002 Feb;18(2):306-14. },
-	Stat = {MEDLINE},
-	Title = {Predicting reliable regions in protein sequence alignments},
-	Volume = {18},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTOHYJMjAwMmEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM4i8P0aTBQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNsaW5lAAAQAAgAAMFxjUkAAAARAAgAAMP02bAAAAABABgBkzh2AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDbGluZToyMDAyYS5wZGYAAA4AFAAJADIAMAAwADIAYQAuAHAAZABmAA8ACAADAGgAcwByABIAPVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DbGluZS8yMDAyYS5wZGYAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAkLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2xpbmUvMjAwMmEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJNAlICWwJmAmoCeAJ/AogCrwK0ArcAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACxA==}}
-
-@article{Cline:2002hb,
-	Abstract = {Pairwise contact potentials have a long, successful history in protein structure prediction. They provide an easily-estimated representation of many attributes of protein structures, such as the hydrophobic effect. In order to improve on existing potentials, one should develop a clear understanding of precisely what information they convey. Here, using mutual information, we quantified the information in amino acid potentials, and the importance of hydropathy, charge, disulfide bonding, and burial. Sampling error in mutual information was controlled for by estimating how much information cannot be attributed to sampling bias. We found the information in amino acid contacts to be modest: 0.04 bits per contact. Of that, only 0.01 bits of information could not be attributed to hydropathy, charge, disulfide bonding, or burial.},
-	Address = {Center for Biomolecular Science and Engineering, Baskin School of Engineering, University of California, Santa Cruz, California 95064, USA. cline@soe.ucsc.edu},
-	Au = {Cline, MS and Karplus, K and Lathrop, RH and Smith, TF and Rogers RG, Jr and Haussler, D},
-	Author = {Cline, Melissa S and Karplus, Kevin and Lathrop, Richard H and Smith, Temple F and Rogers, Robert G Jr and Haussler, David},
-	Ci = {Copyright 2002 Wiley-Liss, Inc.},
-	Da = {20020904},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:14:44 -0700},
-	Dcom = {20021004},
-	Doi = {10.1002/prot.10198},
-	Edat = {2002/09/05 10:00},
-	Gr = {GM17129/GM/United States NIGMS},
-	Group = {Next},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Language = {eng},
-	Lr = {20071114},
-	Mh = {Amino Acids/*chemistry; Disulfides/chemistry; Electrostatics; Hydrophobicity; *Models, Biological; Molecular Structure; Protein Conformation; Proteins/*chemistry; Solvents/chemistry},
-	Mhda = {2002/10/09 04:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {7--14},
-	Pl = {United States},
-	Pmid = {12211011},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Disulfides); 0 (Proteins); 0 (Solvents)},
-	Sb = {IM},
-	So = {Proteins. 2002 Oct 1;49(1):7-14. },
-	Stat = {MEDLINE},
-	Title = {Information-theoretic dissection of pairwise contact potentials},
-	Volume = {49},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTOHYIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM4asP0aQpQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNsaW5lAAAQAAgAAMFxjUkAAAARAAgAAMP02YoAAAABABgBkzh2AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDbGluZToyMDAyLnBkZgAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0NsaW5lLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2xpbmUvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.10198}}
-
-@article{Karchin:2003sf,
-	Abstract = {An important problem in computational biology is predicting the structure of the large number of putative proteins discovered by genome sequencing projects. Fold-recognition methods attempt to solve the problem by relating the target proteins to known structures, searching for template proteins homologous to the target. Remote homologs that may have significant structural similarity are often not detectable by sequence similarities alone. To address this, we incorporated predicted local structure, a generalization of secondary structure, into two-track profile hidden Markov models (HMMs). We did not rely on a simple helix-strand-coil definition of secondary structure, but experimented with a variety of local structure descriptions, following a principled protocol to establish which descriptions are most useful for improving fold recognition and alignment quality. On a test set of 1298 nonhomologous proteins, HMMs incorporating a 3-letter STRIDE alphabet improved fold recognition accuracy by 15% over amino-acid-only HMMs and 23% over PSI-BLAST, measured by ROC-65 numbers. We compared two-track HMMs to amino-acid-only HMMs on a difficult alignment test set of 200 protein pairs (structurally similar with 3-24% sequence identity). HMMs with a 6-letter STRIDE secondary track improved alignment quality by 62%, relative to DALI structural alignments, while HMMs with an STR track (an expanded DSSP alphabet that subdivides strands into six states) improved by 40% relative to CE.},
-	Address = {Center for Biomolecular Science and Engineering, Baskin School of Engineering, University of California, Santa Cruz 95064, USA. rachelk@soe.ucsc.edu},
-	Annote = {Not really directly relevant; they combine using the full alphabet information with an HMM describing the local state of the backbone (helix, coil, sheet, etc.)  They have added parameters instead of reducing them.},
-	Au = {Karchin, R and Cline, M and Mandel-Gutfreund, Y and Karplus, K},
-	Author = {Karchin, Rachel and Cline, Melissa and Mandel-Gutfreund, Yael and Karplus, Kevin},
-	Ci = {Copyright 2003 Wiley-Liss, Inc.},
-	Da = {20030604},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:36:17 -0700},
-	Dcom = {20030722},
-	Doi = {10.1002/prot.10369},
-	Edat = {2003/06/05 05:00},
-	Group = {Next},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Algorithms; Computational Biology/methods; *Markov Chains; *Protein Folding; *Protein Structure, Secondary; Proteins/*chemistry; Reproducibility of Results; Sequence Alignment/methods},
-	Mhda = {2003/07/23 05:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {504--514},
-	Pl = {United States},
-	Pmid = {12784210},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2003 Jun 1;51(4):504-14. },
-	Stat = {MEDLINE},
-	Title = {Hidden Markov models that use predicted local structure for fold recognition: alphabets of backbone geometry},
-	Volume = {51},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTN/oIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM4UMP0aOxQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0thcmNoaW4AABAACAAAwXGNSQAAABEACAAAw/TZbAAAAAEAGAGTN/oARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkthcmNoaW46MjAwMy5wZGYADgASAAgAMgAwADAAMwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LYXJjaGluLzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FyY2hpbi8yMDAzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.10369}}
-
-@article{Karchin:2004yf,
-	Abstract = {Residue burial, which describes a protein residue's exposure to solvent and neighboring atoms, is key to protein structure prediction, modeling, and analysis. We assessed 21 alphabets representing residue burial, according to their predictability from amino acid sequence, conservation in structural alignments, and utility in one fold-recognition scenario. This follows upon our previous work in assessing nine representations of backbone geometry.1 The alphabet found to be most effective overall has seven states and is based on a count of C(beta) atoms within a 14 A-radius sphere centered at the C(beta) of a residue of interest. When incorporated into a hidden Markov model (HMM), this alphabet gave us a 38% performance boost in fold recognition and 23% in alignment quality.},
-	Address = {Department of Biopharmaceutical Sciences, University of California, San Francisco 94143-2240, USA. rachelk@salilab.org},
-	Au = {Karchin, R and Cline, M and Karplus, K},
-	Author = {Karchin, Rachel and Cline, Melissa and Karplus, Kevin},
-	Ci = {Copyright 2004 Wiley-Liss, Inc.},
-	Da = {20040422},
-	Date-Added = {2008-03-18 16:14:44 -0700},
-	Date-Modified = {2008-03-18 16:14:44 -0700},
-	Dcom = {20040624},
-	Doi = {10.1002/prot.20008},
-	Edat = {2004/04/23 05:00},
-	Group = {Next},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Amino Acids/*chemistry; Conserved Sequence; Markov Chains; Molecular Structure; *Protein Conformation; Protein Folding; Sequence Analysis, Protein; Solvents/chemistry},
-	Mhda = {2004/06/25 05:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {508--518},
-	Pl = {United States},
-	Pmid = {15103615},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Evaluation Studies; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Solvents)},
-	Sb = {IM},
-	So = {Proteins. 2004 May 15;55(3):508-18. },
-	Stat = {MEDLINE},
-	Title = {Evaluation of local structure alphabets based on residue burial},
-	Volume = {55},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAGTN/oIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAZM35sP0aLFQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0thcmNoaW4AABAACAAAwXGNSQAAABEACAAAw/TZMQAAAAEAGAGTN/oARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkthcmNoaW46MjAwNC5wZGYADgASAAgAMgAwADAANAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LYXJjaGluLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FyY2hpbi8yMDA0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.20008}}
-
-@article{Abkevich:2000kx,
-	Abstract = { We study the impact of disulfide bonds on protein stability and folding. Using lattice model simulations, we show that formation of a disulfide bond stabilizes a protein to an extent that depends on the distance along the chain between linked cysteine residues. However, the impact of disulfide bonds on folding kinetics varies broadly, from acceleration when disulfides are introduced in or close to the folding nucleus, to slowing when disulfides are introduced outside the nucleus. Having established the effect of disulfide bonds on stability, we study the correlation between the number of disulfide bonds and the composition of certain amino acid classes with the goal to use it as a statistical probe into factors that contribute to stability of proteins. We find that the number of disulfides is negatively correlated with aliphatic hydrophobic but not aromatic content. It is surprising that we observe a strong correlation of disulfide content with polar (Q,S,T,N) amino acid content and a strong negative correlation with charged (E,D,K,R) content. These findings provide insights into factors that determine protein stability and principles of protein design as well as possible relations of disulfide bonds and protein function.},
-	Author = {Abkevich, V I and Shakhnovich, E I},
-	Date-Added = {2008-03-18 08:22:56 -0700},
-	Date-Modified = {2008-05-29 12:19:22 -0700},
-	Journal = {J Mol Biol},
-	Keywords = {protein folding; disulfide bonds; kinetics; nucleus; lattice models},
-	Number = {4},
-	Pages = {975--985},
-	Title = {What can Disulfide Bonds Tell Us about Protein Energetics, Function and Folding: Simulations and Bioinformatics Analysis},
-	Ty = {JOUR},
-	Volume = {300},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa9kIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAe7V7sQFLC9QREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFia2V2aWNoABAACAAAwXGNSQAAABEACAAAxAWOnwAAAAEAGABGa9kARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFia2V2aWNoOjIwMDAucGRmAAAOABIACAAyADAAMAAwAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0Fia2V2aWNoLzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0Fia2V2aWNoLzIwMDAucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-45F519X-BY/2/421b6f5182df4ebe46544653064b97f3}}
-
-@article{McNeil:1984ul,
-	Author = {McNeil, B J and Hanley, J A},
-	Date-Added = {2008-03-17 23:12:59 -0700},
-	Date-Modified = {2008-03-17 23:14:02 -0700},
-	Group = {IBID},
-	Isi = {ISI:A1984SW23400002},
-	Issn = {0272-989X},
-	Journal = {Medical Decision Making},
-	Number = {2},
-	Pages = {137--150},
-	Publication-Type = {J},
-	Title = {Statistical Approaches to the Analysis of Receiver Operating Characteristic (ROC) Curves},
-	Volume = {4},
-	Year = {1984},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAHwPBcIMTk4NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAfA7+8QGgeQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk1jTmVpbAAQAAgAAMFxjUkAAAARAAgAAMQG5FQAAAABABgB8DwXAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNY05laWw6MTk4NC5wZGYAAA4AEgAIADEAOQA4ADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTWNOZWlsLzE5ODQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL01jTmVpbC8xOTg0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Hanley:1982ly,
-	Author = {Hanley, J A and McNeil, B J},
-	Date-Added = {2008-03-17 23:10:25 -0700},
-	Date-Modified = {2008-05-29 12:28:57 -0700},
-	Group = {IBID},
-	Isi = {ISI:A1982NG95400006},
-	Journal = {Radiology},
-	Number = {1},
-	Pages = {29--36},
-	Publication-Type = {J},
-	Title = {The Meaning and Use of the Area Under a {Receiver Operating Characteristic} ({ROC}) Curve},
-	Volume = {143},
-	Year = {1982},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAHvBQoIMTk4Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAe8E7cQFTfZQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkhhbmxleQAQAAgAAMFxjUkAAAARAAgAAMQFsGYAAAABABgB7wUKAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIYW5sZXk6MTk4Mi5wZGYAAA4AEgAIADEAOQA4ADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvSGFubGV5LzE5ODIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0hhbmxleS8xOTgyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Bastien:2005ly,
-	Abstract = {Automatic comparison of compositionally biased genomes, such as that of the malarial causative agent Plasmodium falciparum (82% adenosine + thymidine), with genomes of average composition, is currently limited. Indeed, popular tools such as BLAST require that amino acid distributions be similar in aligned sequences. However, the P. falciparum genome is so biased that six amino acids account for more than 50% of the protein composition. One reason for the comparison methods failure lies in the compositional difference between the query and the subject proteomes, which is not taken into account in the amino acid substitution matrices. This paper introduces a method to derive substitution matrices, in particular BLOSUM 62, in the frame of the information theory. It allows the construction of non-symmetrical matrices, taking into account the non-symmetric amino acid distributions. The dirAtPf family of matrices allowing the comparison of P. falciparum and A. thaliana is given as an example. This paper further provides an analysis of the obtained matrices in the frame of the information theory, supporting the discrimination advantage they bring.},
-	Address = {Laboratoire de Physiologie Cellulaire Vegetale, Departement Reponse et Dynamique Cellulaire, UMR 5019, CNRS-CEA-INRA-Universite Joseph-Fourier, CEA Grenoble, 17, rue des Martyrs, 38054 Grenoble, France.},
-	Au = {Bastien, O and Roy, S and Marechal, E},
-	Author = {Bastien, Olivier and Roy, Sylvaine and Marechal, Eric},
-	Da = {20050613},
-	Date-Added = {2008-03-17 14:14:08 -0700},
-	Date-Modified = {2008-03-17 14:14:11 -0700},
-	Dcom = {20050728},
-	Edat = {2005/06/14 09:00},
-	Issn = {1631-0691 (Print)},
-	Jid = {101140040},
-	Journal = {C R Biol},
-	Jt = {Comptes rendus biologies},
-	Language = {eng},
-	Mh = {Amino Acid Sequence; Amino Acid Substitution; Animals; Genome, Protozoan; Models, Genetic; Plasmodium falciparum/*genetics; *Proteome},
-	Mhda = {2005/07/29 09:00},
-	Number = {5},
-	Own = {NLM},
-	Pages = {445--453},
-	Pl = {France},
-	Pmid = {15948633},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteome)},
-	Sb = {IM},
-	So = {C R Biol. 2005 May;328(5):445-53.},
-	Stat = {MEDLINE},
-	Title = {Construction of non-symmetric substitution matrices derived from proteomes with biased amino acid distributions},
-	Volume = {328},
-	Year = {2005}}
-
-@article{Altschul:1991mz,
-	Abstract = {Protein sequence alignments have become an important tool for molecular biologists. Local alignments are frequently constructed with the aid of a "substitution score matrix" that specifies a score for aligning each pair of amino acid residues. Over the years, many different substitution matrices have been proposed, based on a wide variety of rationales. Statistical results, however, demonstrate that any such matrix is implicitly a "log-odds" matrix, with a specific target distribution for aligned pairs of amino acid residues. In the light of information theory, it is possible to express the scores of a substitution matrix in bits and to see that different matrices are better adapted to different purposes. The most widely used matrix for protein sequence comparison has been the PAM-250 matrix. It is argued that for database searches the PAM-120 matrix generally is more appropriate, while for comparing two specific proteins with suspected homology the PAM-200 matrix is indicated. Examples discussed include the lipocalins, human alpha 1 B-glycoprotein, the cystic fibrosis transmembrane conductance regulator and the globins.},
-	Address = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894.},
-	Au = {Altschul, SF},
-	Author = {Altschul, S F},
-	Da = {19910719},
-	Date-Added = {2008-03-17 14:13:42 -0700},
-	Date-Modified = {2008-03-17 14:13:46 -0700},
-	Dcom = {19910719},
-	Edat = {1991/06/05},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Algorithms; *Amino Acid Sequence; Models, Statistical; Molecular Sequence Data; Protein Conformation; Proteins/chemistry/*genetics; Sequence Homology, Nucleic Acid; Thermodynamics},
-	Mhda = {1991/06/05 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {555--565},
-	Pii = {0022-2836(91)90193-A},
-	Pl = {ENGLAND},
-	Pmid = {2051488},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 1991 Jun 5;219(3):555-65.},
-	Stat = {MEDLINE},
-	Title = {Amino acid substitution matrices from an information theoretic perspective},
-	Volume = {219},
-	Year = {1991}}
-
-@article{Styczynski:2008xy,
-	Annote = {10.1038/nbt0308-274},
-	Author = {Styczynski, Mark P and Jensen, Kyle L and Rigoutsos, Isidore and Stephanopoulos, Gregory},
-	Date-Added = {2008-03-17 14:10:31 -0700},
-	Date-Modified = {2008-03-17 14:10:31 -0700},
-	Isbn = {1087-0156},
-	Journal = {Nat Biotech},
-	L3 = {http://www.nature.com/nbt/journal/v26/n3/suppinfo/nbt0308-274_S1.html},
-	M3 = {10.1038/nbt0308-274},
-	Number = {3},
-	Pages = {274--275},
-	Publisher = {Nature Publishing Group},
-	Title = {BLOSUM62 miscalculations improve search performance},
-	Ty = {JOUR},
-	Url = {http://dx.doi.org/10.1038/nbt0308-274},
-	Volume = {26},
-	Year = {2008},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAHlZ1QIMjAwOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaxbqsP22zEAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAClN0eWN6eW5za2kAEAAIAADBcY1JAAAAEQAIAADD90uxAAAAAQAYAeVnVABGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U3R5Y3p5bnNraToyMDA4LnBkZgAADgASAAgAMgAwADAAOAAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TdHljenluc2tpLzIwMDgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL1N0eWN6eW5za2kvMjAwOC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS},
-	Bdsk-File-2 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAHlZ1QJMjAwOGEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaxbwcP222oAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAClN0eWN6eW5za2kAEAAIAADBcY1JAAAAEQAIAADD90vqAAAAAQAYAeVnVABGa88ARmrVAEZqGwBGZGgAQIlDAAIARmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U3R5Y3p5bnNraToyMDA4YS5wZGYADgAUAAkAMgAwADAAOABhAC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1N0eWN6eW5za2kvMjAwOGEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU3R5Y3p5bnNraS8yMDA4YS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/nbt0308-274}}
-
-@inproceedings{Davis:2006yq,
-	Author = {Davis, Jesse and Goadrich, Mark},
-	Booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
-	Date-Added = {2008-03-14 09:27:31 -0700},
-	Date-Modified = {2008-03-14 09:27:31 -0700},
-	Group = {Alphabets},
-	Title = {The Relationship Between Precision-Recall and ROC Curves},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAEGu/0IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP4xh8PaAz5QREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABURhdmlzAAAQAAgAAMFxjUkAAAARAAgAAMPac74AAAABABgBBrv9AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpEYXZpczoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0RhdmlzLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGF2aXMvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Yu:2003qv,
-	Abstract = {Amino acid substitution matrices are central to protein-comparison methods. In most commonly used matrices, the substitution scores take a log-odds form, involving the ratio of "target" to "background" frequencies derived from large, carefully curated sets of protein alignments. However, such matrices often are used to compare protein sequences with amino acid compositions that differ markedly from the background frequencies used for the construction of the matrices. Of course, the target frequencies should be adjusted in such cases, but the lack of an appropriate way to do this has been a long-standing problem. This article shows that if one demands consistency between target and background frequencies, then a log-odds substitution matrix implies a unique set of target and background frequencies as well as a unique scale. Standard substitution matrices therefore are truly appropriate only for the comparison of proteins with standard amino acid composition. Accordingly, we present and evaluate a rationale for transforming the target frequencies implicit in a standard matrix to frequencies appropriate for a nonstandard context. This rationale yields asymmetric matrices for the comparison of proteins with divergent compositions. Earlier approaches are unable to deal with this case in a fully consistent manner. Composition-specific substitution matrix adjustment is shown to be of utility for comparing compositionally biased proteins, including those of organisms with nucleotide-biased, and therefore codon-biased, genomes or isochores.},
-	Address = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.},
-	Annote = {Wootton FLINKYM-PRAWG paper recommended by John Spouge, NIH.
-FLINKYM tend to be encoded by AT-rich DNA
-PRAWG by GC-rich DNA
-Happens to be similar to the 2-letter HSDM alphabet with the exceptions of W and NK
-I don' t think anything obvious jumps out at me here, except that AT vs GC tend to code for H and P (sort of)},
-	Au = {Yu, YK and Wootton, JC and Altschul, SF},
-	Author = {Yu, Yi-Kuo and Wootton, John C and Altschul, Stephen F},
-	Da = {20031224},
-	Date-Added = {2008-03-12 10:10:25 -0700},
-	Date-Modified = {2008-03-13 09:22:22 -0700},
-	Dcom = {20040420},
-	Dep = {20031208},
-	Doi = {10.1073/pnas.2533904100},
-	Edat = {2003/12/10 05:00},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Language = {eng},
-	Lr = {20061115},
-	Mh = {Amino Acid Sequence; Amino Acid Substitution/*genetics; Animals; Aspartate-Ammonia Ligase/chemistry/genetics; Evolution, Molecular; Gene Frequency; Molecular Sequence Data; *Mutation, Missense; Mycobacterium tuberculosis/genetics; Plasmodium falciparum/genetics; Proteins/genetics; Reproducibility of Results; Sequence Alignment; Sequence Homology, Amino Acid},
-	Mhda = {2004/04/21 05:00},
-	Number = {26},
-	Own = {NLM},
-	Pages = {15688--15693},
-	Phst = {2003/12/08 {$[$}aheadofprint{$]$}},
-	Pii = {2533904100},
-	Pl = {United States},
-	Pmid = {14663142},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins); EC 6.3.1.1 (Aspartate-Ammonia Ligase)},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 2003 Dec 23;100(26):15688-93. Epub 2003 Dec 8.},
-	Stat = {MEDLINE},
-	Title = {The compositional adjustment of amino acid substitution matrices},
-	Volume = {100},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVcIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAbTlr8P9WiBQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAll1ABAACAAAwXGNSQAAABEACAAAw/28kAAAAAEAGABGbVcARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOll1OjIwMDMucGRmAAAOABIACAAyADAAMAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1l1LzIwMDMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL1l1LzIwMDMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.2533904100}}
-
-@inproceedings{Cortes:2004lq,
-	Address = {Cambridge, MA},
-	Author = {C. Cortes and M. Mohri},
-	Booktitle = {Advances in Neural Information Processing Systems 16},
-	Date-Added = {2008-02-23 09:04:04 -0800},
-	Date-Modified = {2008-02-23 09:05:32 -0800},
-	Editor = {Sebastian Thrun and Lawrence Saul and Bernhard Scholkopf},
-	Publisher = {MIT Press},
-	Title = {AUC optimization vs. error rate minimization},
-	Url = {citeseer.ist.psu.edu/cortes03auc.html},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAFBRgoIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUFErMPlkdtQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkNvcnRlcwAQAAgAAMFxjUkAAAARAAgAAMPmAlsAAAABABgBQUYKAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDb3J0ZXM6MjAwNC5wZGYAAA4AEgAIADIAMAAwADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQ29ydGVzLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0NvcnRlcy8yMDA0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {citeseer.ist.psu.edu/cortes03auc.html}}
-
-@article{Kraulis:1991bq,
-	Author = {Kraulis, P J},
-	Date-Added = {2007-12-12 17:43:16 -0800},
-	Date-Modified = {2008-05-30 02:47:51 -0700},
-	Journal = {J Appl Crystallogr},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kraulis/1991.pdf},
-	Month = {Oct},
-	Number = {5},
-	Pages = {946--950},
-	Title = {{{\it MOLSCRIPT}: a program to produce both detailed and schematic plots of protein structures}},
-	Volume = {24},
-	Year = {1991},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAB5knEIMTk5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHmSTryyZhsAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0tyYXVsaXMAABAACAAAwXGNSQAAABEACAAAvLLIiwAAAAEAGAB5knEARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOktyYXVsaXM6MTk5MS5wZGYADgASAAgAMQA5ADkAMQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LcmF1bGlzLzE5OTEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS3JhdWxpcy8xOTkxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1107/S0021889891004399},
-	Bdsk-Url-2 = {http://dx.doi.org/10.1107/S0021889891004399}}
-
-@book{Doolittle:1986rt,
-	Annote = {Includes bibliographical references and index},
-	Author = {Doolittle, R F},
-	Date-Added = {2007-12-12 17:33:55 -0800},
-	Date-Modified = {2008-05-29 12:02:51 -0700},
-	Publisher = {University Science Books},
-	Title = {Of URFs and ORFs: A Primer on How to Analyze Derived Amino Acid Sequences},
-	Year = {1986}}
-
-@article{ODonoghue:2003lq,
-	Abstract = {The aminoacyl-tRNA synthetases are one of the major protein components in the translation machinery. These essential proteins are found in all forms of life and are responsible for charging their cognate tRNAs with the correct amino acid. The evolution of the tRNA synthetases is of fundamental importance with respect to the nature of the biological cell and the transition from an RNA world to the modern world dominated by protein-enzymes. We present a structure-based phylogeny of the aminoacyl-tRNA synthetases. By using structural alignments of all of the aminoacyl-tRNA synthetases of known structure in combination with a new measure of structural homology, we have reconstructed the evolutionary history of these proteins. In order to derive unbiased statistics from the structural alignments, we introduce a multidimensional QR factorization which produces a nonredundant set of structures. Since protein structure is more highly conserved than protein sequence, this study has allowed us to glimpse the evolution of protein structure that predates the root of the universal phylogenetic tree. The extensive sequence-based phylogenetic analysis of the tRNA synthetases (Woese et al., Microbiol. Mol. Biol. Rev. 64:202-236, 2000) has further enabled us to reconstruct the complete evolutionary profile of these proteins and to make connections between major evolutionary events and the resulting changes in protein shape. We also discuss the effect of functional specificity on protein shape over the complex evolutionary course of the tRNA synthetases.},
-	Address = {Department of Chemistry, University of Illinois at Urbana-Champaign, Urbana, Illinois 61801, USA.},
-	Au = {O'Donoghue, P and Luthey-Schulten, Z},
-	Author = {O'Donoghue, Patrick and Luthey-Schulten, Zaida},
-	Da = {20031210},
-	Date-Added = {2007-12-12 17:23:32 -0800},
-	Date-Modified = {2007-12-12 17:28:14 -0800},
-	Dcom = {20040224},
-	Edat = {2003/12/11 05:00},
-	Gr = {5T32GM08276/GM/United States NIGMS},
-	Issn = {1092-2172 (Print)},
-	Jid = {9706653},
-	Journal = {Microbiol Mol Biol Rev},
-	Jt = {Microbiology and molecular biology reviews : MMBR},
-	Keywords = {Amino Acid Sequence; Amino Acyl-tRNA Synthetases/*chemistry/*genetics; Conserved Sequence; Evolution, Molecular; Models, Molecular; Molecular Sequence Data; Phylogeny; Protein Conformation; RNA, Transfer, Amino Acyl/chemistry/genetics; Sequence Alignment},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/O'Donoghue/2003.pdf},
-	Lr = {20071114},
-	Mhda = {2004/02/26 05:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {550--573},
-	Pl = {United States},
-	Pmid = {14665676},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.; Review},
-	Pubm = {Print},
-	Rf = {62},
-	Rn = {0 (RNA, Transfer, Amino Acyl); EC 6.1.1.- (Amino Acyl-tRNA Synthetases)},
-	Sb = {IM},
-	So = {Microbiol Mol Biol Rev. 2003 Dec;67(4):550-73. },
-	Stat = {MEDLINE},
-	Title = {On the evolution of structure in aminoacyl-tRNA synthetases},
-	Volume = {67},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAAB5j0EIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHmPMcOFycZQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACk8nRG9ub2dodWUAEAAIAADBcY1JAAAAEQAIAADDhjpGAAAAAQAYAHmPQQBGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TydEb25vZ2h1ZToyMDAzLnBkZgAADgASAAgAMgAwADAAMwAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9PJ0Rvbm9naHVlLzIwMDMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL08nRG9ub2dodWUvMjAwMy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS}}
-
-@article{Miyazawa:1996lr,
-	Abstract = {Attractive inter-residue contact energies for proteins have been re-evaluated with the same assumptions and approximations used originally by us in 1985, but with a significantly larger set of protein crystal structures. An additional repulsive packing energy term, operative at higher densities to prevent overpacking, has also been estimated for all 20 amino acids as a function of the number of contacting residues, based on their observed distributions. The two terms of opposite sign are intended to be used together to provide an estimate of the overall energies of inter-residue interactions in simplified proteins without atomic details. To overcome the problem of how to utilize the many homologous proteins in the Protein Data Bank, a new scheme has been devised to assign different weights to each protein, based on similarities among amino acid sequences. A total of 1168 protein structures containing 1661 subunit sequences are actually used here. After the sequence weights have been applied, these correspond to an effective number of residue-residue contacts of 113,914, or about six times more than were used in the old analysis. Remarkably, the new attractive contact energies are nearly identical to the old ones, except for those with Leu and the rarer amino acids Trp and Met. The largest change found for Leu is surprising. The estimates of hydrophobicity from the contact energies for non-polar side-chains agree well with the experimental values. In an application of these contact energies, the sequences of 88 structurally distinct proteins in the Protein Data Bank are threaded at all possible positions without gaps into 189 different folds of proteins whose sequences differ from each other by at least 35% sequence identity. The native structures for 73 of 88 proteins, excluding 15 exceptional proteins such as membrane proteins, are all demonstrated to have the lowest alignment energies.},
-	Address = {Faculty of Technology Gunma Univrsity, Kiryu Gunma, Japan.},
-	Au = {Miyazawa, S and Jernigan, RL},
-	Author = {Miyazawa, S and Jernigan, R L},
-	Da = {19960516},
-	Date-Added = {2007-10-29 14:48:21 -0700},
-	Date-Modified = {2008-05-29 12:22:06 -0700},
-	Dcom = {19960516},
-	Edat = {1996/03/01},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acids/*chemistry; Computer Simulation; Databases, Factual; Mathematics; *Protein Folding; Proteins/*chemistry; Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Miyazawa/1996.pdf},
-	Lr = {20001218},
-	Mhda = {1996/03/01 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {623--644},
-	Pii = {S0022-2836(96)90114-X},
-	Pl = {ENGLAND},
-	Pmid = {8604144},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 1996 Mar 1;256(3):623-44.},
-	Stat = {MEDLINE},
-	Title = {Residue-residue potentials with a favorable contact pair term and an unfavorable high packing density term, for simulation and threading},
-	Volume = {256},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbL8IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/2/sNLoxkAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACE1peWF6YXdhABAACAAAwXGNSQAAABEACAAAw0wFiQAAAAEAGABGbL8ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOk1peWF6YXdhOjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL01peWF6YXdhLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL01peWF6YXdhLzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.1996.0114}}
-
-@article{Salton:1991uq,
-	Abstract = {Recent developments in the storage, retrieval, and manipulation of large text files are described. The text analysis problem is examined, and modern approaches leading to the identification and retrieval of selected text items in response to search requests are discussed.},
-	Author = {Salton, G},
-	Da = {20070904},
-	Date-Added = {2007-09-27 11:51:25 -0700},
-	Date-Modified = {2007-09-27 11:52:03 -0700},
-	Doi = {10.1126/science.253.5023.974},
-	Edat = {1991/08/30 00:00},
-	Issn = {1095-9203 (Electronic)},
-	Jid = {0404511},
-	Journal = {Science},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Salton/1991.pdf},
-	Mhda = {1991/08/30 00:00},
-	Number = {5023},
-	Own = {NLM},
-	Pages = {974--980},
-	Pii = {253/5023/974},
-	Pmid = {17775340},
-	Pst = {ppublish},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print},
-	So = {Science. 1991 Aug 30;253(5023):974-980.},
-	Stat = {Publisher},
-	Title = {Developments in Automatic Text Retrieval},
-	Volume = {253},
-	Year = {1991},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPwIMTk5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADse7MMhSZVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlNhbHRvbgAQAAgAAMFxjUkAAAARAAgAAMMhrAUAAAABABgARmz8AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTYWx0b246MTk5MS5wZGYAAA4AEgAIADEAOQA5ADEALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvU2FsdG9uLzE5OTEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1NhbHRvbi8xOTkxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1126/science.253.5023.974}}
-
-@article{Kester:2000qy,
-	Abstract = {The authors present a method to combine several independent studies of the same (continuous or semiquantitative) diagnostic test, where each study reports a complete ROC curve; a plot of the true-positive rate or sensitivity against the false-positive rate or one minus the specificity. The result of the analysis is a pooled ROC curve, with a confidence band, as opposed to earlier proposals that result in a pooled area under the ROC curve. The analysis is based on a two-parameter model for the ROC curve that can be estimated for each individual curve. The parameters are then pooled with a bivariate random-effects meta-analytic method, and a curve can be drawn from the pooled parameters. The authors propose to use a model that specifies a linear relation between the logistic transformations of sensitivity and one minus specificity. Specifically, they define V = In(sensitivity/(1 - sensitivity)) and U = In((1 - specificity)/specificity), and then D = V - U, S = V + U. The model is defined as D = alpha + betaS. The parameters alpha and beta are estimated using weighted linear regression with bootstrapping to get the standard errors, or using maximum likelihood. The authors show how the procedure works with continuous test data and with categorical test data.},
-	Address = {Department of Methodology and Statistics, Maastricht University, The Netherlands. Arnold.Kester@stat.unimaas.NL},
-	Au = {Kester, AD and Buntinx, F},
-	Author = {Kester, A D and Buntinx, F},
-	Da = {20010209},
-	Date-Added = {2007-09-27 11:44:31 -0700},
-	Date-Modified = {2007-09-28 16:29:55 -0700},
-	Dcom = {20010215},
-	Edat = {2000/11/04 11:00},
-	Issn = {0272-989X (Print)},
-	Jid = {8109073},
-	Journal = {Med Decis Making},
-	Jt = {Medical decision making : an international journal of the Society for Medical Decision Making},
-	Keywords = {Alcoholism/*diagnosis; Algorithms; Confidence Intervals; Humans; Likelihood Functions; Linear Models; Logistic Models; *Meta-Analysis; *Models, Statistical; *ROC Curve; Sensitivity and Specificity},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kester/2000.pdf},
-	Lr = {20061115},
-	Mhda = {2001/03/03 10:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {430--439},
-	Pl = {UNITED STATES},
-	Pmid = {11059476},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Med Decis Making. 2000 Oct-Dec;20(4):430-9.},
-	Stat = {MEDLINE},
-	Title = {Meta-analysis of ROC curves},
-	Volume = {20},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbH4IMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADtO5MMi3FxQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABktlc3RlcgAQAAgAAMFxjUkAAAARAAgAAMMjPswAAAABABgARmx+AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLZXN0ZXI6MjAwMC5wZGYAAA4AEgAIADIAMAAwADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS2VzdGVyLzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0tlc3Rlci8yMDAwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Gribskov:1996fk,
-	Abstract = {In this paper, we borrow the idea of the receiver operating characteristic (ROC) from clinical medicine and demonstrate its application to sequence comparison. The ROC includes elements of both sensitivity and specificity, and is a quantitative measure of the usefulness of a diagnostic. The ROC is used in this work to investigate the effects of scoring table and gap penalties on database searches. Studies on three families of proteins, 4Fe-4S ferredoxins, lysR bacterial regulatory proteins, and bacterial RNA polymerase sigma-factors lead to the following conclusions: sequence families are quite idiosyncratic, but the best PAM distance for database searches using the Smith-Waterman method is somewhat larger than predicted by theoretical methods, about 200 PAM. The length independent gap penalty (gap initiation penalty) is quite important, but shows a broad peak at values of about 20-24. The length dependent gap penalty (gap extension penalty) is almost irrelevant suggesting that successful database searches rely only to a limited degree on gapped alignments. Taken together, these observations lead to the conclusion that the optimal conditions for alignments and database searches are not, and should not be expected to be, the same.},
-	Address = {San Diego Supercomputer Center, P.O. Box 85608, San Diego, CA 92186-9784, USA.},
-	Au = {Gribskov, M and Robinson, NL},
-	Author = {Gribskov, M and Robinson, N L},
-	Da = {20060427},
-	Date-Added = {2007-09-27 11:31:56 -0700},
-	Date-Modified = {2007-09-27 14:33:27 -0700},
-	Dcom = {20060605},
-	Edat = {1996/03/01 00:00},
-	Gr = {P41 RR08605/RR/NCRR},
-	Issn = {0097-8485 (Print)},
-	Jid = {7607706},
-	Journal = {Comput Chem},
-	Jt = {Computers \& chemistry},
-	Keywords = {Bacterial Proteins/genetics; Ferredoxins/genetics; *ROC Curve; Sequence Alignment; Sequence Analysis/*methods; Sigma Factor/genetics; Transcription Factors/genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Gribskov/1996.pdf},
-	Lr = {20061115},
-	Mhda = {2006/06/06 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {25--33},
-	Pii = {S0097-8485(96)80004-0},
-	Pl = {England},
-	Pmid = {16718863},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (Ferredoxins); 0 (Sigma Factor); 0 (Transcription Factors); 87609-37-8 (LysR protein, Bacteria)},
-	Sb = {IM},
-	So = {Comput Chem. 1996 Mar;20(1):25-33.},
-	Stat = {MEDLINE},
-	Title = {Use of receiver operating characteristic (ROC) analysis to evaluate sequence matching.},
-	Volume = {20},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbFQIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADsolMMhb3JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEdyaWJza292ABAACAAAwXGNSQAAABEACAAAwyHR4gAAAAEAGABGbFQARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkdyaWJza292OjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dyaWJza292LzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0dyaWJza292LzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Chen:2003lr,
-	Abstract = {MOTIVATION: Comprehensive performance assessment is important for improving sequence database search methods. Sensitivity, selectivity and speed are three major yet usually conflicting evaluation criteria. The average precision (AP) measure aims to combine the sensitivity and selectivity features of a search algorithm. It can be easily visualized and extended to analyze results from a set of queries. Finally, the time-AP plot can clearly show the overall performance of different search methods. RESULTS: Experiments are performed based on the SCOP database. Popular sequence comparison algorithms, namely Smith-Waterman (SSEARCH), FASTA, BLAST and PSI-BLAST are evaluated. We find that (1) the low-complexity segment filtration procedure in BLAST actually harms its overall search quality; (2) AP scores of different search methods are approximately in proportion of the logarithm of search time; and (3) homologs in protein families with many members tend to be more obscure than those in small families. This measure may be helpful for developing new search algorithms and can guide researchers in selecting most suitable search methods. AVAILABILITY: Test sets and source code of this evaluation tool are available upon request.},
-	Address = {Department of Computer Science and Technology, Beijing University, Beijing 100871, People's Republic of China. cray@pku.edu.cn},
-	Au = {Chen, Z},
-	Author = {Chen, Zhuoran},
-	Da = {20031211},
-	Date-Added = {2007-09-27 11:06:13 -0700},
-	Date-Modified = {2007-09-27 11:07:11 -0700},
-	Dcom = {20040817},
-	Edat = {2003/12/12 05:00},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Keywords = {*Algorithms; *Databases, Protein; Information Storage and Retrieval; Proteins/*chemistry; Reproducibility of Results; Sensitivity and Specificity; Sequence Alignment/*methods; Sequence Analysis, Protein/*methods; User-Computer Interface},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chen/2003.pdf},
-	Lr = {20061115},
-	Mhda = {2004/08/18 05:00},
-	Number = {18},
-	Own = {NLM},
-	Pages = {2456--2460},
-	Pl = {England},
-	Pmid = {14668231},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Evaluation Studies; Journal Article; Validation Studies},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Bioinformatics. 2003 Dec 12;19(18):2456-60.},
-	Stat = {MEDLINE},
-	Title = {Assessing sequence comparison methods with the average precision criterion},
-	Volume = {19},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA0IMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADsa8MMhP0tQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABENoZW4AEAAIAADBcY1JAAAAEQAIAADDIaG7AAAAAQAYAEZsDQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hlbjoyMDAzLnBkZgAADgASAAgAMgAwADAAMwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaGVuLzIwMDMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0NoZW4vMjAwMy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Wilbur:1994fk,
-	Abstract = {It has been observed that statistical tests are infrequently applied in analysing differences in the performance of different retrieval methods. We believe this is explained on the one hand by the complexity of the subject, and on the other hand by the desire to avoid misleading conclusions. Because practical retrieval methods cannot be explained by simple models, parametric statistical tests are generally not suitable. Some non-parametric tests require a symmetry in the null hypothesis that seems inappropriate to the required task. A second class of non-parametric tests comprise the bootstrap methods. Here, the null hypothesis seems appropriate to practical testing, but the bootstrap assumption (that the sample may adequately represent the whole population) may be in question. If the bootstrap assumption is false, one may be led to erroneous conclusions (type I or type II errors). By using a mathematical model which approximates the behaviour of practical retrieval systems, we show that bootstrap methods perform well in performance comparisons based on actual test sets used in practice. Type I error is appropriately predictable and the power loss of the tests, when compared with the theoretically most powerful tests in the most realistic setting, may not exceed ten percentage points. We conclude that the bootstrap methods provide a practical approach to statistical testing in the field of retrieval performance analysis.},
-	Author = {Wilbur, W. .. J. ..},
-	Co = {JIOSED},
-	Date = {1994},
-	Date-Added = {2007-09-04 09:38:50 -0700},
-	Date-Modified = {2007-09-04 09:39:02 -0700},
-	Isi = {INSPEC:4766055},
-	Iso-Source-Abbreviation = {J. Inf. Sci. (UK)},
-	Issn = {1352-7460},
-	Journal = {Journal of Information Science},
-	Keywords = {[C1994-11-7250-004]},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wilbur/1994.pdf},
-	Number = {4},
-	Pages = {270--284},
-	Ps = {270-84},
-	Publication-Type = {J},
-	Title = {Non-parametric significance tests of retrieval performance comparisons},
-	Volume = {20},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUkIMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABODIsIUmdhQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABldpbGJ1cgAQAAgAAMFxjUkAAAARAAgAAMIVClgAAAABABgARm1JAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpXaWxidXI6MTk5NC5wZGYAAA4AEgAIADEAOQA5ADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvV2lsYnVyLzE5OTQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1dpbGJ1ci8xOTk0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Hochreiter:2007lr,
-	Abstract = {MOTIVATION: As more genomes are sequenced, the demand for fast gene classification techniques is increasing. To analyze a newly sequenced genome, first the genes are identified and translated into amino acid sequences which are then classified into structural or functional classes. The best-performing protein classification methods are based on protein homology detection using sequence alignment methods. Alignment methods have recently been enhanced by discriminative methods like support vector machines (SVMs) as well as by position-specific scoring matrices (PSSM) as obtained from PSI-BLAST. However, alignment methods are time consuming if a new sequence must be compared to many known sequences-the same holds for SVMs. Even more time consuming is to construct a PSSM for the new sequence. The best-performing methods would take about 25 days on present-day computers to classify the sequences of a new genome (20,000 genes) as belonging to just one specific class--however, there are hundreds of classes. Another shortcoming of alignment algorithms is that they do not build a model of the positive class but measure the mutual distance between sequences or profiles. Only multiple alignments and hidden Markov models are popular classification methods which build a model of the positive class but they show low classification performance. The advantage of a model is that it can be analyzed for chemical properties common to the class members to obtain new insights into protein function and structure. We propose a fast model-based recurrent neural network for protein homology detection, the 'Long Short-Term Memory' (LSTM). LSTM automatically extracts indicative patterns for the positive class, but in contrast to profile methods it also extracts negative patterns and uses correlations between all detected patterns for classification. LSTM is capable to automatically extract useful local and global sequence statistics like hydrophobicity, polarity, volume, polarizability and combine them with a pattern. These properties make LSTM complementary to alignment-based approaches as it does not use predefined similarity measures like BLOSUM or PAM matrices. RESULTS: We have applied LSTM to a well known benchmark for remote protein homology detection, where a protein must be classified as belonging to a SCOP superfamily. LSTM reaches state-of-the-art classification performance but is considerably faster for classification than other approaches with comparable classification performance. LSTM is five orders of magnitude faster than methods which perform slightly better in classification and two orders of magnitude faster than the fastest SVM-based approaches (which, however, have lower classification performance than LSTM). Only PSI-BLAST and HMM-based methods show comparable time complexity as LSTM, but they cannot compete with LSTM in classification performance. To test the modeling capabilities of LSTM, we applied LSTM to PROSITE classes and interpreted the extracted patterns. In 8 out of 15 classes, LSTM automatically extracted the PROSITE motif. In the remaining 7 cases alternative motifs are generated which give better classification results on average than the PROSITE motifs. AVAILABILITY: The LSTM algorithm is available from http://www.bioinf.jku.at/software/LSTM_protein/.},
-	Address = {Institute of Bioinformatics, Johannes Kepler Universitat Linz, 4040 Linz, Austria. hochreit@bioinf.jku.at},
-	Au = {Hochreiter, S and Heusel, M and Obermayer, K},
-	Author = {Hochreiter, Sepp and Heusel, Martin and Obermayer, Klaus},
-	Da = {20070809},
-	Date-Added = {2007-09-04 08:55:55 -0700},
-	Date-Modified = {2007-09-04 08:56:08 -0700},
-	Dep = {20070508},
-	Doi = {10.1093/bioinformatics/btm247},
-	Edat = {2007/05/10 09:00},
-	Issn = {1460-2059 (Electronic)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Hochreiter/2007.pdf},
-	Mhda = {2007/05/10 09:00},
-	Number = {14},
-	Own = {NLM},
-	Pages = {1728--1736},
-	Phst = {2007/05/08 {$[$}aheadofprint{$]$}},
-	Pii = {btm247},
-	Pl = {England},
-	Pmid = {17488755},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Bioinformatics. 2007 Jul 15;23(14):1728-36. Epub 2007 May 8.},
-	Stat = {In-Process},
-	Title = {Fast model-based protein homology detection without alignment},
-	Volume = {23},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGUIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBCl8Lh/nlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACkhvY2hyZWl0ZXIAEAAIAADBcY1JAAAAEQAIAADC4mDpAAAAAQAYAEZsZQBGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9jaHJlaXRlcjoyMDA3LnBkZgAADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2NocmVpdGVyLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL0hvY2hyZWl0ZXIvMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btm247}}
-
-@article{Weathers:2004yq,
-	Abstract = {Intrinsically disordered proteins are an important class of proteins with unique functions and properties. Here, we have applied a support vector machine (SVM) trained on naturally occurring disordered and ordered proteins to examine the contribution of various parameters (vectors) to recognizing proteins that contain disordered regions. We find that a SVM that incorporates only amino acid composition has a recognition accuracy of 87 +/- 2{\%}. This result suggests that composition alone is sufficient to accurately recognize disorder. Interestingly, SVMs using reduced sets of amino acids based on chemical similarity preserve high recognition accuracy. A set as small as four retains an accuracy of 84 +/- 2{\%}; this suggests that general physicochemical properties rather than specific amino acids are important factors contributing to protein disorder.},
-	Author = {Weathers, E A and Paulaitis, M E and Woolf, T B and Hoh, J H},
-	Date-Added = {2007-08-16 13:12:07 -0700},
-	Date-Modified = {2008-05-30 02:51:26 -0700},
-	Journal = {FEBS Lett},
-	Keywords = {Unstructured protein; Support vector machine; Amino acid composition; Protein classification; Sequence complexity},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Weathers/2004.pdf},
-	Number = {3},
-	Pages = {348--352},
-	Title = {Reduced amino acid alphabet is sufficient to accurately recognize intrinsically disordered protein},
-	Ty = {JOUR},
-	Volume = {576},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUIIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADE9x8Lp/XxQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFdlYXRoZXJzABAACAAAwXGNSQAAABEACAAAwupf7AAAAAEAGABGbUIARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOldlYXRoZXJzOjIwMDQucGRmAAAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dlYXRoZXJzLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1dlYXRoZXJzLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6T36-4DD9176-B/2/aa949accd511bc19ce22e52c0b6332c2}}
-
-@article{Via:2007kx,
-	Abstract = {3dLOGO is a web server for the identification and analysis of conserved protein 3D substructures. Given a set of residues in a PDB (Protein Data Bank) chain, the server detects the matching substructure(s) in a set of user-provided protein structures, generates a multiple structure alignment centered on the input substructures and highlights other residues whose structural conservation becomes evident after the defined superposition. Conserved residues are proposed to the user for highlighting functional areas, deriving refined structural motifs or building sequence patterns. Residue structural conservation can be visualized through an expressly designed Java application, 3dProLogo, which is a 3D implementation of a sequence logo. The 3dLOGO server, with related documentation, is available at http://3dlogo.uniroma2.it/},
-	Address = {Centre for Molecular Bioinformatics, Department of Biology, University of Rome Tor Vergata, 00133 Rome, Italy. allegra.via@uniroma2.it},
-	Au = {Via, A and Peluso, D and Gherardini, PF and de Rinaldis, E and Colombo, T and Ausiello, G and Helmer-Citterich, M},
-	Author = {Via, Allegra and Peluso, Daniele and Gherardini, Pier Federico and de Rinaldis, Emanuele and Colombo, Teresa and Ausiello, Gabriele and Helmer-Citterich, Manuela},
-	Da = {20070716},
-	Date-Added = {2007-08-16 13:02:41 -0700},
-	Date-Modified = {2007-08-16 13:02:50 -0700},
-	Dcom = {20070803},
-	Dep = {20070508},
-	Doi = {10.1093/nar/gkm228},
-	Edat = {2007/05/10 09:00},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Keywords = {Algorithms; Amino Acid Sequence; Computational Biology/*methods; *Computer Graphics; Conserved Sequence; Databases, Protein; Internet; Models, Molecular; Programming Languages; *Protein Conformation; Sequence Alignment; *Software; Structural Homology, Protein},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Via/2007.pdf},
-	Mhda = {2007/08/04 09:00},
-	Number = {Web Server issue},
-	Own = {NLM},
-	Pages = {W416-9},
-	Phst = {2007/05/08 {$[$}aheadofprint{$]$}},
-	Pii = {gkm228},
-	Pl = {England},
-	Pmid = {17488847},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2007 Jul 1;35(Web Server issue):W416-9. Epub 2007 May 8.},
-	Stat = {MEDLINE},
-	Title = {3dLOGO: a web server for the identification, analysis and use of conserved protein substructures},
-	Volume = {35},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbTsIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADE9IsLp+1RQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1ZpYQAAEAAIAADBcY1JAAAAEQAIAADC6l3EAAAAAQAYAEZtOwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6VmlhOjIwMDcucGRmAA4AEgAIADIAMAAwADcALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVmlhLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVmlhLzIwMDcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/nar/gkm228}}
-
-@article{Bennett:2003fj,
-	Abstract = {Computational methods such as sequence alignment and motif construction are useful in grouping related proteins into families, as well as helping to annotate new proteins of unknown function. These methods identify conserved amino acids in protein sequences, but cannot determine the specific functional or structural roles of conserved amino acids without additional study. In this work, we present 3MATRIX (http://3matrix.stanford.edu) and 3MOTIF (http://3motif.stanford.edu), a web-based sequence motif visualization system that displays sequence motif information in its appropriate three-dimensional (3D) context. This system is flexible in that users can enter sequences, keywords, structures or sequence motifs to generate visualizations. In 3MOTIF, users can search using discrete sequence motifs such as PROSITE patterns, eMOTIFs, or any other regular expression-like motif. Similarly, 3MATRIX accepts an eMATRIX position-specific scoring matrix, or will convert a multiple sequence alignment block into an eMATRIX for visualization. Each query motif is used to search the protein structure database for matches, in which the motif is then visually highlighted in three dimensions. Important properties of motifs such as sequence conservation and solvent accessible surface area are also displayed in the visualizations, using carefully chosen color shading schemes.},
-	Address = {Department of Biochemistry, Stanford University School of Medicine, Stanford, CA 94305-5307, USA.},
-	Au = {Bennett, SP and Lu, L and Brutlag, DL},
-	Author = {Bennett, Steven P and Lu, Lin and Brutlag, Douglas L},
-	Da = {20030625},
-	Date-Added = {2007-08-16 13:02:25 -0700},
-	Date-Modified = {2007-08-16 13:02:36 -0700},
-	Dcom = {20030818},
-	Edat = {2003/06/26 05:00},
-	Gr = {HG02235/HG/NHGRI},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Keywords = {*Amino Acid Motifs; Amino Acids/chemistry/physiology; Animals; Computer Graphics; Conserved Sequence; Databases, Protein; Internet; *Models, Molecular; Protein Conformation; Proteins/chemistry; Sequence Alignment; *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bennett/2003.pdf},
-	Lr = {20061115},
-	Mhda = {2003/08/19 05:00},
-	Number = {13},
-	Own = {NLM},
-	Pages = {3328--3332},
-	Pl = {England},
-	Pmid = {12824319},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	Si = {PDB/1LPL},
-	So = {Nucleic Acids Res. 2003 Jul 1;31(13):3328-32.},
-	Stat = {MEDLINE},
-	Title = {3MATRIX and 3MOTIF: a protein structure visualization system for conserved sequence motifs},
-	Volume = {31},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+0IMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADE8zMLp+rBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0Jlbm5ldHQAABAACAAAwXGNSQAAABEACAAAwupdIAAAAAEAGABGa+0ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkJlbm5ldHQ6MjAwMy5wZGYADgASAAgAMgAwADAAMwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9CZW5uZXR0LzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQmVubmV0dC8yMDAzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Doolittle:1985uq,
-	Au = {Doolittle, RF},
-	Author = {Doolittle, R F},
-	Da = {19860109},
-	Date-Added = {2007-08-16 12:48:32 -0700},
-	Date-Modified = {2007-08-16 13:33:35 -0700},
-	Dcom = {19860109},
-	Edat = {1985/10/01},
-	Group = {IBID},
-	Issn = {0036-8733 (Print)},
-	Jid = {0404400},
-	Journal = {Sci Am},
-	Jt = {Scientific American},
-	Keywords = {Amino Acid Sequence; Amino Acids/metabolism; DNA/genetics; Enzymes/genetics/metabolism/physiology; Evolution; Humans; Hydrogen Bonding; Protein Conformation; Proteins/genetics/*metabolism/physiology; X-Ray Diffraction},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Doolittle/1985.pdf},
-	Lr = {20041117},
-	Mhda = {1985/10/01 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {88--99},
-	Pl = {UNITED STATES},
-	Pmid = {4071032},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Enzymes); 0 (Proteins); 9007-49-2 (DNA)},
-	Sb = {IM},
-	So = {Sci Am. 1985 Oct;253(4):88-99.},
-	Stat = {MEDLINE},
-	Title = {Proteins},
-	Volume = {253},
-	Year = {1985},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCsIMTk4NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADE/TsLqAp1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACURvb2xpdHRsZQAAEAAIAADBcY1JAAAAEQAIAADC6mUNAAAAAQAYAEZsKwBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RG9vbGl0dGxlOjE5ODUucGRmAA4AEgAIADEAOQA4ADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRG9vbGl0dGxlLzE5ODUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRG9vbGl0dGxlLzE5ODUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Thompson:1996qy,
-	Abstract = {We introduce a novel Bayesian probabilistic method for predicting the solvent accessibilities of amino acid residues in globular proteins. Using single sequence data, this method achieves prediction accuracies higher than previously published methods. Substantially improved predictions-comparable to the highest accuracies reported in the literature to date-are obtained by representing alignments of the example proteins and their homologs as strings of residue substitution classes, depending on the side chain types observed at each alignment position. These results demonstrate the applicability of this relatively simple Bayesian approach to structure prediction and illustrate the utility of the classification methodology previously developed to extract information from aligned sets of structurally related proteins.},
-	Address = {Biophysics Research Division, University of Michigan, Ann Arbor 48109-1055, USA.},
-	Au = {Thompson, MJ and Goldstein, RA},
-	Author = {Thompson, M J and Goldstein, R A},
-	Da = {19961105},
-	Date-Added = {2007-08-16 09:45:26 -0700},
-	Date-Modified = {2007-08-16 09:45:51 -0700},
-	Dcom = {19961105},
-	Doi = {10.1002/(SICI)1097-0134(199605)25:1{$<$}38::AID-PROT4{$>$}3.0.CO;2-G},
-	Edat = {1996/05/01},
-	Gr = {R29 LM05770/LM/NLM},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Sequence; Amino Acids/*chemistry; Bayes Theorem; Databases, Factual; Evolution, Molecular; Information Theory; Likelihood Functions; Molecular Sequence Data; Neural Networks (Computer); *Protein Conformation; Protein Folding; Protein Structure, Secondary; Proteins/*chemistry; Sequence Alignment; Solvents},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Thompson/1996a.pdf},
-	Lr = {20061115},
-	Mhda = {2000/06/20 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {38--47},
-	Pii = {10.1002/(SICI)1097-0134(199605)25:1<38::AID-PROT4>3.0.CO;2-G},
-	Pl = {UNITED STATES},
-	Pmid = {8727318},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins); 0 (Solvents)},
-	Sb = {IM},
-	So = {Proteins. 1996 May;25(1):38-47.},
-	Stat = {MEDLINE},
-	Title = {Predicting solvent accessibility: higher accuracy using Bayesian statistics and optimized residue substitution classes},
-	Volume = {25},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbS8JMTk5NmEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADEwBsLpzRtQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFRob21wc29uABAACAAAwXGNSQAAABEACAAAwuoviwAAAAEAGABGbS8ARmvPAEZq1QBGahsARmRoAECJQwACAERoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlRob21wc29uOjE5OTZhLnBkZgAOABQACQAxADkAOQA2AGEALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVGhvbXBzb24vMTk5NmEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVGhvbXBzb24vMTk5NmEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/(SICI)1097-0134(199605)25:1%7B$%3C$%7D38::AID-PROT4%7B$%3E$%7D3.0.CO;2-G}}
-
-@article{Roberts:2006lr,
-	Abstract = {BACKGROUND: Since the publication of the first draft of the human genome in 2000, bioinformatic data have been accumulating at an overwhelming pace. Currently, more than 3 million sequences and 35 thousand structures of proteins and nucleic acids are available in public databases. Finding correlations in and between these data to answer critical research questions is extremely challenging. This problem needs to be approached from several directions: information science to organize and search the data; information visualization to assist in recognizing correlations; mathematics to formulate statistical inferences; and biology to analyze chemical and physical properties in terms of sequence and structure changes. RESULTS: Here we present MultiSeq, a unified bioinformatics analysis environment that allows one to organize, display, align and analyze both sequence and structure data for proteins and nucleic acids. While special emphasis is placed on analyzing the data within the framework of evolutionary biology, the environment is also flexible enough to accommodate other usage patterns. The evolutionary approach is supported by the use of predefined metadata, adherence to standard ontological mappings, and the ability for the user to adjust these classifications using an electronic notebook. MultiSeq contains a new algorithm to generate complete evolutionary profiles that represent the topology of the molecular phylogenetic tree of a homologous group of distantly related proteins. The method, based on the multidimensional QR factorization of multiple sequence and structure alignments, removes redundancy from the alignments and orders the protein sequences by increasing linear dependence, resulting in the identification of a minimal basis set of sequences that spans the evolutionary space of the homologous group of proteins. CONCLUSION: MultiSeq is a major extension of the Multiple Alignment tool that is provided as part of VMD, a structural visualization program for analyzing molecular dynamics simulations. Both are freely distributed by the NIH Resource for Macromolecular Modeling and Bioinformatics and MultiSeq is included with VMD starting with version 1.8.5. The MultiSeq website has details on how to download and use the software: http://www.scs.uiuc.edu/~schulten/multiseq/},
-	Address = {Center for Biophysics and Computational Biology, University of Illinois at Urbana-Champaign, Urbana, IL, USA. erobert3@scs.uiuc.edu},
-	Au = {Roberts, E and Eargle, J and Wright, D and Luthey-Schulten, Z},
-	Author = {Roberts, E and Eargle, J and Wright, D and Luthey-Schulten, Z},
-	Da = {20061003},
-	Date-Added = {2007-08-14 11:45:54 -0700},
-	Date-Modified = {2008-05-29 12:04:11 -0700},
-	Dep = {20060816},
-	Edat = {2006/08/18 09:00},
-	Gr = {PHS2P41RR05969/RR/NCRR; PHS5T32GM08276/GM/NIGMS},
-	Jid = {100965194},
-	Journal = {BMC Bioinformatics},
-	Jt = {BMC bioinformatics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Roberts/2006.pdf},
-	Mhda = {2006/08/18 09:00},
-	Own = {NLM},
-	Pages = {382},
-	Phst = {2006/07/12 {$[$}received{$]$}; 2006/08/16 {$[$}accepted{$]$}; 2006/08/16 {$[$}aheadofprint{$]$}},
-	Pii = {1471-2105-7-382},
-	Pl = {England},
-	Pmid = {16914055},
-	Pst = {epublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Electronic},
-	Sb = {IM},
-	So = {BMC Bioinformatics. 2006 Aug 16;7:382.},
-	Stat = {In-Process},
-	Title = {{MultiSeq}: unifying sequence and structure data for evolutionary analysis},
-	Volume = {7},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPMIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADDW08LnRl5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1JvYmVydHMAABAACAAAwXGNSQAAABEACAAAwueozgAAAAEAGABGbPMARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJvYmVydHM6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Sb2JlcnRzLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUm9iZXJ0cy8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2105-7-382}}
-
-@article{Marti-Renom:2007kx,
-	Abstract = {The DBAli tools use a comprehensive set of structural alignments in the DBAli database to leverage the structural information deposited in the Protein Data Bank (PDB). These tools include (i) the DBAlit program that allows users to input the 3D coordinates of a protein structure for comparison by MAMMOTH against all chains in the PDB; (ii) the AnnoLite and AnnoLyze programs that annotate a target structure based on its stored relationships to other structures; (iii) the ModClus program that clusters structures by sequence and structure similarities; (iv) the ModDom program that identifies domains as recurrent structural fragments and (v) an implementation of the COMPARER method in the SALIGN command in MODELLER that creates a multiple structure alignment for a set of related protein structures. Thus, the DBAli tools, which are freely accessible via the World Wide Web at http://salilab.org/DBAli/, allow users to mine the protein structure space by establishing relationships between protein structures and their functions.},
-	Address = {Structural Genomics Unit, and California Institute for Quantitative Biomedical Research, University of California at San Francisco, San Francisco, CA 94158-2330, USA. mmarti@cipf.es},
-	Au = {Marti-Renom, MA and Pieper, U and Madhusudhan, MS and Rossi, A and Eswar, N and Davis, FP and Al-Shahrour, F and Dopazo, J and Sali, A},
-	Author = {Marti-Renom, Marc A and Pieper, Ursula and Madhusudhan, M S and Rossi, Andrea and Eswar, Narayanan and Davis, Fred P and Al-Shahrour, Fatima and Dopazo, Joaquin and Sali, Andrej},
-	Da = {20070716},
-	Date-Added = {2007-08-02 21:07:33 -0700},
-	Date-Modified = {2007-08-02 21:07:43 -0700},
-	Dep = {20070503},
-	Doi = {10.1093/nar/gkm236},
-	Edat = {2007/05/05 09:00},
-	Gr = {GM 62529/GM/NIGMS; GM074929/GM/NIGMS; GM54762/GM/NIGMS; GM71790/GM/NIGMS},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Marti-Renom/2007.pdf},
-	Mhda = {2007/05/05 09:00},
-	Number = {Web Server issue},
-	Own = {NLM},
-	Pages = {W393-7},
-	Phst = {2007/05/03 {$[$}aheadofprint{$]$}},
-	Pii = {gkm236},
-	Pl = {England},
-	Pmid = {17478513},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2007 Jul 1;35(Web Server issue):W393-7. Epub 2007 May 3.},
-	Stat = {In-Process},
-	Title = {DBAli tools: mining the protein structure space},
-	Volume = {35},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbLEIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACeyK8LGL7NQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC01hcnRpLVJlbm9tAAAQAAgAAMFxjUkAAAARAAgAAMLGkiMAAAABABgARmyxAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNYXJ0aS1SZW5vbToyMDA3LnBkZgAOABIACAAyADAAMAA3AC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL01hcnRpLVJlbm9tLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTWFydGktUmVub20vMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/nar/gkm236}}
-
-@article{Mirny:1999fj,
-	Abstract = {Here, we provide an analysis of molecular evolution of five of the most populated protein folds: immunoglobulin fold, oligonucleotide-binding fold, Rossman fold, alpha/beta plait, and TIM barrels. In order to distinguish between "historic", functional and structural reasons for amino acid conservations, we consider proteins that acquire the same fold and have no evident sequence homology. For each fold we identify positions that are conserved within each individual family and coincide when non-homologous proteins are structurally superimposed. As a baseline for statistical assessment we use the conservatism expected based on the solvent accessibility. The analysis is based on a new concept of "conservatism-of-conservatism". This approach allows us to identify the structural features that are stabilized in all proteins having a given fold, despite the fact that actual interactions that provide such stabilization may vary from protein to protein. Comparison with experimental data on thermodynamics, folding kinetics and function of the proteins reveals that such universally conserved clusters correspond to either: (i) super-sites (common location of active site in proteins having common tertiary structures but not function) or (ii) folding nuclei whose stability is an important determinant of folding rate, or both (in the case of Rossman fold). The analysis also helps to clarify the relation between folding and function that is apparent for some folds.},
-	Address = {Department of Chemistry and Chemical Biology, Harvard University, 12 Oxford Street, Cambridge, MA, 02138, USA.},
-	Au = {Mirny, LA and Shakhnovich, EI},
-	Author = {Mirny, L A and Shakhnovich, E I},
-	Ci = {Copyright 1999 Academic Press.},
-	Da = {19990907},
-	Date-Added = {2007-08-02 21:06:00 -0700},
-	Date-Modified = {2008-05-29 12:17:56 -0700},
-	Dcom = {19990907},
-	Edat = {1999/08/10},
-	Gr = {R01 GM52126/GM/NIGMS},
-	Group = {Alphabets; Printed; Reviewed; ROC},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acid Sequence; Conserved Sequence; *Evolution, Molecular; Immunoglobulins/*chemistry; Kinetics; Models, Molecular; Models, Statistical; Molecular Sequence Data; Mutation; *Protein Folding; Sequence Homology, Amino Acid; Solvents/chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Mirny/1999.pdf},
-	Lr = {20061115},
-	Mhda = {1999/08/10 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {177--196},
-	Pii = {S0022-2836(99)92911-X},
-	Pl = {ENGLAND},
-	Pmid = {10438614},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Read = {Yes},
-	Rn = {0 (Immunoglobulins); 0 (Solvents)},
-	Sb = {IM; S},
-	So = {J Mol Biol. 1999 Aug 6;291(1):177-96.},
-	Stat = {MEDLINE},
-	Title = {Universally conserved positions in protein folds: reading evolutionary signals about stability, folding kinetics and function},
-	Volume = {291},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbL0IMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACR5LMK9RtpQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABU1pcm55AAAQAAgAAMFxjUkAAAARAAgAAMK9qUoAAAABABgARmy9AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNaXJueToxOTk5LnBkZgAOABIACAAxADkAOQA5AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL01pcm55LzE5OTkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTWlybnkvMTk5OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.1999.2911}}
-
-@article{Taylor:2007uq,
-	Abstract = {With the number of known protein folds potentially approaching completion, the problems associated with their systematic classification are evaluated. It is argued that it will be difficult, if not impossible, to find a general metric based on pairwise comparison that will provide a satisfactory classification. It is suggested that some progress may be made through comparison against a library of idealised `template' folds, but a proper solution can only be attained if this includes a model of the underlying evolutionary processes. These processes are considered with examples of some unexpected relationships among folds, including circular permutations. The problem is finally set in the wider context of the genetic environment, introducing complications relating to introns, gene fixation and population size.},
-	Author = {Taylor, William R},
-	Date-Added = {2007-08-02 21:04:48 -0700},
-	Date-Modified = {2007-08-02 21:04:54 -0700},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Taylor/2007.pdf},
-	Number = {3},
-	Pages = {354--361},
-	Title = {Evolutionary transitions in protein fold space},
-	Title1 = {Nucleic acids / Sequences and topology},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4P0NC0V-2/2/ae082a076ab089c77217c91d4d521917},
-	Volume = {17},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbSsIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACeyQMLGL8xQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlRBWUxPUgAQAAgAAMFxjUkAAAARAAgAAMLGkjwAAAABABgARm0rAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpUQVlMT1I6MjAwNy5wZGYAAA4AEgAIADIAMAAwADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVEFZTE9SLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1RBWUxPUi8yMDA3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4P0NC0V-2/2/ae082a076ab089c77217c91d4d521917}}
-
-@article{Dill:2007qy,
-	Abstract = {The protein folding problem can be viewed as three different problems: defining the thermodynamic folding code; devising a good computational structure prediction algorithm; and answering Levinthal's question regarding the kinetic mechanism of how proteins can fold so quickly. Once regarded as a grand challenge, protein folding has seen much progress in recent years. Folding codes are now being used to successfully design proteins and non-biological foldable polymers; aided by the Critical Assessment of Techniques for Structure Prediction (CASP) competition, protein structure prediction has now become quite good. Even the once-challenging Levinthal puzzle now seems to have an answer -- a protein can avoid searching irrelevant conformations and fold quickly by making local independent decisions first, followed by non-local global decisions later.},
-	Author = {Dill, Ken A and Ozkan, S Banu and Weikl, Thomas R and Chodera, John D and Voelz, Vincent A},
-	Date-Added = {2007-08-02 21:02:58 -0700},
-	Date-Modified = {2007-08-02 21:03:10 -0700},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dill/2007.pdf},
-	Number = {3},
-	Pages = {342--346},
-	Title = {The protein folding problem: when will it be solved?},
-	Title1 = {Nucleic acids / Sequences and topology},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4NYSH72-3/2/995168b6b1836828871e4445093d2eb1},
-	Volume = {17},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCgIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACeyB8LGL4NQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABERpbGwAEAAIAADBcY1JAAAAEQAIAADCxpHzAAAAAQAYAEZsKABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RGlsbDoyMDA3LnBkZgAADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9EaWxsLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0RpbGwvMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4NYSH72-3/2/995168b6b1836828871e4445093d2eb1}}
-
-@article{Alexander:2007fk,
-	Abstract = {To identify a simplified code for conformational switching, we have redesigned two natural proteins to have 88% sequence identity but different tertiary structures: a 3-alpha helix fold and an alpha/beta fold. We describe the design of these homologous heteromorphic proteins, their structural properties as determined by NMR, their conformational stabilities, and their affinities for their respective ligands: IgG and serum albumin. Each of these proteins is completely folded at 25 degrees C, is monomeric, and retains the native binding activity. The complete binding epitope for both ligands is encoded within each of the proteins. The IgG-binding epitope is functional only in the alpha/beta fold, and the albumin-binding epitope is functional only in the 3-alpha fold. These results demonstrate that two monomeric folds and two different functions can be encoded with only 12% of the amino acids in a protein (7 of 56). The fact that 49 aa in these proteins are compatible with both folds shows that the essential information determining a fold can be highly concentrated in a few amino acids and that a very limited subset of interactions in the protein can tip the balance from one monomer fold to another. This delicate balance helps explain why protein structure prediction is so challenging. Furthermore, because a few mutations can result in both new conformation and new function, the evolution of new folds driven by natural selection for alternative functions may be much more probable than previously recognized.},
-	Address = {Center for Advanced Research in Biotechnology, University of Maryland Biotechnology Institute, 9600 Gudelsky Drive, Rockville, MD 20850.},
-	Author = {Alexander, PA and He, Y and Chen, Y and Orban, J and Bryan, PN},
-	Da = {20070704},
-	Date-Added = {2007-08-02 21:01:31 -0700},
-	Date-Modified = {2007-08-02 21:01:42 -0700},
-	Dep = {20070703},
-	Doi = {10.1073/pnas.0700922104},
-	Edat = {2007/07/05 09:00},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Alexander/2007.pdf},
-	Mhda = {2007/07/05 09:00},
-	Own = {NLM},
-	Pii = {0700922104},
-	Pmid = {17609385},
-	Pst = {aheadofprint},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print-Electronic},
-	So = {Proc Natl Acad Sci U S A. 2007 Jul 3;.},
-	Stat = {Publisher},
-	Title = {The design and characterization of two proteins with 88{\%} sequence identity but different structure and function},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa9sIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACfQPsLGSj1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUFsZXhhbmRlcgAAEAAIAADBcY1JAAAAEQAIAADCxqytAAAAAQAYAEZr2wBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6QWxleGFuZGVyOjIwMDcucGRmAA4AEgAIADIAMAAwADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQWxleGFuZGVyLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQWxleGFuZGVyLzIwMDcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0700922104}}
-
-@article{Meyerguz:2007lr,
-	Abstract = {Sequence-structure relationships in proteins are highly asymmetric because many sequences fold into relatively few structures. What is the number of sequences that fold into a particular protein structure? Is it possible to switch between stable protein folds by point mutations? To address these questions, we compute a directed graph of sequences and structures of proteins, which is based on 2,060 experimentally determined protein shapes from the Protein Data Bank. The directed graph is highly connected at native energies with "sinks" that attract many sequences from other folds. The sinks are rich in beta-sheets. The number of sequences that transition between folds is significantly smaller than the number of sequences retained by their fold. The sequence flow into a particular protein shape from other proteins correlates with the number of sequences that matches this shape in empirically determined genomes. Properties of strongly connected components of the graph are correlated with protein length and secondary structure.},
-	Address = {Department of Computer Science, Cornell University, Ithaca, NY 14853.},
-	Au = {Meyerguz, L and Kleinberg, J and Elber, R},
-	Author = {Meyerguz, Leonid and Kleinberg, Jon and Elber, Ron},
-	Da = {20070711},
-	Date-Added = {2007-08-02 20:59:12 -0700},
-	Date-Modified = {2007-08-02 20:59:32 -0700},
-	Dep = {20070627},
-	Doi = {10.1073/pnas.0701393104},
-	Edat = {2007/06/29 09:00},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Meyerguz/2007.pdf},
-	Mhda = {2007/06/29 09:00},
-	Number = {28},
-	Own = {NLM},
-	Pages = {11627--11632},
-	Phst = {2007/06/27 {$[$}aheadofprint{$]$}},
-	Pii = {0701393104},
-	Pl = {United States},
-	Pmid = {17596339},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 2007 Jul 10;104(28):11627-32. Epub 2007 Jun 27.},
-	Stat = {In-Data-Review},
-	Title = {The network of sequence flow between protein structures},
-	Volume = {104},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbLkIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACeyXsLGL/1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACE1leWVyZ3V6ABAACAAAwXGNSQAAABEACAAAwsaSbQAAAAEAGABGbLkARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOk1leWVyZ3V6OjIwMDcucGRmAAAOABIACAAyADAAMAA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL01leWVyZ3V6LzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL01leWVyZ3V6LzIwMDcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0701393104}}
-
-@article{Chen:2007lr,
-	Abstract = {One proposed strategy for controlling the transmission of insect-borne pathogens uses a drive mechanism to ensure the rapid spread of transgenes conferring disease refractoriness throughout wild populations. Here, we report the creation of maternal-effect selfish genetic elements in Drosophila that drive population replacement and are resistant to recombination-mediated dissociation of drive and disease refractoriness functions. These selfish elements use microRNA-mediated silencing of a maternally expressed gene essential for embryogenesis, which is coupled with early zygotic expression of a rescuing transgene.},
-	Address = {Division of Biology, Mail Code 156-29, California Institute of Technology, Pasadena, CA 91125, USA.},
-	Annote = {This is Jessica's Science paper.},
-	Au = {Chen, CH and Huang, H and Ward, CM and Su, JT and Schaeffer, LV and Guo, M and Hay, BA},
-	Author = {Chen, Chun-Hong and Huang, Haixia and Ward, Catherine M and Su, Jessica T and Schaeffer, Lorian V and Guo, Ming and Hay, Bruce A},
-	Da = {20070427},
-	Date-Added = {2007-08-01 13:48:21 -0700},
-	Date-Modified = {2007-08-01 13:48:33 -0700},
-	Dcom = {20070515},
-	Dep = {20070329},
-	Doi = {10.1126/science. 1138595},
-	Edat = {2007/03/31 09:00},
-	Gr = {GM057422/GM/NIGMS; GM70956/GM/NIGMS; NS042580/NS/NINDS; NS048396/NS/NINDS},
-	Issn = {1095-9203 (Electronic)},
-	Jid = {0404511},
-	Journal = {Science},
-	Jt = {Science (New York, N.Y.)},
-	Keywords = {Adaptor Proteins, Signal Transducing/*genetics/physiology; Animals; Antigens, Differentiation/*genetics/physiology; Crosses, Genetic; DNA Transposable Elements; Drosophila/embryology/*genetics/*physiology; Drosophila Proteins/*genetics/physiology; Embryonic Development; Female; Gene Expression; *Genes, Insect; *Genetic Engineering; Heterozygote; Homozygote; Male; MicroRNAs/genetics; Molecular Sequence Data; *RNA Interference; Receptors, Immunologic/*genetics/physiology; Recombination, Genetic; *Repetitive Sequences, Nucleic Acid; Transgenes; Zygote/physiology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chen/2007.pdf},
-	Mhda = {2007/05/16 09:00},
-	Number = {5824},
-	Own = {NLM},
-	Pages = {597--600},
-	Phst = {2007/03/29 {$[$}aheadofprint{$]$}},
-	Pii = {1138595},
-	Pl = {United States},
-	Pmid = {17395794},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Adaptor Proteins, Signal Transducing); 0 (Antigens, Differentiation); 0 (DNA Transposable Elements); 0 (Drosophila Proteins); 0 (MicroRNAs); 0 (Myd88 protein, Drosophila); 0 (Receptors, Immunologic)},
-	Sb = {IM},
-	Si = {GENBANK/EF447105; GENBANK/EF447106},
-	So = {Science. 2007 Apr 27;316(5824):597-600. Epub 2007 Mar 29.},
-	Stat = {MEDLINE},
-	Title = {A synthetic maternal-effect selfish genetic element drives population replacement in Drosophila},
-	Volume = {316},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA0IMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC4LGsLWOjBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABENoZW4AEAAIAADBcY1JAAAAEQAIAADC1pygAAAAAQAYAEZsDQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hlbjoyMDA3LnBkZgAADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaGVuLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0NoZW4vMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1126/science.%201138595}}
-
-@article{Price:2005yq,
-	Author = {Price, G. A. and Crooks, G. E. and Green, R. E. and Brenner, S. E.},
-	Date-Added = {2007-07-25 14:17:08 -0700},
-	Date-Modified = {2007-07-25 14:18:15 -0700},
-	Doi = {10.1093/bioinformatics/bti735},
-	Eprint = {http://bioinformatics.oxfordjournals.org/cgi/reprint/21/23/4318.pdf},
-	Group = {Statistics},
-	Journal = {Bioinformatics},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Price/2005a.pdf},
-	Number = {23},
-	Pages = {4318-},
-	Title = {{Erratum}},
-	Url = {http://bioinformatics.oxfordjournals.org},
-	Volume = {21},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbN8JMjAwNWEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACq/H8LNC8dQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVByaWNlAAAQAAgAAMFxjUkAAAARAAgAAMLNbjcAAAABABgARmzfAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQcmljZToyMDA1YS5wZGYAAA4AFAAJADIAMAAwADUAYQAuAHAAZABmAA8ACAADAGgAcwByABIAPVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9QcmljZS8yMDA1YS5wZGYAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAkLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUHJpY2UvMjAwNWEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJNAlICWwJmAmoCeAJ/AogCrwK0ArcAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACxA==},
-	Bdsk-Url-1 = {http://bioinformatics.oxfordjournals.org},
-	Bdsk-Url-2 = {http://dx.doi.org/10.1093/bioinformatics/bti735}}
-
-@article{Hulsen:2006fk,
-	Af = {Hulsen, Tim de Vlieg, Jacob Leunissen, Jack A. M. Groenen, Peter M. A.},
-	Author = {Hulsen, T. and de Vlieg, J. and Leunissen, J. A. M. and Groenen, P. M. A.},
-	Date = {OCT 12},
-	Date-Added = {2007-07-25 14:12:54 -0700},
-	Date-Modified = {2007-07-25 14:15:36 -0700},
-	Di = {ARTN 444},
-	Group = {Statistics},
-	Isi = {ISI:000241469400001},
-	Issn = {1471-2105},
-	Journal = {BMC BIOINFORMATICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Hulsen/2006.pdf},
-	Month = {Oct},
-	Pages = {444},
-	Publication-Type = {J},
-	Title = {Testing statistical significance scores of sequence comparison methods with structure similarity},
-	Volume = {7},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGwIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACq9v8LNC3lQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkh1bHNlbgAQAAgAAMFxjUkAAAARAAgAAMLNbekAAAABABgARmxsAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIdWxzZW46MjAwNi5wZGYAAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvSHVsc2VuLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0h1bHNlbi8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{RUBIN:1981uq,
-	Author = {RUBIN, D. B.},
-	Date-Added = {2007-07-25 14:12:54 -0700},
-	Date-Modified = {2007-07-25 14:19:37 -0700},
-	Group = {Statistics},
-	Isi = {ISI:A1981LA53000011},
-	Issn = {0090-5364},
-	Journal = {ANNALS OF STATISTICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/RUBIN/1981.pdf},
-	Number = {1},
-	Pages = {130--134},
-	Publication-Type = {J},
-	Title = {THE BAYESIAN BOOTSTRAP},
-	Volume = {9},
-	Year = {1981},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPcIMTk4MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACrBJMLNDG5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVJVQklOAAAQAAgAAMFxjUkAAAARAAgAAMLNbt4AAAABABgARmz3AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSVUJJTjoxOTgxLnBkZgAOABIACAAxADkAOAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1JVQklOLzE5ODEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUlVCSU4vMTk4MS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{EFRON:1979fj,
-	Author = {EFRON, B.},
-	Date-Added = {2007-07-25 14:12:54 -0700},
-	Date-Modified = {2007-07-25 14:22:42 -0700},
-	Group = {Statistics},
-	Isi = {ISI:A1979GL00300001},
-	Issn = {0090-5364},
-	Journal = {ANNALS OF STATISTICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/EFRON/1979.pdf},
-	Number = {1},
-	Pages = {1--26},
-	Publication-Type = {J},
-	Title = {1977 RIETZ LECTURE - BOOTSTRAP METHODS - ANOTHER LOOK AT THE JACKKNIFE},
-	Volume = {7},
-	Year = {1979},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDAIMTk3OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACrCGcLNDR1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUVGUk9OAAAQAAgAAMFxjUkAAAARAAgAAMLNb40AAAABABgARmwwAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpFRlJPTjoxOTc5LnBkZgAOABIACAAxADkANwA5AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0VGUk9OLzE5NzkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRUZST04vMTk3OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Wheeler:2007lr,
-	Abstract = {MOTIVATION: Multiple sequence alignment is a fundamental task in bioinformatics. Current tools typically form an initial alignment by merging subalignments, and then polish this alignment by repeated splitting and merging of subalignments to obtain an improved final alignment. In general this form-and-polish strategy consists of several stages, and a profusion of methods have been tried at every stage. We carefully investigate: (1) how to utilize a new algorithm for aligning alignments that optimally solves the common subproblem of merging subalignments, and (2) what is the best choice of method for each stage to obtain the highest quality alignment. RESULTS: We study six stages in the form-and-polish strategy for multiple alignment: parameter choice, distance estimation, merge-tree construction, sequence-pair weighting, alignment merging, and polishing. For each stage, we consider novel approaches as well as standard ones. Interestingly, the greatest gains in alignment quality come from (i) estimating distances by a new approach using normalized alignment costs, and (ii) polishing by a new approach using 3-cuts. Experiments with a parameter-value oracle suggest large gains in quality may be possible through an input-dependent choice of alignment parameters, and we present a promising approach for building such an oracle. Combining the best approaches to each stage yields a new tool we call Opal that on benchmark alignments matches the quality of the top tools, without employing alignment consistency or hydrophobic gap penalties. AVAILABILITY: Opal, a multiple alignment tool that implements the best methods in our study, is freely available at http://opal.cs.arizona.edu CONTACT: twheeler@cs.arizona.edu.},
-	Address = {Department of Computer Science, The University of Arizona, Tucson AZ 85721, USA.},
-	Annote = {may be useful in Jessica's project},
-	Au = {Wheeler, TJ and Kececioglu, JD},
-	Author = {Wheeler, Travis J and Kececioglu, John D},
-	Da = {20070724},
-	Date-Added = {2007-07-25 09:54:48 -0700},
-	Date-Modified = {2007-07-25 09:55:09 -0700},
-	Doi = {10.1093/bioinformatics/btm226},
-	Edat = {2007/07/25 09:00},
-	Issn = {1460-2059 (Electronic)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wheeler/2007.pdf},
-	Mhda = {2007/07/25 09:00},
-	Number = {13},
-	Own = {NLM},
-	Pages = {i559-i568},
-	Pii = {23/13/i559},
-	Pl = {England},
-	Pmid = {17646343},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Bioinformatics. 2007 Jul 1;23(13):i559-i568.},
-	Stat = {In-Data-Review},
-	Title = {Multiple alignment by aligning alignments},
-	Volume = {23},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUcIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAClA/8LMzghQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1doZWVsZXIAABAACAAAwXGNSQAAABEACAAAws0weAAAAAEAGABGbUcARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOldoZWVsZXI6MjAwNy5wZGYADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XaGVlbGVyLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2hlZWxlci8yMDA3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btm226}}
-
-@article{Handel:1993qy,
-	Abstract = {The peptide alpha 4 is a designed four-helix bundle that contains a highly simplified hydrophobic core composed exclusively of leucine residues; its tertiary structure is therefore largely dictated by hydrophobic forces. This small protein adopts a structure with properties intermediate between those of the native and molten globule states of proteins: it is compact, globular, and has very stable helices, but its apolar side chains are mobile and not as well packed as in many natural proteins. To induce a more native-like state, two Zn(2+)-binding sites were introduced into the protein, thereby replacing some of the non-specific hydrophobic interactions with more geometrically restrictive metal-ligand interactions. In the metal-bound state, this protein has properties that approach those of native proteins. Thus, hydrophobic interactions alone are sufficient to drive polypeptide chain folding nearly to completion, but specific interactions are required for a unique structure.},
-	Address = {Du Pont Merck Pharmaceutical Company, Wilmington, DE 19880-0328.},
-	Au = {Handel, TM and Williams, SA and DeGrado, WF},
-	Author = {Handel, T M and Williams, S A and DeGrado, W F},
-	Da = {19930908},
-	Date-Added = {2007-07-24 14:55:01 -0700},
-	Date-Modified = {2007-08-01 09:16:00 -0700},
-	Dcom = {19930908},
-	Edat = {1993/08/13},
-	Group = {LitSearch; Reviewed},
-	Issn = {0036-8075 (Print)},
-	Jid = {0404511},
-	Journal = {Science},
-	Jt = {Science (New York, N.Y.)},
-	Keywords = {Amino Acid Sequence; Anilino Naphthalenesulfonates/metabolism; Binding Sites; Histidine/chemistry/metabolism; Magnetic Resonance Spectroscopy; Molecular Sequence Data; *Protein Conformation; Protein Folding; Protein Structure, Secondary; Proteins/chemical synthesis/*chemistry/metabolism; Thermodynamics; Zinc/*chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Handel/1993.pdf},
-	Lr = {20070319},
-	Mhda = {1993/08/13 00:01},
-	Number = {5123},
-	Own = {NLM},
-	Pages = {879--885},
-	Pl = {UNITED STATES},
-	Pmid = {8346440},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Anilino Naphthalenesulfonates); 0 (Proteins); 71-00-1 (Histidine); 7440-66-6 (Zinc); 82-76-8 (1-anilino-8-naphthalenesulfonate)},
-	Sb = {IM},
-	So = {Science. 1993 Aug 13;261(5123):879-85.},
-	Stat = {MEDLINE},
-	Title = {Metal ion-dependent modulation of the dynamics of a designed protein},
-	Volume = {261},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbFkIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkbq8LLxB9QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkhhbmRlbAAQAAgAAMFxjUkAAAARAAgAAMLMJo8AAAABABgARmxZAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIYW5kZWw6MTk5My5wZGYAAA4AEgAIADEAOQA5ADMALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvSGFuZGVsLzE5OTMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0hhbmRlbC8xOTkzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{SHANG:1994uq,
-	Author = {SHANG, Z. G. and ISAAC, V. E. and LI, H. C. and PATEL, L. and CATRON, K. M. and CURRAN, T. and MONTELIONE, G. T. and ABATE, C.},
-	Date = {AUG 30},
-	Date-Added = {2007-07-24 14:55:01 -0700},
-	Date-Modified = {2007-08-01 09:34:16 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1994PE38800014},
-	Issn = {0027-8424},
-	Journal = {PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/SHANG/1994.pdf},
-	Month = {Aug},
-	Number = {18},
-	Pages = {8373--8377},
-	Publication-Type = {J},
-	Title = {DESIGN OF A MINIMAL HOMEODOMAIN - THE N-TERMINAL ARM MODULATES DNA-BINDING AFFINITY AND STABILIZES HOMEODOMAIN STRUCTURE},
-	Volume = {91},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbQkIMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkbP8LLw15QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNIQU5HAAAQAAgAAMFxjUkAAAARAAgAAMLMJc4AAAABABgARm0JAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTSEFORzoxOTk0LnBkZgAOABIACAAxADkAOQA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NIQU5HLzE5OTQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU0hBTkcvMTk5NC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{HEINZ:1992fj,
-	Author = {HEINZ, D. W. and BAASE, W. A. and MATTHEWS, B. W.},
-	Date = {MAY 1},
-	Date-Added = {2007-07-24 14:55:01 -0700},
-	Date-Modified = {2007-08-01 09:24:27 -0700},
-	Group = {LitSearch; Printed; Reviewed},
-	Isi = {ISI:A1992HR85300017},
-	Issn = {0027-8424},
-	Journal = {PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/HEINZ/1992.pdf},
-	Month = {May},
-	Number = {9},
-	Pages = {3751--3755},
-	Publication-Type = {J},
-	Title = {FOLDING AND FUNCTION OF A T4 LYSOZYME CONTAINING 10 CONSECUTIVE ALANINES ILLUSTRATE THE REDUNDANCY OF INFORMATION IN AN AMINO-ACID-SEQUENCE},
-	Volume = {89},
-	Year = {1992},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGEIMTk5Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkbW8LLw35QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUhFSU5aAAAQAAgAAMFxjUkAAAARAAgAAMLMJe4AAAABABgARmxhAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIRUlOWjoxOTkyLnBkZgAOABIACAAxADkAOQAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0hFSU5aLzE5OTIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSEVJTlovMTk5Mi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{REGAN:1988kx,
-	Author = {REGAN, L. and DEGRADO, W. F.},
-	Date = {AUG 19},
-	Date-Added = {2007-07-24 14:55:01 -0700},
-	Date-Modified = {2007-08-01 09:29:36 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1988P706500038},
-	Issn = {0036-8075},
-	Journal = {SCIENCE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/REGAN/1988.pdf},
-	Month = {Aug},
-	Number = {4868},
-	Pages = {976--978},
-	Publication-Type = {J},
-	Title = {CHARACTERIZATION OF A HELICAL PROTEIN DESIGNED FROM 1ST PRINCIPLES},
-	Volume = {241},
-	Year = {1988},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOwIMTk4OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkbe8LLw8JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVJFR0FOAAAQAAgAAMFxjUkAAAARAAgAAMLMJjIAAAABABgARmzsAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSRUdBTjoxOTg4LnBkZgAOABIACAAxADkAOAA4AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1JFR0FOLzE5ODgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUkVHQU4vMTk4OC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{SORM:1962fk,
-	Annote = {Page 190 has the Sorm simplified alphabet.
-No ROC.},
-	Author = {SORM, F. and KEIL, B.},
-	Date-Added = {2007-07-24 14:40:00 -0700},
-	Date-Modified = {2007-08-03 11:33:00 -0700},
-	Group = {LitSearch; IBID; Alphabets; Reviewed},
-	Isi = {ISI:A1962WF06600003},
-	Issn = {0065-3233},
-	Journal = {ADVANCES IN PROTEIN CHEMISTRY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/SORM/1962.pdf},
-	Pages = {167--207},
-	Publication-Type = {J},
-	Title = {REGULARITIES IN THE PRIMARY STRUCTURE OF PROTEINS},
-	Volume = {17},
-	Year = {1962},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRsIMTk2Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkcocLLx6dQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFNPUk0AEAAIAADBcY1JAAAAEQAIAADCzCoXAAAAAQAYAEZtGwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U09STToxOTYyLnBkZgAADgASAAgAMQA5ADYAMgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TT1JNLzE5NjIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1NPUk0vMTk2Mi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Notredame:2002lr,
-	Abstract = {The assembly of a multiple sequence alignment (MSA) has become one of the most common tasks when dealing with sequence analysis. Unfortunately, the wide range of available methods and the differences in the results given by these methods makes it hard for a non-specialist to decide which program is best suited for a given purpose. In this review we briefly describe existing techniques and expose the potential strengths and weaknesses of the most widely used multiple alignment packages.},
-	Address = {Information Genetique et Structurale, UMR 1889, 31 Chemin Joseph Aiguier, 13 006 Marseille, France. cedric.notredame@igs.cnrs-mrs.fr},
-	Au = {Notredame, C},
-	Author = {Notredame, Cedric},
-	Da = {20020422},
-	Date-Added = {2007-07-24 11:44:07 -0700},
-	Date-Modified = {2007-07-24 11:44:16 -0700},
-	Dcom = {20021104},
-	Doi = {10.1517/14622416.3.1.131},
-	Edat = {2002/04/23 10:00},
-	Issn = {1462-2416 (Print)},
-	Jid = {100897350},
-	Journal = {Pharmacogenomics},
-	Jt = {Pharmacogenomics},
-	Keywords = {Algorithms; Phylogeny; *Protein Conformation; Sequence Alignment/*methods},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Notredame/2002.pdf},
-	Lr = {20051116},
-	Mhda = {2002/11/26 04:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {131--144},
-	Pii = {PGS030117},
-	Pl = {England},
-	Pmid = {11966409},
-	Pst = {ppublish},
-	Pt = {Journal Article; Review},
-	Pubm = {Print},
-	Rf = {84},
-	Sb = {IM},
-	So = {Pharmacogenomics. 2002 Jan;3(1):131-44.},
-	Stat = {MEDLINE},
-	Title = {Recent progress in multiple sequence alignment: a survey},
-	Volume = {3},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMsIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkO4cLLlKFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACU5vdHJlZGFtZQAAEAAIAADBcY1JAAAAEQAIAADCy/cRAAAAAQAYAEZsywBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Tm90cmVkYW1lOjIwMDIucGRmAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTm90cmVkYW1lLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTm90cmVkYW1lLzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1517/14622416.3.1.131}}
-
-@article{Li:2007ys,
-	Af = {Li Jing Wang Wei},
-	Author = {Li, J. and Wang, W.},
-	Date = {JUN},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2007-08-17 13:43:12 -0700},
-	Group = {LitSearch; IBID},
-	Isi = {ISI:000247829500015},
-	Issn = {1006-9305},
-	Journal = {SCIENCE IN CHINA SERIES C-LIFE SCIENCES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/2007.pdf},
-	Month = {Jun},
-	Number = {3},
-	Pages = {392--402},
-	Publication-Type = {J},
-	Title = {Grouping of amino acids and recognition of protein structurally conserved regions by reduced alphabets of amino acids},
-	Volume = {50},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADFV78LrI3pQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAwuuF6gAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjIwMDcucGRmAAAOABIACAAyADAAMAA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzIwMDcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Price:2005fr,
-	Author = {Price, G A and Crooks, G E and Green, R E and Brenner, S E},
-	Date = {OCT 15},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2008-05-28 22:29:34 -0700},
-	Group = {LitSearch; Printed; Reviewed; Statistics; Forward; Backward},
-	Isi = {ISI:000232596300003},
-	Journal = {Bioinformatics},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Price/2005.pdf},
-	Month = {Oct},
-	Number = {20},
-	Pages = {3824--3831},
-	Publication-Type = {J},
-	Read = {Yes},
-	Title = {Statistical evaluation of pairwise protein sequence comparison with the {Bayesian} bootstrap},
-	Volume = {21},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbN8IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjz6cLKkTFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVByaWNlAAAQAAgAAMFxjUkAAAARAAgAAMLK86EAAAABABgARmzfAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQcmljZToyMDA1LnBkZgAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1ByaWNlLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUHJpY2UvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Sierk:2004zr,
-	Author = {Sierk, M. L. and Pearson, W. R.},
-	Date = {MAR},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2007-07-24 13:49:29 -0700},
-	Group = {LitSearch; Printed; Reviewed},
-	Isi = {ISI:000189171200022},
-	Issn = {0961-8368},
-	Journal = {PROTEIN SCIENCE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Sierk/2004.pdf},
-	Month = {Mar},
-	Number = {3},
-	Pages = {773--785},
-	Publication-Type = {J},
-	Title = {Sensitivity and selectivity in protein structure comparison},
-	Volume = {13},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRMIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACj0AsLKkVpQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNpZXJrAAAQAAgAAMFxjUkAAAARAAgAAMLK88oAAAABABgARm0TAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTaWVyazoyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NpZXJrLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU2llcmsvMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Raval:2002mz,
-	Author = {Raval, A. and Ghahramani, Z. and Wild, D. L.},
-	Date = {JUN},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2007-07-24 13:38:43 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000176553400002},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Raval/2002.pdf},
-	Month = {Jun},
-	Number = {6},
-	Pages = {788--801},
-	Publication-Type = {J},
-	Title = {A Bayesian network model for protein fold and remote homologue recognition},
-	Volume = {18},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOcIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACj0HsLKkX1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVJhdmFsAAAQAAgAAMFxjUkAAAARAAgAAMLK8+0AAAABABgARmznAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSYXZhbDoyMDAyLnBkZgAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1JhdmFsLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUmF2YWwvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Russell:2002ly,
-	Author = {Russell, R. B.},
-	Date = {JAN},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2007-07-24 13:39:47 -0700},
-	Group = {LitSearch; Printed; Reviewed},
-	Isi = {ISI:000173578800002},
-	Issn = {1073-6085},
-	Journal = {MOLECULAR BIOTECHNOLOGY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Russell/2002.pdf},
-	Month = {Jan},
-	Number = {1},
-	Pages = {17--28},
-	Publication-Type = {J},
-	Title = {Classification of protein folds},
-	Volume = {20},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPgIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACj0OsLKkbtQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1J1c3NlbGwAABAACAAAwXGNSQAAABEACAAAwsr0KwAAAAEAGABGbPgARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJ1c3NlbGw6MjAwMi5wZGYADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SdXNzZWxsLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUnVzc2VsbC8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Ramenskii:2000gf,
-	Annote = {This article is in Russian.},
-	Author = {Ramenskii, V. E. and Vlasov, P. K. and Syunyaev, S. R. and Tumanyan, V. G.},
-	Date = {MAR-APR},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2007-08-01 09:27:16 -0700},
-	Group = {LitSearch; IBID; Reviewed},
-	Isi = {ISI:000086738000003},
-	Issn = {0006-3029},
-	Journal = {BIOFIZIKA},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Ramenskii/2000.pdf},
-	Month = {Mar-Apr},
-	Number = {2},
-	Pages = {220--227},
-	Publication-Type = {J},
-	Title = {How do point amino acid substitution affect the protein structure?},
-	Volume = {45},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOYIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC1ulMLTU8VQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVJhbWVuc2tpaQAAEAAIAADBcY1JAAAAEQAIAADC07Y1AAAAAQAYAEZs5gBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UmFtZW5za2lpOjIwMDAucGRmAA4AEgAIADIAMAAwADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUmFtZW5za2lpLzIwMDAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUmFtZW5za2lpLzIwMDAucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Shpaer:1996ve,
-	Author = {Shpaer, E. G. and Robinson, M. and Yee, D. and Candlin, J. D. and Mines, R. and Hunkapiller, T.},
-	Date = {DEC 1},
-	Date-Added = {2007-07-23 17:05:40 -0700},
-	Date-Modified = {2007-07-24 13:45:35 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1996VY32000010},
-	Issn = {0888-7543},
-	Journal = {GENOMICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Shpaer/1996.pdf},
-	Month = {Dec},
-	Number = {2},
-	Pages = {179--191},
-	Publication-Type = {J},
-	Title = {Sensitivity and selectivity in protein similarity searches: A comparison of Smith-Waterman in hardware to BLAST and FASTA},
-	Volume = {38},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRIIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACj0gsLKkmZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlNocGFlcgAQAAgAAMFxjUkAAAARAAgAAMLK9NYAAAABABgARm0SAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTaHBhZXI6MTk5Ni5wZGYAAA4AEgAIADEAOQA5ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvU2hwYWVyLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1NocGFlci8xOTk2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Wang:1998fj,
-	Annote = {Fig. 1 has the aa partitions.  Yet another way to cluster aa's from a matrix.
-No ROC.},
-	Author = {Wang, H. C. and Dopazo, J. and Carazo, J. M.},
-	Date-Added = {2007-07-23 14:22:55 -0700},
-	Date-Modified = {2007-08-03 11:35:43 -0700},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000074557000012},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/1998a.pdf},
-	Number = {4},
-	Pages = {376--377},
-	Publication-Type = {J},
-	Title = {Self-organizing tree growing network for classifying amino acids},
-	Volume = {14},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAJMTk5OGEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjp7sLKamhQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADCyszYAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAQGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoxOTk4YS5wZGYADgAUAAkAMQA5ADkAOABhAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dhbmcvMTk5OGEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2FuZy8xOTk4YS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Russell:1997kx,
-	Annote = {Figure 3 has the aa partitions.
-Another way to do clustering on various matrices.
-No ROC.},
-	Author = {Russell, R. B. and Saqi, M. A. S. and Sayle, R. A. and Bates, P. A. and Sternberg, M. J. E.},
-	Date = {JUN 13},
-	Date-Added = {2007-07-23 14:22:55 -0700},
-	Date-Modified = {2007-08-03 11:36:21 -0700},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:A1997XE22800010},
-	Issn = {0022-2836},
-	Journal = {JOURNAL OF MOLECULAR BIOLOGY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Russell/1997.pdf},
-	Month = {Jun},
-	Number = {3},
-	Pages = {423--439},
-	Publication-Type = {J},
-	Title = {Recognition of analogous and homologous protein folds: Analysis of sequence and structure conservation},
-	Volume = {269},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPgIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjqBMLKapVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1J1c3NlbGwAABAACAAAwXGNSQAAABEACAAAwsrNBQAAAAEAGABGbPgARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJ1c3NlbGw6MTk5Ny5wZGYADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SdXNzZWxsLzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUnVzc2VsbC8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{MOCZ:1995vn,
-	Annote = {No clear partition is made--hey, it's a fuzzy analysis so what do you expect.},
-	Author = {MOCZ, G.},
-	Date = {JUN},
-	Date-Added = {2007-07-23 14:22:55 -0700},
-	Date-Modified = {2007-07-23 14:47:24 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1995RF13500016},
-	Issn = {0961-8368},
-	Journal = {PROTEIN SCIENCE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/MOCZ/1995.pdf},
-	Month = {Jun},
-	Number = {6},
-	Pages = {1178--1187},
-	Publication-Type = {J},
-	Title = {FUZZY CLUSTER-ANALYSIS OF SIMPLE PHYSICOCHEMICAL PROPERTIES OF AMINO-ACIDS FOR RECOGNIZING SECONDARY STRUCTURE IN PROTEINS},
-	Volume = {4},
-	Year = {1995},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMAIMTk5NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjqRcLKauRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABE1PQ1oAEAAIAADBcY1JAAAAEQAIAADCys1UAAAAAQAYAEZswABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TU9DWjoxOTk1LnBkZgAADgASAAgAMQA5ADkANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9NT0NaLzE5OTUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL01PQ1ovMTk5NS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{LIVINGSTONE:1993rt,
-	Author = {LIVINGSTONE, C. D. and BARTON, G. J.},
-	Date = {DEC},
-	Date-Added = {2007-07-23 14:22:55 -0700},
-	Date-Modified = {2007-07-24 13:34:47 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1993MP47100018},
-	Issn = {0266-7061},
-	Journal = {COMPUTER APPLICATIONS IN THE BIOSCIENCES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/LIVINGSTONE/1993.pdf},
-	Month = {Dec},
-	Number = {6},
-	Pages = {745--756},
-	Publication-Type = {J},
-	Title = {PROTEIN-SEQUENCE ALIGNMENTS - A STRATEGY FOR THE HIERARCHICAL ANALYSIS OF RESIDUE CONSERVATION},
-	Volume = {9},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKcIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACj0sMLKkrZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC0xJVklOR1NUT05FAAAQAAgAAMFxjUkAAAARAAgAAMLK9SYAAAABABgARmynAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMSVZJTkdTVE9ORToxOTkzLnBkZgAOABIACAAxADkAOQAzAC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xJVklOR1NUT05FLzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTElWSU5HU1RPTkUvMTk5My5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV}}
-
-@article{Yampolsky:2005lr,
-	Author = {Yampolsky, L. Y. and Stoltzfus, A.},
-	Date = {AUG},
-	Date-Added = {2007-07-23 10:49:15 -0700},
-	Date-Modified = {2007-07-23 13:40:37 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000232033300003},
-	Issn = {0016-6731},
-	Journal = {GENETICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Yampolsky/2005.pdf},
-	Month = {Aug},
-	Number = {4},
-	Pages = {1459--1472},
-	Publication-Type = {J},
-	Title = {The exchangeability of amino acids in proteins},
-	Volume = {170},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVMIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjfHcLKOFlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVlhbXBvbHNreQAAEAAIAADBcY1JAAAAEQAIAADCyprJAAAAAQAYAEZtUwBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WWFtcG9sc2t5OjIwMDUucGRmAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvWWFtcG9sc2t5LzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWWFtcG9sc2t5LzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@inproceedings{Weiss:2003fk,
-	Annote = {This is really another hydrophobicity scale.  No partitioning is made.},
-	Author = {Weiss, O. and Ziehe, A. and Herzel, H.},
-	Booktitle = {LECTURE NOTES IN COMPUTER SCIENCE},
-	Date-Added = {2007-07-23 10:49:15 -0700},
-	Date-Modified = {2007-07-23 13:46:55 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000185378100030},
-	Issn = {0302-9743},
-	Journal = {ARTIFICIAL NEURAL NETWORKS AND NEURAL INFORMATION PROCESSING - ICAN/ICONIP 2003},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Weiss/2003.pdf},
-	Pages = {245--252},
-	Publication-Type = {S},
-	Title = {Optimizing property codes in protein data reveals structural characteristics},
-	Volume = {2714},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUUIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjfb8LKOTBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVdlaXNzAAAQAAgAAMFxjUkAAAARAAgAAMLKm6AAAAABABgARm1FAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpXZWlzczoyMDAzLnBkZgAOABIACAAyADAAMAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dlaXNzLzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2Vpc3MvMjAwMy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{May:1999qy,
-	Annote = {Studied a lot of different aa partitions but doesn't actually write one down.  No comparison of which is "best".  This paper is more concerned with classifying substitution matrices.},
-	Author = {May, A. C. W.},
-	Date = {SEP},
-	Date-Added = {2007-07-23 10:49:15 -0700},
-	Date-Modified = {2007-07-23 14:13:02 -0700},
-	Group = {LitSearch; Scoring Matrices; Reviewed},
-	Isi = {ISI:000083167700001},
-	Issn = {0269-2139},
-	Journal = {PROTEIN ENGINEERING},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/May/1999.pdf},
-	Month = {Sep},
-	Number = {9},
-	Pages = {707--712},
-	Publication-Type = {J},
-	Title = {Towards more meaningful hierarchical classification of amino acid scoring matrices},
-	Volume = {12},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbLMIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjfo8LKOY5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA01heQAAEAAIAADBcY1JAAAAEQAIAADCypv+AAAAAQAYAEZsswBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TWF5OjE5OTkucGRmAA4AEgAIADEAOQA5ADkALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTWF5LzE5OTkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTWF5LzE5OTkucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{SNEATH:1966uq,
-	Annote = {Fig. 2 has the aa partition.
-No ROC.},
-	Author = {SNEATH, P. H. A.},
-	Date-Added = {2007-07-23 10:49:15 -0700},
-	Date-Modified = {2007-08-03 11:33:08 -0700},
-	Group = {LitSearch; IBID; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:A19668586500001},
-	Issn = {0022-5193},
-	Journal = {JOURNAL OF THEORETICAL BIOLOGY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/SNEATH/1966.pdf},
-	Number = {2},
-	Pages = {157--&},
-	Publication-Type = {J},
-	Title = {RELATIONS BETWEEN CHEMICAL STRUCTURE AND BIOLOGICAL ACTIVITY IN PEPTIDES},
-	Volume = {12},
-	Year = {1966},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRgIMTk2Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkR8cLLmy5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlNORUFUSAAQAAgAAMFxjUkAAAARAAgAAMLL/Z4AAAABABgARm0YAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTTkVBVEg6MTk2Ni5wZGYAAA4AEgAIADEAOQA2ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvU05FQVRILzE5NjYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1NORUFUSC8xOTY2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Kinjo:2004rt,
-	Author = {Kinjo, A. R. and Nishikawa, K.},
-	Date = {NOV 1},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-24 13:29:36 -0700},
-	Group = {LitSearch; Reviewed; Printed},
-	Isi = {ISI:000225250100002},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kinjo/2004.pdf},
-	Month = {Nov},
-	Number = {16},
-	Pages = {2504--2508},
-	Publication-Type = {J},
-	Title = {Eigenvalue analysis of amino acid substitution matrices reveals a sharp transition of the mode of sequence conservation in proteins},
-	Volume = {20},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIIIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChsVMLGqZ5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUtpbmpvAAAQAAgAAMFxjUkAAAARAAgAAMLHDA4AAAABABgARmyCAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLaW5qbzoyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0tpbmpvLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2luam8vMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Cheon:2004ys,
-	Annote = {Obscure Korean reference.
-No ROC.},
-	Author = {Cheon, M. and Heo, M. and Moon, E. J. and Kim, S. and Chung, K. and Chang, I. and Kim, H.},
-	Date = {AUG},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-08-03 11:10:59 -0700},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000223321200052},
-	Issn = {0374-4884},
-	Journal = {JOURNAL OF THE KOREAN PHYSICAL SOCIETY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cheon/2004a.pdf},
-	Month = {Aug},
-	Number = {2},
-	Pages = {550--557},
-	Publication-Type = {J},
-	Title = {Environment-dependent one-body score function for proteins by perceptron learning and protein threading},
-	Volume = {45},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA8JMjAwNGEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChsssLGqmlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNoZW9uAAAQAAgAAMFxjUkAAAARAAgAAMLHDNkAAAABABgARmwPAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDaGVvbjoyMDA0YS5wZGYAAA4AFAAJADIAMAAwADQAYQAuAHAAZABmAA8ACAADAGgAcwByABIAPVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaGVvbi8yMDA0YS5wZGYAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAkLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hlb24vMjAwNGEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJNAlICWwJmAmoCeAJ/AogCrwK0ArcAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACxA==}}
-
-@article{Liu:2003fr,
-	Annote = {Alphabet schemes are broken down by 2' structure.
-No ROC},
-	Author = {Liu, X. and Zhang, L. M. and Guan, S. and Zheng, W. M.},
-	Date = {MAY},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 10:40:48 -0700},
-	Di = {ARTN 051927},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000183482200080},
-	Issn = {1063-651X},
-	Journal = {PHYSICAL REVIEW E},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Liu/2003.pdf},
-	Month = {May},
-	Number = {5},
-	Pages = {051927},
-	Part-Number = {Part 1},
-	Publication-Type = {J},
-	Title = {Distances and classification of amino acids for different protein secondary structures},
-	Volume = {67},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKYIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChs8MLGqqlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0xpdQAAEAAIAADBcY1JAAAAEQAIAADCxw0ZAAAAAQAYAEZspgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TGl1OjIwMDMucGRmAA4AEgAIADIAMAAwADMALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGl1LzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGl1LzIwMDMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Green:2002zr,
-	Author = {Green, R E and Brenner, S E},
-	Date = {DEC},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2008-05-29 12:28:11 -0700},
-	Group = {LitSearch; Reviewed; Printed; Statistics},
-	Isi = {ISI:000179509700003},
-	Journal = {Proc of the IEEE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Green/2002.pdf},
-	Month = {Dec},
-	Number = {12},
-	Pages = {1834--1847},
-	Publication-Type = {J},
-	Title = {Bootstrapping and normalization for enhanced evaluations of pairwise sequence comparison},
-	Volume = {90},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbFMIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChtD8LGqtZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUdyZWVuAAAQAAgAAMFxjUkAAAARAAgAAMLHDUYAAAABABgARmxTAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHcmVlbjoyMDAyLnBkZgAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dyZWVuLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR3JlZW4vMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Cannata:2002mz,
-	Annote = {Result is AlphaSimp, a program to automatically do a cluster analysis given a substitution matrix.
-Might be an interesting project for the future.
-No ROC.},
-	Author = {Cannata, N. and Toppo, S. and Romualdi, C. and Valle, G.},
-	Date = {AUG},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-08-03 11:17:30 -0700},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000177504400009},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cannata/2002.pdf},
-	Month = {Aug},
-	Number = {8},
-	Pages = {1102--1108},
-	Publication-Type = {J},
-	Title = {Simplifying amino acid alphabets by means of a branch and bound algorithm and substitution matrices},
-	Volume = {18},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAcIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChtMcLGqxdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0Nhbm5hdGEAABAACAAAwXGNSQAAABEACAAAwscNhwAAAAEAGABGbAcARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkNhbm5hdGE6MjAwMi5wZGYADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DYW5uYXRhLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2FubmF0YS8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Venkatarajan:2001ly,
-	Annote = {Table 3 and Fig. 5 have the clustering, based on an analysis of amino acid properties.
-No ROC.},
-	Author = {Venkatarajan, M. S. and Braun, W.},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-08-03 11:30:19 -0700},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000175682900004},
-	Issn = {0948-5023},
-	Journal = {JOURNAL OF MOLECULAR MODELING},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Venkatarajan/2001.pdf},
-	Number = {12},
-	Pages = {445--453},
-	Publication-Type = {J},
-	Title = {New quantitative descriptors of amino acids based on multidimensional scaling of a large number of physical-chemical properties},
-	Volume = {7},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGgAAAAAAGgAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbTkIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChtUcLGq0dQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAADFZlbmthdGFyYWphbgAQAAgAAMFxjUkAAAARAAgAAMLHDbcAAAABABgARm05AEZrzwBGatUARmobAEZkaABAiUMAAgBHaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpWZW5rYXRhcmFqYW46MjAwMS5wZGYAAA4AEgAIADIAMAAwADEALgBwAGQAZgAPAAgAAwBoAHMAcgASAENVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVmVua2F0YXJhamFuLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKi4uLy4uLy4uLy4uL0FydGljbGVzL1ZlbmthdGFyYWphbi8yMDAxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCXQJiAmsCdgJ6AogCjwKYAsUCygLNAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAto=}}
-
-@article{Xia:1998gf,
-	Author = {Xia, X. H. and Li, W. H.},
-	Date = {NOV},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-24 13:29:36 -0700},
-	Group = {LitSearch; Reviewed; Printed},
-	Isi = {ISI:000076739900007},
-	Issn = {0022-2844},
-	Journal = {JOURNAL OF MOLECULAR EVOLUTION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Xia/1998.pdf},
-	Month = {Nov},
-	Number = {5},
-	Pages = {557--564},
-	Publication-Type = {J},
-	Title = {What amino acid properties affect protein evolution?},
-	Volume = {47},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVEIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChtaMLGq2pQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1hpYQAAEAAIAADBcY1JAAAAEQAIAADCxw3aAAAAAQAYAEZtUQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WGlhOjE5OTgucGRmAA4AEgAIADEAOQA5ADgALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvWGlhLzE5OTgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWGlhLzE5OTgucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Irback:1997ve,
-	Author = {Irback, A. and Potthast, F.},
-	Date = {SEP},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 09:49:58 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000071300700004},
-	Issn = {0269-2139},
-	Journal = {PROTEIN ENGINEERING},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Irback/1997.pdf},
-	Month = {Sep},
-	Number = {9},
-	Pages = {1013--1017},
-	Publication-Type = {J},
-	Title = {Binary assignments of amino acids from pattern conservation},
-	Volume = {10},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbG8IMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChtjcLGq6RQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABklyYmFjawAQAAgAAMFxjUkAAAARAAgAAMLHDhQAAAABABgARmxvAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpJcmJhY2s6MTk5Ny5wZGYAAA4AEgAIADEAOQA5ADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvSXJiYWNrLzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0lyYmFjay8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Bradley:1997ul,
-	Author = {Bradley, A. P.},
-	Date = {JUL},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-24 13:29:36 -0700},
-	Group = {LitSearch; Reviewed; Printed},
-	Isi = {ISI:A1997XE56500009},
-	Issn = {0031-3203},
-	Journal = {PATTERN RECOGNITION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bradley/1997.pdf},
-	Month = {Jul},
-	Number = {7},
-	Pages = {1145--1159},
-	Publication-Type = {J},
-	Title = {The use of the area under the roc curve in the evaluation of machine learning algorithms},
-	Volume = {30},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/sIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjmuMLKXrRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0JyYWRsZXkAABAACAAAwXGNSQAAABEACAAAwsrBJAAAAAEAGABGa/sARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkJyYWRsZXk6MTk5Ny5wZGYADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9CcmFkbGV5LzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQnJhZGxleS8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Stanfel:1996qf,
-	Annote = {Table 2-5 have the clusterings.  Table 5 w/ 10 classes is probably the most interesting.  Based on aa properties.
-No ROC.},
-	Author = {Stanfel, L. E.},
-	Date = {NOV 21},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-08-03 11:39:10 -0700},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:A1996VV62700007},
-	Issn = {0022-5193},
-	Journal = {JOURNAL OF THEORETICAL BIOLOGY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Stanfel/1996.pdf},
-	Month = {Nov},
-	Number = {2},
-	Pages = {195--205},
-	Publication-Type = {J},
-	Title = {A new approach to clustering the amino acids},
-	Volume = {183},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbR4IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChtscLGq+9QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1N0YW5mZWwAABAACAAAwXGNSQAAABEACAAAwscOXwAAAAEAGABGbR4ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlN0YW5mZWw6MTk5Ni5wZGYADgASAAgAMQA5ADkANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TdGFuZmVsLzE5OTYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU3RhbmZlbC8xOTk2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{SMITH:1990pd,
-	Author = {SMITH, R. F. and SMITH, T. F.},
-	Date = {JAN},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 09:57:54 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1990CH19000025},
-	Issn = {0027-8424},
-	Journal = {PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/SMITH/1990.pdf},
-	Month = {Jan},
-	Number = {1},
-	Pages = {118--122},
-	Publication-Type = {J},
-	Title = {AUTOMATIC-GENERATION OF PRIMARY SEQUENCE PATTERNS FROM SETS OF RELATED PROTEIN SEQUENCES},
-	Volume = {87},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRcIMTk5MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACht0MLGrBdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNtaXRoAAAQAAgAAMFxjUkAAAARAAgAAMLHDocAAAABABgARm0XAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTbWl0aDoxOTkwLnBkZgAOABIACAAxADkAOQAwAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NtaXRoLzE5OTAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU21pdGgvMTk5MC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{REIDHAAROLSON:1988bh,
-	Author = {REIDHAAROLSON, J. F. and SAUER, R. T.},
-	Date = {JUL 1},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-24 13:29:36 -0700},
-	Group = {LitSearch; Reviewed; Printed},
-	Isi = {ISI:A1988P049100024},
-	Issn = {0036-8075},
-	Journal = {SCIENCE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/REIDHAAROLSON/1988.pdf},
-	Month = {Jul},
-	Number = {4861},
-	Pages = {53--57},
-	Publication-Type = {J},
-	Title = {COMBINATORIAL CASSETTE MUTAGENESIS AS A PROBE OF THE INFORMATIONAL CONTENT OF PROTEIN SEQUENCES},
-	Volume = {241},
-	Year = {1988},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGiAAAAAAGiAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbO0IMTk4OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACht7sLGrEhQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAADVJFSURIQUFST0xTT04AABAACAAAwXGNSQAAABEACAAAwscOuAAAAAEAGABGbO0ARmvPAEZq1QBGahsARmRoAECJQwACAEhoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJFSURIQUFST0xTT046MTk4OC5wZGYADgASAAgAMQA5ADgAOAAuAHAAZABmAA8ACAADAGgAcwByABIARFVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SRUlESEFBUk9MU09OLzE5ODgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxArLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUkVJREhBQVJPTFNPTi8xOTg4LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCXwJkAm0CeAJ8AooCkQKaAsgCzQLQAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAt0=}}
-
-@article{TAYLOR:1986lq,
-	Annote = {Stanfel (1996) claims that this paper is rather arbitrary and doesn't actually "get 'er done" as far as producing a partition of the amino acids.
-Indeed this paper makes no clear aa partition; nice look at aa properties though.},
-	Author = {TAYLOR, W. R.},
-	Date = {MAR 21},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 14:15:55 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1986A790500008},
-	Issn = {0022-5193},
-	Journal = {JOURNAL OF THEORETICAL BIOLOGY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/TAYLOR/1986.pdf},
-	Month = {Mar},
-	Number = {2},
-	Pages = {205--&},
-	Publication-Type = {J},
-	Title = {THE CLASSIFICATION OF AMINO-ACID CONSERVATION},
-	Volume = {119},
-	Year = {1986},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbSsIMTk4Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjmrcLKXqVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlRBWUxPUgAQAAgAAMFxjUkAAAARAAgAAMLKwRUAAAABABgARm0rAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpUQVlMT1I6MTk4Ni5wZGYAAA4AEgAIADEAOQA4ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVEFZTE9SLzE5ODYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1RBWUxPUi8xOTg2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{SJOSTROM:1985dq,
-	Annote = {No clear aa partition made.},
-	Author = {SJOSTROM, M. and WOLD, S.},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 14:17:01 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1985AUU2900008},
-	Issn = {0022-2844},
-	Journal = {JOURNAL OF MOLECULAR EVOLUTION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/SJOSTROM/1985.pdf},
-	Number = {3},
-	Pages = {272--277},
-	Publication-Type = {J},
-	Title = {A MULTIVARIATE STUDY OF THE RELATIONSHIP BETWEEN THE GENETIC-CODE AND THE PHYSICAL-CHEMICAL PROPERTIES OF AMINO-ACIDS},
-	Volume = {22},
-	Year = {1985},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRUIMTk4NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjmnsLKXpBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFNKT1NUUk9NABAACAAAwXGNSQAAABEACAAAwsrBAAAAAAEAGABGbRUARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlNKT1NUUk9NOjE5ODUucGRmAAAOABIACAAxADkAOAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NKT1NUUk9NLzE5ODUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1NKT1NUUk9NLzE5ODUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{KIDERA:1985rr,
-	Annote = {No aa partition is made.  Properties of the aa's are studied.},
-	Author = {KIDERA, A. and KONISHI, Y. and OKA, M. and OOI, T. and SCHERAGA, H. A.},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 14:21:08 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1985AQR4500002},
-	Issn = {0277-8033},
-	Journal = {JOURNAL OF PROTEIN CHEMISTRY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/KIDERA/1985.pdf},
-	Number = {1},
-	Pages = {23--55},
-	Publication-Type = {J},
-	Title = {STATISTICAL-ANALYSIS OF THE PHYSICAL-PROPERTIES OF THE 20 NATURALLY-OCCURRING AMINO-ACIDS},
-	Volume = {4},
-	Year = {1985},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIAIMTk4NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjmj8LKXoNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABktJREVSQQAQAAgAAMFxjUkAAAARAAgAAMLKwPMAAAABABgARmyAAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLSURFUkE6MTk4NS5wZGYAAA4AEgAIADEAOQA4ADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS0lERVJBLzE5ODUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0tJREVSQS8xOTg1LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{FRENCH:1983cr,
-	Annote = {Again no clear partition of the aa's is made.  Study the clustering of aa's based on 2' structure.},
-	Author = {FRENCH, S. and ROBSON, B.},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 14:22:22 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1983QL21300007},
-	Issn = {0022-2844},
-	Journal = {JOURNAL OF MOLECULAR EVOLUTION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/FRENCH/1983.pdf},
-	Number = {2},
-	Pages = {171--175},
-	Publication-Type = {J},
-	Title = {WHAT IS A CONSERVATIVE SUBSTITUTION},
-	Volume = {19},
-	Year = {1983},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbD4IMTk4My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjmfcLKXmxQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkZSRU5DSAAQAAgAAMFxjUkAAAARAAgAAMLKwNwAAAABABgARmw+AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpGUkVOQ0g6MTk4My5wZGYAAA4AEgAIADEAOQA4ADMALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRlJFTkNILzE5ODMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0ZSRU5DSC8xOTgzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{GRANTHAM:1974wd,
-	Author = {GRANTHAM, R.},
-	Date-Added = {2007-07-20 18:03:15 -0700},
-	Date-Modified = {2007-07-23 10:00:45 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1974T914800023},
-	Issn = {0036-8075},
-	Journal = {SCIENCE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/GRANTHAM/1974.pdf},
-	Number = {4154},
-	Pages = {862--864},
-	Publication-Type = {J},
-	Title = {AMINO-ACID DIFFERENCE FORMULA TO HELP EXPLAIN PROTEIN EVOLUTION},
-	Volume = {185},
-	Year = {1974},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbFIIMTk3NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChuPsLGrRZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEdSQU5USEFNABAACAAAwXGNSQAAABEACAAAwscPhgAAAAEAGABGbFIARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkdSQU5USEFNOjE5NzQucGRmAAAOABIACAAxADkANwA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dSQU5USEFNLzE5NzQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0dSQU5USEFNLzE5NzQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Wu:1996kx,
-	Abstract = {This paper introduces a method for identifying empirically conserved amino acid substitution groups. In contrast with existing approaches that view amino acid substitution as a pairwise phenomenon, the method presented here identifies conserved groups of amino acids using a data structure called a conditional distribution matrix. The conditional distribution matrix extends the concept of a pairwise substitution matrix by changing the context of substitution from a single amino acid to a group of amino acids. The matrix tabulates information from a database of protein families that contains numerous aligned positions. Each row in the matrix contains the distribution of amino acids in those aligned positions that contain a given conditioning group of amino acids. The method converts a database of protein families into a conditional distribution matrix and then examines each possible substitution group for evidence of conservation. The algorithm is applied to the BLOCKS and HSSP databases. Twenty amino acid substitution groups are found to be conserved empirically in both databases. These groups provide insight into biochemical properties that are conserved in protein evolution.},
-	Address = {Department of Medicine, Standford University Medical Center, California 94305, USA. twu@camis.stanford.edu},
-	Annote = {This may be useful for Jessica's project.},
-	Au = {Wu, TD and Brutlag, DL},
-	Author = {Wu, T D and Brutlag, D L},
-	Da = {19970130},
-	Date-Added = {2007-07-20 17:09:38 -0700},
-	Date-Modified = {2007-07-23 14:54:12 -0700},
-	Dcom = {19970130},
-	Edat = {1996/01/01},
-	Gr = {LM 05716/LM/NLM; LM 07033/LM/NLM},
-	Group = {LitSearch; Reviewed},
-	Issn = {1553-0833 (Print)},
-	Jid = {9509125},
-	Journal = {Proc Int Conf Intell Syst Mol Biol},
-	Jt = {Proceedings / ... International Conference on Intelligent Systems for Molecular Biology ; ISMB. International Conference on Intelligent Systems for Molecular Biology},
-	Keywords = {Algorithms; Amino Acid Sequence; Conserved Sequence; *Databases, Factual; Proteins/*chemistry; Sequence Alignment/methods; Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wu/1996.pdf},
-	Lr = {20061115},
-	Mhda = {1996/01/01 00:01},
-	Own = {NLM},
-	Pages = {230--240},
-	Pl = {UNITED STATES},
-	Pmid = {8877523},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proc Int Conf Intell Syst Mol Biol. 1996;4:230-40.},
-	Stat = {MEDLINE},
-	Title = {Discovering empirically conserved amino acid substitution groups in databases of protein families.},
-	Volume = {4},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVAIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACjsAcLKbqlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAld1ABAACAAAwXGNSQAAABEACAAAwsrRGQAAAAEAGABGbVAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOld1OjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1d1LzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL1d1LzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Wang:2000yq,
-	Abstract = {The folding characteristics of sequences reduced with a possibly simplified representation of five types of residues are shown to be similar to their original ones with the natural set of residues (20 types or 20 letters). The reduced sequences have a good foldability and fold to the same native structure of their optimized original ones. A large ground state gap for the native structure shows the thermodynamic stability of the reduced sequences. The general validity of such a five-letter reduction is further studied via the correlation between the reduced sequences and the original ones. As a comparison, a reduction with two letters is found not to reproduce the native structure of the original sequences due to its homopolymeric features.},
-	Address = {National Laboratory of Solid State Microstructure and Physics Department, Nanjing University, Nanjing 210093, China.},
-	Annote = {In this paper Wang and Wang further study the folding characteristics of their 5 letter alphabet introduced in the 1999 Nat Struct Biol paper.},
-	Au = {Wang, J and Wang, W},
-	Author = {Wang, J and Wang, W},
-	Da = {20001211},
-	Date-Added = {2007-07-20 17:09:38 -0700},
-	Date-Modified = {2007-07-23 09:45:51 -0700},
-	Dcom = {20001222},
-	Edat = {2000/11/23 11:00},
-	Group = {LitSearch; Reviewed},
-	Issn = {1063-651X (Print)},
-	Jid = {9887340},
-	Journal = {Phys Rev E Stat Phys Plasmas Fluids Relat Interdiscip Topics},
-	Jt = {Physical review. E, Statistical physics, plasmas, fluids, and related interdisciplinary topics},
-	Keywords = {Amino Acid Sequence; Amino Acids/chemistry; Kinetics; *Models, Chemical; Molecular Sequence Data; *Protein Folding; Proteins/*chemistry; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/2000.pdf},
-	Lr = {20061115},
-	Mhda = {2001/02/28 10:01},
-	Number = {6 Pt B},
-	Own = {NLM},
-	Pages = {6981--6986},
-	Phst = {1999/11/05 {$[$}received{$]$}; 2000/02/18 {$[$}revised{$]$}},
-	Pl = {UNITED STATES},
-	Pmid = {11088391},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Phys Rev E Stat Phys Plasmas Fluids Relat Interdiscip Topics. 2000 Jun;61(6 Pt B):6981-6.},
-	Stat = {MEDLINE},
-	Title = {Modeling study on the validity of a possibly simplified representation of proteins.},
-	Volume = {61},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChqDcLGniBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADCxwCQAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoyMDAwLnBkZgAADgASAAgAMgAwADAAMAAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYW5nLzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dhbmcvMjAwMC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Mintseris:2004vn,
-	Abstract = {The problem of describing a protein representation by breaking up the amino acids atoms into functionally similar atom groups has been addressed by many researchers in the past 25 years. They have used a variety of physical, chemical and biological criteria of varying degrees of rigor to essentially impose our understanding of protein structures onto various atom-typing schemes used in studies of protein folding, protein-protein and protein-ligand interactions, and others. Here, instead, we have chosen to rely primarily on the data and use information-theoretic techniques to dissect it. We show that we can obtain an optimized protein representation for a given alphabet size from protein monomers or protein interface datasets that are in agreement with general concepts of protein energetics. Closer inspection of the atom partitions led to interesting observations pointing to the greater importance of the hydrophobic interactions in protein monomers compared to interfaces and, conversely, greater importance of polar/charged interaction in protein interfaces. Comparing the atom partitions from the two datasets we show that the two are strikingly similar at alphabet size of five, proving that despite some differences, the general energetic concepts are very similar for folding and binding. Implications for further structural studies are discussed.},
-	Address = {Bioinformatics Program, Boston University, Boston MA 02215, USA. julianm@bu.edu},
-	Annote = {They classify atoms, not residues.
-No ROC},
-	Au = {Mintseris, J and Weng, Z},
-	Author = {Mintseris, Julian and Weng, Zhiping},
-	Da = {20050215},
-	Date-Added = {2007-07-20 17:09:38 -0700},
-	Date-Modified = {2007-07-23 10:37:44 -0700},
-	Dcom = {20050512},
-	Edat = {2005/02/16 09:00},
-	Group = {LitSearch; Alphabets; Reviewed; Forward; Backward},
-	Issn = {0919-9454 (Print)},
-	Jid = {101280573},
-	Journal = {Genome Inform},
-	Jt = {Genome informatics. International Conference on Genome Informatics},
-	Keywords = {Binding Sites; *Databases, Protein; Models, Biological; Models, Molecular; Monte Carlo Method; Protein Conformation; Proteins/*chemistry/metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Mintseris/2004.pdf},
-	Lr = {20061115},
-	Mhda = {2005/05/13 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {160--169},
-	Pl = {Japan},
-	Pmid = {15712119},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Genome Inform. 2004;15(1):160-9.},
-	Stat = {MEDLINE},
-	Title = {Optimizing protein representations with information theory},
-	Volume = {15},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbLwIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChp7cLGneJQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACU1pbnRzZXJpcwAAEAAIAADBcY1JAAAAEQAIAADCxwBSAAAAAQAYAEZsvABGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TWludHNlcmlzOjIwMDQucGRmAA4AEgAIADIAMAAwADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTWludHNlcmlzLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTWludHNlcmlzLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Rykunov:2007lr,
-	Af = {Rykunov, Dmitry Fiser, Andras},
-	Author = {Rykunov, D. and Fiser, A.},
-	Date = {MAY 15},
-	Date-Added = {2007-07-20 14:47:24 -0700},
-	Date-Modified = {2007-07-23 08:51:47 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000245743100006},
-	Issn = {0887-3585},
-	Journal = {PROTEINS-STRUCTURE FUNCTION AND BIOINFORMATICS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Rykunov/2007.pdf},
-	Month = {May},
-	Number = {3},
-	Pages = {559--568},
-	Publication-Type = {J},
-	Title = {Effects of amino acid composition, finite size of proteins, and sparse statistics on distance-dependent statistical pair potentials},
-	Volume = {67},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPkIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACgn8cLGeQFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1J5a3Vub3YAABAACAAAwXGNSQAAABEACAAAwsbbcQAAAAEAGABGbPkARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJ5a3Vub3Y6MjAwNy5wZGYADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SeWt1bm92LzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUnlrdW5vdi8yMDA3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@misc{Zou:2006fk,
-	Af = {Zou, Shuxue Huang, Yanxin Wang, Yan Zhou, Chunguang},
-	Author = {Zou, S. X. and Huang, Y. X. and Wang, Y. and Zhou, C. G.},
-	Booktitle = {LECTURE NOTES IN COMPUTER SCIENCE},
-	Date-Added = {2007-07-20 14:47:24 -0700},
-	Date-Modified = {2007-07-23 08:54:14 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000239485300099},
-	Issn = {0302-9743},
-	Journal = {ADVANCES IN NEURAL NETWORKS - ISNN 2006, PT 3, PROCEEDINGS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zou/2006.pdf},
-	Pages = {674--681},
-	Publication-Type = {S},
-	Title = {Prediction of protein domains from sequence information using support vector machines},
-	Volume = {3973},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbV4IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChX38LGkDpQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1pvdQAAEAAIAADBcY1JAAAAEQAIAADCxvKqAAAAAQAYAEZtXgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Wm91OjIwMDYucGRmAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvWm91LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWm91LzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Demchenko:1999qy,
-	Author = {Demchenko, A. P. and Chinarov, V. A.},
-	Date = {JUN},
-	Date-Added = {2007-07-20 14:47:24 -0700},
-	Date-Modified = {2007-07-24 13:13:54 -0700},
-	Group = {LitSearch; IBID; Printed; Reviewed},
-	Isi = {ISI:000081303500001},
-	Issn = {0929-8665},
-	Journal = {PROTEIN AND PEPTIDE LETTERS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Demchenko/1999.pdf},
-	Month = {Jun},
-	Number = {3},
-	Pages = {115--129},
-	Publication-Type = {J},
-	Title = {Tolerance of protein structures to the changes of amino acid sequences and their interactions. The nature of the folding code},
-	Volume = {6},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCUIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACkVSsLLrQhQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACURlbWNoZW5rbwAAEAAIAADBcY1JAAAAEQAIAADCzA94AAAAAQAYAEZsJQBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RGVtY2hlbmtvOjE5OTkucGRmAA4AEgAIADEAOQA5ADkALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRGVtY2hlbmtvLzE5OTkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGVtY2hlbmtvLzE5OTkucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{LESSEL:1994uq,
-	Author = {LESSEL, U. and SCHOMBURG, D.},
-	Date = {OCT},
-	Date-Added = {2007-07-20 14:47:24 -0700},
-	Date-Modified = {2007-07-23 09:53:26 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1994PM11300001},
-	Issn = {0269-2139},
-	Journal = {PROTEIN ENGINEERING},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/LESSEL/1994.pdf},
-	Month = {Oct},
-	Number = {10},
-	Pages = {1175--1187},
-	Publication-Type = {J},
-	Title = {SIMILARITIES BETWEEN PROTEIN 3-D STRUCTURES},
-	Volume = {7},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJ0IMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAChXxcLGkDBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkxFU1NFTAAQAAgAAMFxjUkAAAARAAgAAMLG8qAAAAABABgARmydAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMRVNTRUw6MTk5NC5wZGYAAA4AEgAIADEAOQA5ADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTEVTU0VMLzE5OTQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0xFU1NFTC8xOTk0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@inbook{Dayhoff:1969rt,
-	Annote = {Figure 9-12 has the breakdown.
-No ROC.},
-	Author = {Dayhoff, M O and Eck, E V and Park, C M},
-	Booktitle = {Atlas of Protein Sequence and Structure},
-	Chapter = {9},
-	Date-Added = {2007-07-19 10:37:32 -0700},
-	Date-Modified = {2007-08-03 11:33:15 -0700},
-	Editor = {Dayhoff, M O},
-	Group = {LitSearch; Reviewed; Alphabets; Backward; Forward},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dayhoff/1969.pdf},
-	Title = {A Model of Evolutionary Change in Proteins},
-	Volume = {5},
-	Year = {1969},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCEIMTk2OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmlMKpTFFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0RheWhvZmYAABAACAAAwXGNSQAAABEACAAAwqmuwQAAAAEAGABGbCEARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkRheWhvZmY6MTk2OS5wZGYADgASAAgAMQA5ADYAOQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9EYXlob2ZmLzE5NjkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGF5aG9mZi8xOTY5LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@inbook{Rackovsky:2006yq,
-	Author = {Rackovsky, S},
-	Booktitle = {Power Laws, Scale-Free Networks and Genome Biology},
-	Chapter = {11},
-	Date-Added = {2007-07-19 10:32:56 -0700},
-	Date-Modified = {2007-07-20 13:14:25 -0700},
-	Editor = {Koonin, Eugene V and Wolf, Yuri I and Karev, Georgy P},
-	Group = {LitSearch; Printed; Reviewed},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Rackovsky/2006.pdf},
-	Pages = {194-205},
-	Title = {The Protein Universes: Some Informatic Issues in Protein Classification},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOUIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmlsKpTFRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVJhY2tvdnNreQAAEAAIAADBcY1JAAAAEQAIAADCqa7EAAAAAQAYAEZs5QBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UmFja292c2t5OjIwMDYucGRmAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUmFja292c2t5LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUmFja292c2t5LzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Porto:2006lr,
-	Author = {Porto, A. H. L. and Barbosa, V. C.},
-	Booktitle = {LECTURE NOTES IN COMPUTER SCIENCE},
-	Date-Added = {2007-07-19 10:24:48 -0700},
-	Date-Modified = {2007-07-20 13:12:13 -0700},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000237228900012},
-	Issn = {0302-9743},
-	Journal = {APPLICATIONS OF EVOLUTIONARY COMPUTING, PROCEEDINGS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Porto/2006.pdf},
-	Pages = {127--137},
-	Publication-Type = {S},
-	Title = {Multiple sequence alignment based on set covers},
-	Volume = {3907},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbN0IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACewOsLGLK9QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVBvcnRvAAAQAAgAAMFxjUkAAAARAAgAAMLGjx8AAAABABgARmzdAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQb3J0bzoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1BvcnRvLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUG9ydG8vMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Porto:2006fk,
-	Author = {Porto, A. H. L. and Barbosa, V. C.},
-	Booktitle = {LECTURE NOTES IN COMPUTER SCIENCE},
-	Date-Added = {2007-07-19 10:24:48 -0700},
-	Date-Modified = {2007-07-20 13:13:42 -0700},
-	Group = {LitSearch; Scoring Matrices; Reviewed},
-	Isi = {ISI:000237228900013},
-	Issn = {0302-9743},
-	Journal = {APPLICATIONS OF EVOLUTIONARY COMPUTING, PROCEEDINGS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Porto/2006a.pdf},
-	Pages = {138--148},
-	Publication-Type = {S},
-	Title = {A methodology for determining amino-acid substitution matrices from set covers},
-	Volume = {3907},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbN0JMjAwNmEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACewQsLGLLRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVBvcnRvAAAQAAgAAMFxjUkAAAARAAgAAMLGjyQAAAABABgARmzdAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQb3J0bzoyMDA2YS5wZGYAAA4AFAAJADIAMAAwADYAYQAuAHAAZABmAA8ACAADAGgAcwByABIAPVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Qb3J0by8yMDA2YS5wZGYAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAkLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUG9ydG8vMjAwNmEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJNAlICWwJmAmoCeAJ/AogCrwK0ArcAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACxA==}}
-
-@article{Cheon:2005qy,
-	Author = {Cheon, M. and Heo, M. and Chang, I. and Kim, C.},
-	Date = {NOV},
-	Date-Added = {2007-07-19 10:24:48 -0700},
-	Date-Modified = {2007-07-19 10:59:06 -0700},
-	Isi = {ISI:000233306000027},
-	Issn = {0374-4884},
-	Journal = {JOURNAL OF THE KOREAN PHYSICAL SOCIETY},
-	Month = {Nov},
-	Number = {5},
-	Pages = {895--899},
-	Publication-Type = {J},
-	Title = {A self-organizing map of amino acids with their local environments in proteins by using pairwise-contact energy parameters},
-	Volume = {47},
-	Year = {2005}}
-
-@article{Shakhnovich:1990ve,
-	Abstract = {Natural proteins exhibit essentially two-state thermodynamics, with one stable fold that dominates thermodynamically over a vast number of possible folds, a number that increases exponentially with the size of the protein. Here we address the question of whether this feature of proteins is a rare property selected by evolution or whether it is in fact true of a significant proportion of all possible protein sequences. Using statistical procedures developed to study spin glasses, we show that, given certain assumptions, the probability that a randomly synthesized protein chain will have a dominant fold (which is the global minimum of free energy) is a function of temperature, and that below a critical temperature the probability rapidly increases as the temperature decreases. Our results suggest that a significant proportion of all possible protein sequences could have a thermodynamically dominant fold.},
-	Address = {Institute of Protein Research, Academy of Sciences of the USSR, Moscow Region.},
-	Au = {Shakhnovich, EI and Gutin, AM},
-	Author = {Shakhnovich, E I and Gutin, A M},
-	Da = {19900927},
-	Date-Added = {2007-07-13 14:37:28 -0700},
-	Date-Modified = {2007-07-13 14:37:37 -0700},
-	Dcom = {19900927},
-	Doi = {10.1038/346773a0},
-	Edat = {1990/08/23},
-	Issn = {0028-0836 (Print)},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature},
-	Keywords = {*Amino Acid Sequence; *Evolution; Mathematics; Models, Theoretical; Probability; *Protein Conformation; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Shakhnovich/1990.pdf},
-	Lr = {20001218},
-	Mhda = {1990/08/23 00:01},
-	Number = {6286},
-	Own = {NLM},
-	Pages = {773--775},
-	Pl = {ENGLAND},
-	Pmid = {2388698},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Nature. 1990 Aug 23;346(6286):773-5.},
-	Stat = {MEDLINE},
-	Title = {Implications of thermodynamics of protein folding for evolution of primary sequences},
-	Volume = {346},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbQgIMTk5MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAIwsKoe9dQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC1NoYWtobm92aWNoAAAQAAgAAMFxjUkAAAARAAgAAMKo3kcAAAABABgARm0IAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTaGFraG5vdmljaDoxOTkwLnBkZgAOABIACAAxADkAOQAwAC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NoYWtobm92aWNoLzE5OTAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU2hha2hub3ZpY2gvMTk5MC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/346773a0}}
-
-@article{Shih:2006kx,
-	Abstract = {In recent years it has been shown that bacteria contain a number of cytoskeletal structures. The bacterial cytoplasmic elements include homologs of the three major types of eukaryotic cytoskeletal proteins (actin, tubulin, and intermediate filament proteins) and a fourth group, the MinD-ParA group, that appears to be unique to bacteria. The cytoskeletal structures play important roles in cell division, cell polarity, cell shape regulation, plasmid partition, and other functions. The proteins self-assemble into filamentous structures in vitro and form intracellular ordered structures in vivo. In addition, there are a number of filamentous bacterial elements that may turn out to be cytoskeletal in nature. This review attempts to summarize and integrate the in vivo and in vitro aspects of these systems and to evaluate the probable future directions of this active research field.},
-	Address = {Department of Molecular, Microbial and Structural Biology, University of Connecticut Health Center, 263 Farmington Avenue, Farmington, CT 06032, USA.},
-	Au = {Shih, YL and Rothfield, L},
-	Author = {Shih, Yu-Ling and Rothfield, Lawrence},
-	Da = {20060908},
-	Date-Added = {2007-07-13 14:22:16 -0700},
-	Date-Modified = {2007-07-13 14:22:27 -0700},
-	Dcom = {20061017},
-	Doi = {10.1128/MMBR.00017-06},
-	Edat = {2006/09/09 09:00},
-	Gr = {GM R37-06032/GM/NIGMS},
-	Issn = {1092-2172 (Print)},
-	Jid = {9706653},
-	Journal = {Microbiol Mol Biol Rev},
-	Jt = {Microbiology and molecular biology reviews : MMBR},
-	Keywords = {Actins/chemistry/metabolism; Bacteria/cytology/*metabolism; Bacterial Proteins/chemistry/metabolism/physiology; Cytoskeletal Proteins/chemistry/*metabolism; Cytoskeleton/chemistry/*metabolism; Models, Biological; Models, Molecular; Protein Structure, Secondary; Tubulin/chemistry/metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Shih/2006.pdf},
-	Lr = {20061115},
-	Mhda = {2006/10/18 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {729--754},
-	Pii = {70/3/729},
-	Pl = {United States},
-	Pmid = {16959967},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Review},
-	Pubm = {Print},
-	Rf = {226},
-	Rn = {0 (Actins); 0 (Bacterial Proteins); 0 (Cytoskeletal Proteins); 0 (Tubulin)},
-	Sb = {IM},
-	So = {Microbiol Mol Biol Rev. 2006 Sep;70(3):729-54.},
-	Stat = {MEDLINE},
-	Title = {The bacterial cytoskeleton},
-	Volume = {70},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbQ8IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACHuccK0J9RQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFNoaWgAEAAIAADBcY1JAAAAEQAIAADCtIpEAAAAAQAYAEZtDwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U2hpaDoyMDA2LnBkZgAADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TaGloLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1NoaWgvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1128/MMBR.00017-06}}
-
-@article{Li:2001fk,
-	Abstract = {We present a fast and flexible program for clustering large protein databases at different sequence identity levels. It takes less than 2 h for the all-against-all sequence comparison and clustering of the non-redundant protein database of over 560,000 sequences on a high-end PC. The output database, including only the representative sequences, can be used for more efficient and sensitive database searches.},
-	Address = {San Diego Supercomputer Center, La Jolla, CA 92093, USA. liwz{\char64}sdsc.edu},
-	Au = {Li, W and Jaroszewski, L and Godzik, A},
-	Author = {Li, W and Jaroszewski, L and Godzik, A},
-	Da = {20010411},
-	Date-Added = {2007-07-12 18:32:49 -0700},
-	Date-Modified = {2007-07-12 18:32:57 -0700},
-	Dcom = {20010705},
-	Edat = {2001/04/11 10:00},
-	Gr = {GM60049/GM/NIGMS},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Keywords = {Algorithms; *Databases, Factual; Proteins/*analysis; Sequence Analysis; *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/2001.pdf},
-	Lr = {20061115},
-	Mhda = {2001/07/06 10:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {282--283},
-	Pl = {England},
-	Pmid = {11294794},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Bioinformatics. 2001 Mar;17(3):282-3.},
-	Stat = {MEDLINE},
-	Title = {Clustering of highly homologous sequences to reduce the size of large protein databases},
-	Volume = {17},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACQHeMK8JCtQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAwryGmwAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjIwMDEucGRmAAAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzIwMDEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Holm:1998lr,
-	Abstract = {MOTIVATION: To maximize the chances of biological discovery, homology searching must use an up-to-date collection of sequences. However, the available sequence databases are growing rapidly and are partially redundant in content. This leads to increasing strain on CPU resources and decreasing density of first-hand annotation. RESULTS: These problems are addressed by clustering closely similar sequences to yield a covering of sequence space by a representative subset of sequences. No pair of sequences in the representative set has >90% mutual sequence identity. The representative set is derived by an exhaustive search for close similarities in the sequence database in which the need for explicit sequence alignment is significantly reduced by applying deca- and pentapeptide composition filters. The algorithm was applied to the union of the Swissprot, Swissnew, Trembl, Tremblnew, Genbank, PIR, Wormpep and PDB databases. The all-against-all comparison required to generate a representative set at 90% sequence identity was accomplished in 2 days CPU time, and the removal of fragments and close similarities yielded a size reduction of 46%, from 260 000 unique sequences to 140 000 representative sequences. The practical implications are (i) faster homology searches using, for example, Fasta or Blast, and (ii) unified annotation for all sequences clustered around a representative. As tens of thousands of sequence searches are performed daily world-wide, appropriate use of the non-redundant database can lead to major savings in computer resources, without loss of efficacy. AVAILABILITY: A regularly updated non-redundant protein sequence database (nrdb90), a server for homology searches against nrdb90, and a Perl script (nrdb90.pl) implementing the algorithm are available for academic use from http://www.embl-ebi.ac. uk/holm/nrdb90. CONTACT: holm@embl-ebi.ac.uk},
-	Address = {EMBL-EBI, Cambridge CB10 1SD, UK.},
-	Au = {Holm, L and Sander, C},
-	Author = {Holm, L and Sander, C},
-	Da = {19980908},
-	Date-Added = {2007-07-12 18:31:36 -0700},
-	Date-Modified = {2007-07-12 18:31:45 -0700},
-	Dcom = {19980908},
-	Edat = {1998/07/31},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Keywords = {Algorithms; Animals; Computational Biology; *Databases, Factual; Fungal Proteins/genetics; Genome, Fungal; Humans; Muscle Proteins/genetics; Protein Kinases/genetics; Proteins/classification/*genetics; Saccharomyces cerevisiae/genetics; Sequence Alignment/*methods/statistics \& numerical data; Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1998.pdf},
-	Lr = {20061115},
-	Mhda = {1998/07/31 00:01},
-	Number = {5},
-	Own = {NLM},
-	Pages = {423--429},
-	Pii = {btb053},
-	Pl = {ENGLAND},
-	Pmid = {9682055},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Fungal Proteins); 0 (Muscle Proteins); 0 (Proteins); 0 (connectin); EC 2.7.1.37 (Protein Kinases)},
-	Sb = {IM},
-	So = {Bioinformatics. 1998 Jun;14(5):423-9.},
-	Stat = {MEDLINE},
-	Title = {Removing near-neighbour redundancy from large protein sequence collections},
-	Volume = {14},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACQF4MK8IkZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADCvIS2AAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTk4LnBkZgAADgASAAgAMQA5ADkAOAAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0hvbG0vMTk5OC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Goodson:2002lr,
-	Abstract = {Members of the actin family have well-characterized cytoskeletal functions, but actin and actin-related proteins (ARPs) have also been implicated in nuclear activities. Previous analyses of the actin family have identified four conserved subfamilies, but many actin-related proteins (ARPs) do not fall into these groups. A new systematic phylogenetic analysis reveals that at least eight ARP subfamilies are conserved from humans to yeast, indicating that these ARPs are part of the core set of eukaryotic proteins. Members of at least three subfamilies appear to be involved in chromatin remodeling, suggesting that ARPs play ancient, fundamental roles in this nuclear process.},
-	Address = {Department of Chemistry and Biochemistry, University of Notre Dame, Notre Dame, IN 46556, USA. hgoodson@nd.edu},
-	Au = {Goodson, HV and Hawse, WF},
-	Author = {Goodson, Holly V and Hawse, William F},
-	Da = {20020621},
-	Date-Added = {2007-07-12 13:04:55 -0700},
-	Date-Modified = {2007-07-12 13:05:03 -0700},
-	Dcom = {20021126},
-	Edat = {2002/06/22 10:00},
-	Issn = {0021-9533 (Print)},
-	Jid = {0052457},
-	Journal = {J Cell Sci},
-	Jt = {Journal of cell science},
-	Keywords = {Actins/*genetics/*metabolism; Animals; Eukaryotic Cells/cytology/*metabolism; *Evolution, Molecular; Genomic Library; Humans; *Microfilament Proteins; Molecular Biology; Molecular Sequence Data; *Phylogeny; Predictive Value of Tests; Sequence Homology, Amino Acid; Sequence Homology, Nucleic Acid; *src Homology Domains},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Goodson/2002.pdf},
-	Lr = {20061115},
-	Mhda = {2002/11/28 04:00},
-	Number = {Pt 13},
-	Own = {NLM},
-	Pages = {2619--2622},
-	Pl = {England},
-	Pmid = {12077353},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (ACTRT1 protein, human); 0 (Actins); 0 (HIP-55 protein, human); 0 (Microfilament Proteins)},
-	Sb = {IM},
-	So = {J Cell Sci. 2002 Jul 1;115(Pt 13):2619-22.},
-	Stat = {MEDLINE},
-	Title = {Molecular evolution of the actin family},
-	Volume = {115},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEsIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACPtYMK71zdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0dvb2Rzb24AABAACAAAwXGNSQAAABEACAAAwrw5pwAAAAEAGABGbEsARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkdvb2Rzb246MjAwMi5wZGYADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Hb29kc29uLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR29vZHNvbi8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Gutin:1993yf,
-	Annote = {This is the ref with an interesting estimate of how many random chains will be able to adobt a native globular fold.},
-	Author = {Gutin, A M and Shakhnovich, E I},
-	Date-Added = {2007-07-12 13:01:33 -0700},
-	Date-Modified = {2008-05-30 02:49:16 -0700},
-	Journal = {J Chem Phys},
-	Keywords = {MODIFICATIONS; PROBABILITY; COPOLYMERS; GROUND STATES; DEGENERATION; RANDOMNESS; TEMPERATURE DEPENDENCE; PHASE STUDIES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Gutin/1993.pdf},
-	Number = {10},
-	Pages = {8174-8177},
-	Publisher = {AIP},
-	Title = {Ground state of random copolymers and the discrete random energy model},
-	Volume = {98},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbFcIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACPsBcK71j1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUd1dGluAAAQAAgAAMFxjUkAAAARAAgAAMK8OK0AAAABABgARmxXAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHdXRpbjoxOTkzLnBkZgAOABIACAAxADkAOQAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0d1dGluLzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR3V0aW4vMTk5My5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://link.aip.org/link/?JCP/98/8174/1},
-	Bdsk-Url-2 = {http://dx.doi.org/10.1063/1.464522}}
-
-@article{Sasidharan:2007lr,
-	Abstract = {We have determined the general constraints that govern sequence divergence in proteins that retain entirely, or very largely, the same structure and function. To do this we collected data from three different groups of orthologous sequences: those found in humans and mice, in humans and chickens, and in Escherichia coli and Salmonella enterica. In total, these organisms have 21,738 suitable pairs of orthologs, and these contain nearly 2 million mutations. The three groups differ greatly in the taxa from which they come and/or in the time that separates them from their last common ancestor. Nevertheless, the results we obtain from the three different groups are strikingly similar. For each group, the orthologous sequence pairs were assigned to six different divergence categories on the basis of their sequence identities. For categories with the same divergence, common accepted mutations have similar frequencies and rank orders in the three groups. With divergence, the width of the range of common mutations grows in the same manner in each group. We examined the distribution of mutations in protein structures. With increasing divergence, mutations increase at different rates in the buried, intermediate, and exposed regions of protein structures in a manner that explains the exponential relationship between the divergence of structure and sequence. This work implies that commonly allowed mutations are selected by a set of general constraints that are well defined and whose nature varies with divergence.},
-	Address = {*Medical Research Council Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, United Kingdom.},
-	Annote = {This wasn't as interesting as I'd hoped.  They explicitly exclude the twilight zone from their study.
-Research idea: apply this analysis to DALI conserved residues},
-	Au = {Sasidharan, R and Chothia, C},
-	Author = {Sasidharan, Rajkumar and Chothia, Cyrus},
-	Da = {20070613},
-	Date-Added = {2007-07-02 11:49:53 -0700},
-	Date-Modified = {2007-07-02 11:50:33 -0700},
-	Dep = {20070531},
-	Doi = {10.1073/pnas.0703737104},
-	Edat = {2007/06/02 09:00},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Sasidharan/2007.pdf},
-	Mhda = {2007/06/02 09:00},
-	Number = {24},
-	Own = {NLM},
-	Pages = {10080--10085},
-	Phst = {2007/05/31 {$[$}aheadofprint{$]$}},
-	Pii = {0703737104},
-	Pl = {United States},
-	Pmid = {17540730},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 2007 Jun 12;104(24):10080-5. Epub 2007 May 31.},
-	Stat = {In-Data-Review},
-	Title = {The selection of acceptable protein mutations},
-	Volume = {104},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbQAIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAj/8KpQ9VQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAClNhc2lkaGFyYW4AEAAIAADBcY1JAAAAEQAIAADCqaZFAAAAAQAYAEZtAABGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U2FzaWRoYXJhbjoyMDA3LnBkZgAADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TYXNpZGhhcmFuLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL1Nhc2lkaGFyYW4vMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0703737104}}
-
-@article{Abkevich:1996lr,
-	Abstract = {In this work, we discuss a possible origin of the first biopolymers with stable unique structures. We suggest that at the prebiotic stage of evolution, long organic polymers had to be compact to avoid hydrolysis and had to be soluble and thus must not be exceedingly hydrophobic. We present an algorithm that generates such sequences for model proteins. The evolved sequences turn out to have a stable unique structure, into which they quickly fold. This result illustrates the idea that the unique three-dimensional native structures of first biopolymers could have evolved as a side effect of nonspecific physicochemical factors acting at the prebiotic stage of evolution.},
-	Address = {Harvard University, Department of Chemistry, Cambridge, MA 02138, USA.},
-	Annote = {the main interest to me of this paper is that they mention a result:
-exp(-alpha * N)
-for the number of sequences that fold to stable conformation as a function of the length of the chain: N monomers},
-	Au = {Abkevich, VI and Gutin, AM and Shakhnovich, EI},
-	Author = {Abkevich, V I and Gutin, A M and Shakhnovich, E I},
-	Da = {19960301},
-	Date-Added = {2007-06-27 20:55:12 -0700},
-	Date-Modified = {2007-06-27 20:55:29 -0700},
-	Dcom = {19960301},
-	Edat = {1996/01/23},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Keywords = {*Algorithms; *Biogenesis; Biopolymers; *Evolution; Peptides/*chemistry; *Protein Folding},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Abkevich/1996.pdf},
-	Lr = {20061115},
-	Mhda = {1996/01/23 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {839--844},
-	Pl = {UNITED STATES},
-	Pmid = {8570645},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Biopolymers); 0 (Peptides)},
-	Sb = {IM; S},
-	So = {Proc Natl Acad Sci U S A. 1996 Jan 23;93(2):839-44.},
-	Stat = {MEDLINE},
-	Title = {How the first biopolymers could have evolved},
-	Volume = {93},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa9kIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjjx/8KoXhVQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFia2V2aWNoABAACAAAwXGNSQAAABEACAAAwqjAhQAAAAEAGABGa9kARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFia2V2aWNoOjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0Fia2V2aWNoLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0Fia2V2aWNoLzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Munson:1996fk,
-	Abstract = {Here we describe how the systematic redesign of a protein's hydrophobic core alters its structure and stability. We have repacked the hydrophobic core of the four-helix-bundle protein, Rop, with altered packing patterns and various side chain shapes and sizes. Several designs reproduce the structure and native-like properties of the wild-type, while increasing the thermal stability. Other designs, either with similar sizes but different shapes, or with decreased sizes of the packing residues, destabilize the protein. Finally, overpacking the core with the larger side chains causes a loss of native-like structure. These results allow us to further define the roles of tight residue packing and the burial of hydrophobic surface area in the construction of native-like proteins.},
-	Address = {Department of Molecular Biophysics and Biochemistry, Yale University, New Haven, Connecticut 06520, USA.},
-	Au = {Munson, M and Balasubramanian, S and Fleming, KG and Nagi, AD and O'Brien, R and Sturtevant, JM and Regan, L},
-	Author = {Munson, M and Balasubramanian, S and Fleming, K G and Nagi, A D and O'Brien, R and Sturtevant, J M and Regan, L},
-	Da = {19970206},
-	Date-Added = {2007-06-27 15:36:39 -0700},
-	Date-Modified = {2007-06-27 15:36:56 -0700},
-	Dcom = {19970206},
-	Edat = {1996/08/01},
-	Gr = {GM16769/GM/NIGMS; GM49146-01A1/GM/NIGMS},
-	Issn = {0961-8368 (Print)},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Bacterial Proteins/*chemistry/genetics/metabolism; Calorimetry, Differential Scanning; Circular Dichroism; Guanidine; Guanidines/chemistry; Heat; Magnetic Resonance Spectroscopy; Mutation; Protein Binding; *Protein Conformation; Protein Denaturation; RNA-Binding Proteins/*chemistry/genetics/metabolism; Recombinant Proteins/chemistry/genetics/metabolism; Structure-Activity Relationship; Thermodynamics; Ultracentrifugation},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Munson/1996.pdf},
-	Lr = {20061115},
-	Mhda = {1996/08/01 00:01},
-	Number = {8},
-	Own = {NLM},
-	Pages = {1584--1593},
-	Pl = {UNITED STATES},
-	Pmid = {8844848},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (Guanidines); 0 (RNA-Binding Proteins); 0 (Recombinant Proteins); 0 (Rop protein, ColE1 plasmid); 113-00-8 (Guanidine)},
-	Sb = {IM},
-	So = {Protein Sci. 1996 Aug;5(8):1584-93.},
-	Stat = {MEDLINE},
-	Title = {What makes a protein a protein? Hydrophobic core designs that specify stability and structural properties},
-	Volume = {5},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMMIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB/hz8KoM+hQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk11bnNvbgAQAAgAAMFxjUkAAAARAAgAAMKollgAAAABABgARmzDAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNdW5zb246MTk5Ni5wZGYAAA4AEgAIADEAOQA5ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTXVuc29uLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL011bnNvbi8xOTk2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Magliery:2004lr,
-	Abstract = {Combinatorial methodologies have revolutionized studies in biomolecular function, but they have so far proven less useful for understanding macromolecular structure and stability. This is largely because of the difficulty of screening libraries of molecules for biophysical properties, and the difficulty of interpreting structural effects in complicated molecules. Here, we report a novel, robust, cell-based screen for function of the four-helix bundle protein, Rop. By expression of green fluorescent protein from a ColE1 plasmid, the screen reports the copy number of the plasmid, which is modulated in Escherichia coli by Rop. We have engineered the screen so that the fluorescent phenotype can correspond to either Rop activity or lack thereof. We have used the screen to demonstrate with systematically constructed Rop core variants that not all molecules that bind small stem-loop RNAs in vitro are active in vivo. Rop is well understood from structural work and systematic mutations, which makes it possible to construct rational, targeted libraries. This screen makes it possible to rapidly interrogate such libraries effectively for proper protein folding and stability. In addition to its intended utility for combinatorial experiments in biophysics, the screen will allow further dissection of the mechanism of Rop-mediated plasmid copy number regulation in vivo.},
-	Address = {Department of Molecular Biophysics and Biochemistry, Yale University, New Haven, CT 06520-8114, USA.},
-	Annote = {developed an assay which showed that all but one (the least mutated) of their Rop designs is NOT functionally active in vivo!
-
-this to me shows the difficulty of designing a functional protein de novo (much easier to get the structure right!)
-
-to get functional proteins, the method of Riddle et al. using random combinations and selection (sort of how nature does it) seems like a good strategy},
-	Au = {Magliery, TJ and Regan, L},
-	Author = {Magliery, T J and Regan, L},
-	Da = {20040226},
-	Date-Added = {2007-06-27 15:36:18 -0700},
-	Date-Modified = {2008-05-29 12:08:50 -0700},
-	Dcom = {20040909},
-	Edat = {2004/02/27 05:00},
-	Gr = {GM065750-01/GM/NIGMS; GM49146-09/GM/NIGMS},
-	Jid = {101186484},
-	Journal = {Protein Eng Des Sel},
-	Jt = {Protein engineering, design \& selection : PEDS},
-	Keywords = {Bacterial Proteins/*chemistry; Biophysics/*methods; Crystallography, X-Ray; Escherichia coli/metabolism; Gene Library; Genes, Reporter; Genetic Vectors; Green Fluorescent Proteins; Kinetics; Luminescent Proteins/metabolism; Mutation; Nucleic Acid Conformation; Peptide Library; Phenotype; Plasmids/metabolism; Protein Conformation; Protein Structure, Secondary; RNA/chemistry; RNA-Binding Proteins/*chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Magliery/2004.pdf},
-	Lr = {20061115},
-	Mhda = {2004/09/10 05:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {77--83},
-	Pii = {17/1/77},
-	Pl = {England},
-	Pmid = {14985540},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (Luminescent Proteins); 0 (Peptide Library); 0 (RNA-Binding Proteins); 0 (Rop protein, ColE1 plasmid); 147336-22-9 (Green Fluorescent Proteins); 63231-63-0 (RNA)},
-	Sb = {IM},
-	So = {Protein Eng Des Sel. 2004 Jan;17(1):77-83.},
-	Stat = {MEDLINE},
-	Title = {A cell-based screen for function of the four-helix bundle protein {Rop}: a new tool for combinatorial experiments in biophysics},
-	Volume = {17},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbK8IMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB/gdcKoMFdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACE1hZ2xpZXJ5ABAACAAAwXGNSQAAABEACAAAwqiSxwAAAAEAGABGbK8ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOk1hZ2xpZXJ5OjIwMDQucGRmAAAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL01hZ2xpZXJ5LzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL01hZ2xpZXJ5LzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/protein/gzh010}}
-
-@article{Blake:2001th,
-	Abstract = {Improved sequence alignment at low pairwise identity is important for identifying potential remote homologues in database searches and for obtaining accurate alignments as a prelude to modeling structures by homology. Our work is motivated by two observations: structural data provide superior training examples for developing techniques to improve the alignment of remote homologues; and general substitution patterns for remote homologues differ from those of closely related proteins. We introduce a new set of amino acid residue interchange matrices built from structural superposition data. These matrices exploit known structural homology as a means of characterizing the effect evolution has on residue-substitution profiles. Given their origin, it is not surprising that the individual residue-residue interchange frequencies are chemically sensible.The structural interchange matrices show a significant increase both in pairwise alignment accuracy and in functional annotation/fold recognition accuracy across distantly related sequences. We demonstrate improved pairwise alignment by using superpositions of homologous domains extracted from a structural database as a gold standard and go on to show an increase in fold recognition accuracy using a database of homologous fold families. This was applied to the unassigned open reading frames from the genome of Helicobacter pylori to identify five matches, two of which are not represented by new annotations in the sequence databases. In addition, we describe a new cyclic permutation strategy to identify distant homologues that experienced gene duplication and subsequent deletions. Using this method, we have identified a potential homologue to one additional previously unassigned open reading frame from the H. pylori genome.},
-	Address = {Department of Cellular and Molecular Pharmacology, University of California, Box 0450, San Francisco, CA 94143, USA.},
-	Au = {Blake, JD and Cohen, FE},
-	Author = {Blake, J D and Cohen, F E},
-	Ci = {Copyright 2001 Academic Press.},
-	Da = {20010320},
-	Date-Added = {2007-06-26 11:15:18 -0700},
-	Date-Modified = {2007-07-23 16:50:52 -0700},
-	Dcom = {20010419},
-	Doi = {10.1006/jmbi.2001.4495},
-	Edat = {2001/03/20 10:00},
-	Group = {LitSearch; Reviewed; Scoring Matrices; Printed},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acid Sequence; Amino Acids/*chemistry; Databases, Factual; Genome, Bacterial; Helicobacter pylori/genetics; Hemoglobins; Immunoglobulins; Molecular Sequence Data; Myoglobin; Open Reading Frames; Protein Structure, Secondary; Sequence Alignment/*methods/*statistics \& numerical data; *Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Blake/2001.pdf},
-	Lr = {20061115},
-	Mhda = {2001/04/21 10:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {721--735},
-	Pii = {S0022-2836(01)94495-X},
-	Pl = {England},
-	Pmid = {11254392},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Hemoglobins); 0 (Immunoglobulins); 0 (Myoglobin)},
-	Sb = {IM},
-	So = {J Mol Biol. 2001 Mar 23;307(2):721-35.},
-	Stat = {MEDLINE},
-	Title = {Pairwise sequence alignment below the twilight zone},
-	Volume = {307},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/IIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB82vsKl7T9QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUJsYWtlAAAQAAgAAMFxjUkAAAARAAgAAMKmT68AAAABABgARmvyAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCbGFrZToyMDAxLnBkZgAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0JsYWtlLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQmxha2UvMjAwMS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.2001.4495}}
-
-@article{Tang:2003ij,
-	Abstract = {Structural alignments often reveal relationships between proteins that cannot be detected using sequence alignment alone. However, profile search methods based entirely on structural alignments alone have not been found to be effective in finding remote homologs. Here, we explore the role of structural information in remote homolog detection and sequence alignment. To this end, we develop a series of hybrid multidimensional alignment profiles that combine sequence, secondary and tertiary structure information into hybrid profiles. Sequence-based profiles are profiles whose position-specific scoring matrix is derived from sequence alignment alone; structure-based profiles are those derived from multiple structure alignments. We compare pure sequence-based profiles to pure structure-based profiles, as well as to hybrid profiles that use combined sequence-and-structure-based profiles, where sequence-based profiles are used in loop/motif regions and structural information is used in core structural regions. All of the hybrid methods offer significant improvement over simple profile-to-profile alignment. We demonstrate that both sequence-based and structure-based profiles contribute to remote homology detection and alignment accuracy, and that each contains some unique information. We discuss the implications of these results for further improvements in amino acid sequence and structural analysis.},
-	Address = {Department of Biochemistry and Molecular Biophysics, Howard Hughes Medical Institute, Columbia University, New York, NY 10032, USA.},
-	Au = {Tang, CL and Xie, L and Koh, IY and Posy, S and Alexov, E and Honig, B},
-	Author = {Tang, Christopher L and Xie, Lei and Koh, Ingrid Y Y and Posy, Shoshana and Alexov, Emil and Honig, Barry},
-	Da = {20031203},
-	Date-Added = {2007-06-26 11:14:02 -0700},
-	Date-Modified = {2007-07-20 13:24:48 -0700},
-	Dcom = {20040116},
-	Edat = {2003/12/04 05:00},
-	Gr = {GM-30518/GM/NIGMS},
-	Group = {LitSearch; Printed; Reviewed},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acid Sequence; Molecular Sequence Data; Protein Conformation; Protein Folding; Proteins/*chemistry; *Sequence Alignment; Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Tang/2003.pdf},
-	Lr = {20061115},
-	Mhda = {2004/01/17 05:00},
-	Number = {5},
-	Own = {NLM},
-	Pages = {1043--1062},
-	Pii = {S0022283603012920},
-	Pl = {England},
-	Pmid = {14643665},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 2003 Dec 12;334(5):1043-62.},
-	Stat = {MEDLINE},
-	Title = {On the role of structural information in remote homology detection and sequence alignment: new methods using hybrid sequence profiles},
-	Volume = {334},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbSgIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB82xcKl7UlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFRhbmcAEAAIAADBcY1JAAAAEQAIAADCpk+5AAAAAQAYAEZtKABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6VGFuZzoyMDAzLnBkZgAADgASAAgAMgAwADAAMwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9UYW5nLzIwMDMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1RhbmcvMjAwMy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Solis:2007sp,
-	Abstract = {We examine the informatic characteristics of amino acid representations based on physical properties. We demonstrate that sequences rewritten using contracted alphabets based on physical properties do not encode local folding information well. The best four-character alphabet can only encode approximately 57% of the maximum possible amount of structural information. This result suggests that property-based representations that operate on a local length scale are not likely to be useful in homology searches and fold-recognition exercises.},
-	Address = {Department of Pharmacology and Biological Chemistry, Mount Sinai School of Medicine, One Gustave L. Levy Place, New York, New York 10029, USA.},
-	Au = {Solis, AD and Rackovsky, S},
-	Author = {Solis, A D and Rackovsky, S},
-	Ci = {2007 Wiley-Liss, Inc.},
-	Da = {20070511},
-	Date-Added = {2007-06-26 11:10:03 -0700},
-	Date-Modified = {2007-07-23 16:51:21 -0700},
-	Dcom = {20070607},
-	Doi = {10.1002/prot.21434},
-	Edat = {2007/03/28 09:00},
-	Gr = {2R01 LM-06789/LM/NLM},
-	Group = {LitSearch; Reviewed; Printed},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Sequence; Cluster Analysis; Molecular Sequence Data; *Protein Folding; Proteins/*chemistry/*metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Solis/2007.pdf},
-	Mhda = {2007/06/08 09:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {785--788},
-	Pl = {United States},
-	Pmid = {17387739},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2007 Jun 1;67(4):785-8.},
-	Stat = {MEDLINE},
-	Title = {Property-based sequence representations do not adequately encode local protein folding information},
-	Volume = {67},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRoIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8wJcKluc5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNvbGlzAAAQAAgAAMFxjUkAAAARAAgAAMKmHD4AAAABABgARm0aAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTb2xpczoyMDA3LnBkZgAOABIACAAyADAAMAA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NvbGlzLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU29saXMvMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.21434}}
-
-@article{Solis:2006hc,
-	Abstract = {We show that statistical potentials and threading score functions, derived from finite data sets, are informatic functions, and that their performance depends on the manner in which data are classified and compressed. The choice of sequence and structural parameters affects estimates of the conditional probabilities P(C|S), the quantification of the effect of sequence S on conformation C, and determines the amount of information extracted from the data set, as measured by information gain. The mathematical link between information gain and mean conformational energy, established in this work using the local backbone potential as model, demonstrates that manipulation of descriptive parameters also alters the "energy" values assigned to native conformation and to decoy structures in the test pool, and consequently, the performance of such statistical potential functions in fold recognition exercises. We show that sequence and structural partitions that maximize information gain also minimize the mean energy of the ensemble of native conformations. Moreover, we establish an informatic basis for the placement of the native score within an energy spectrum given by the decoy pool in a threading exercise. We discover that, among all informatic quantities, information gain is the best predictor of threading success, even better than the standard Z-score. Consequently, the choices of sequence and structural descriptors, extent of compression, and levels of discretization that maximize information gain must also produce the best potential functions. Strategies to optimize these parameters with respect to information extraction are therefore relevant to building better statistical potentials. Last, we demonstrate that the backbone torsion potential, defined by the trimer sequence, can be an effective tool in greatly reducing the set of possible conformations from a vast decoy pool.},
-	Address = {Department of Pharmacology and Biological Chemistry, Mount Sinai School of Medicine, Box 1215, New York, New York 10029, USA.},
-	Annote = {The contracted alphabets here are for trimers of aa's.
-No ROC},
-	Au = {Solis, AD and Rackovsky, S},
-	Author = {Solis, Armando D and Rackovsky, S},
-	Ci = {2006 Wiley-Liss, Inc.},
-	Da = {20060215},
-	Date-Added = {2007-06-26 11:05:35 -0700},
-	Date-Modified = {2007-07-20 16:24:53 -0700},
-	Dcom = {20060331},
-	Doi = {10.1002/prot.20501},
-	Edat = {2006/01/06 09:00},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Sequence; Databases, Protein; Entropy; Information Systems; Models, Statistical; Probability; Protein Conformation; Proteins/*chemistry/metabolism; Sequence Alignment; Sequence Homology, Amino Acid; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Solis/2006.pdf},
-	Mhda = {2006/04/01 09:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {892--908},
-	Pl = {United States},
-	Pmid = {16395676},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2006 Mar 1;62(4):892-908.},
-	Stat = {MEDLINE},
-	Title = {Improvement of statistical potentials and threading score functions using information maximization},
-	Volume = {62},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRoIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8zgsKl5QpQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNvbGlzAAAQAAgAAMFxjUkAAAARAAgAAMKmR3oAAAABABgARm0aAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTb2xpczoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NvbGlzLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU29saXMvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.20501}}
-
-@article{Tan:2006dp,
-	Abstract = {Aligning distantly related protein sequences is a long-standing problem in bioinformatics, and a key for successful protein structure prediction. Its importance is increasing recently in the context of structural genomics projects because more and more experimentally solved structures are available as templates for protein structure modeling. Toward this end, recent structure prediction methods employ profile-profile alignments, and various ways of aligning two profiles have been developed. More fundamentally, a better amino acid similarity matrix can improve a profile itself; thereby resulting in more accurate profile-profile alignments. Here we have developed novel amino acid similarity matrices from knowledge-based amino acid contact potentials. Contact potentials are used because the contact propensity to the other amino acids would be one of the most conserved features of each position of a protein structure. The derived amino acid similarity matrices are tested on benchmark alignments at three different levels, namely, the family, the superfamily, and the fold level. Compared to BLOSUM45 and the other existing matrices, the contact potential-based matrices perform comparably in the family level alignments, but clearly outperform in the fold level alignments. The contact potential-based matrices perform even better when suboptimal alignments are considered. Comparing the matrices themselves with each other revealed that the contact potential-based matrices are very different from BLOSUM45 and the other matrices, indicating that they are located in a different basin in the amino acid similarity matrix space.},
-	Address = {Department of Computer Sciences, College of Science, Purdue University, West Lafayette, Indiana 47907, USA. dkihara@purdue.edu},
-	Annote = {Fig. 2(A) has the hierarchy I need; also need to get the frequency counts for the matrix.},
-	Au = {Tan, YH and Huang, H and Kihara, D},
-	Author = {Tan, Yen Hock and Huang, He and Kihara, Daisuke},
-	Da = {20060821},
-	Date-Added = {2007-06-26 11:04:09 -0700},
-	Date-Modified = {2007-08-03 10:52:32 -0700},
-	Dcom = {20061004},
-	Doi = {10.1002/prot.21020},
-	Edat = {2006/06/27 09:00},
-	Gr = {R01 GM-075004/GM/NIGMS},
-	Group = {LitSearch; Scoring Matrices; Alphabets; Printed; Reviewed; Forward; Backward},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acids/*chemistry/genetics; Computational Biology/methods; Databases, Protein/statistics \& numerical data; Protein Folding; Proteins/*chemistry/genetics; Reproducibility of Results; Sequence Alignment/*methods/statistics \& numerical data; Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Tan/2006.pdf},
-	Lr = {20061115},
-	Mhda = {2006/10/05 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {587--600},
-	Pl = {United States},
-	Pmid = {16799934},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2006 Aug 15;64(3):587-600.},
-	Stat = {MEDLINE},
-	Title = {Statistical potential-based amino acid similarity matrices for aligning distantly related protein sequences},
-	Volume = {64},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbScIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB82tcKl7TdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1RhbgAAEAAIAADBcY1JAAAAEQAIAADCpk+nAAAAAQAYAEZtJwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6VGFuOjIwMDYucGRmAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVGFuLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVGFuLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.21020}}
-
-@article{Zhou:2005tg,
-	Abstract = {Recognizing structural similarity without significant sequence identity has proved to be a challenging task. Sequence-based and structure-based methods as well as their combinations have been developed. Here, we propose a fold-recognition method that incorporates structural information without the need of sequence-to-structure threading. This is accomplished by generating sequence profiles from protein structural fragments. The structure-derived sequence profiles allow a simple integration with evolution-derived sequence profiles and secondary-structural information for an optimized alignment by efficient dynamic programming. The resulting method (called SP(3)) is found to make a statistically significant improvement in both sensitivity of fold recognition and accuracy of alignment over the method based on evolution-derived sequence profiles alone (SP) and the method based on evolution-derived sequence profile and secondary structure profile (SP(2)). SP(3) was tested in SALIGN benchmark for alignment accuracy and Lindahl, PROSPECTOR 3.0, and LiveBench 8.0 benchmarks for remote-homology detection and model accuracy. SP(3) is found to be the most sensitive and accurate single-method server in all benchmarks tested where other methods are available for comparison (although its results are statistically indistinguishable from the next best in some cases and the comparison is subjected to the limitation of time-dependent sequence and/or structural library used by different methods.). In LiveBench 8.0, its accuracy rivals some of the consensus methods such as ShotGun-INBGU, Pmodeller3, Pcons4, and ROBETTA. SP(3) fold-recognition server is available on http://theory.med.buffalo.edu.},
-	Address = {Howard Hughes Medical Institute Center for Single Molecule Biophysics, Department of Physiology & Biophysics, State University of New York at Buffalo, 14214, USA.},
-	Au = {Zhou, H and Zhou, Y},
-	Author = {Zhou, Hongyi and Zhou, Yaoqi},
-	Ci = {(c) 2004 Wiley-Liss, Inc.},
-	Da = {20041229},
-	Date-Added = {2007-06-26 11:02:51 -0700},
-	Date-Modified = {2007-07-20 13:32:27 -0700},
-	Dcom = {20060612},
-	Doi = {10.1002/prot.20308},
-	Edat = {2004/11/04 09:00},
-	Gr = {R01 GM 068530/GM/NIGMS; R01 GM 966049/GM/NIGMS; R01 GM066049-02/GM/NIGMS; R01 GM068530-02/GM/NIGMS},
-	Group = {LitSearch; Reviewed},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Algorithms; Amino Acid Sequence; Computational Biology/*methods; Databases, Protein; Evolution; Evolution, Molecular; Models, Statistical; Peptide Library; Protein Conformation; Protein Folding; Protein Structure, Secondary; Sensitivity and Specificity; Sequence Alignment/*methods; Sequence Analysis, Protein; Sequence Homology, Amino Acid; Software; Time Factors},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zhou/2005a.pdf},
-	Lr = {20061115},
-	Mhda = {2006/06/13 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {321--328},
-	Pl = {United States},
-	Pmid = {15523666},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Peptide Library)},
-	Sb = {IM},
-	So = {Proteins. 2005 Feb 1;58(2):321-8.},
-	Stat = {MEDLINE},
-	Title = {Fold recognition by combining sequence profiles derived from evolution and from depth-dependent structural alignment of fragments},
-	Volume = {58},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVwJMjAwNWEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB82n8Kl7SFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFpob3UAEAAIAADBcY1JAAAAEQAIAADCpk+RAAAAAQAYAEZtXABGa88ARmrVAEZqGwBGZGgAQIlDAAIAQGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WmhvdToyMDA1YS5wZGYADgAUAAkAMgAwADAANQBhAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1pob3UvMjAwNWEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmhvdS8yMDA1YS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.20308}}
-
-@misc{gorban-2005,
-	Annote = {Random article on arxiv.  Interesting but not a blockbuster.
-No ROC.},
-	Author = {A.~N. Gorban and M. Kudryashev and T. Popova},
-	Date-Added = {2007-06-26 10:53:10 -0700},
-	Date-Modified = {2007-08-03 10:56:00 -0700},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Gorban/2005.pdf},
-	Title = {On the Way to Protein Alphabet: Informational Classification of Amino Acids in Comparison to Other Classifications},
-	Url = {http://www.citebase.org/abstract?id=oai:arXiv.org:q-bio/0501019},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEwIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB81hsKl6qJQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkdvcmJhbgAQAAgAAMFxjUkAAAARAAgAAMKmTRIAAAABABgARmxMAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHb3JiYW46MjAwNS5wZGYAAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvR29yYmFuLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0dvcmJhbi8yMDA1LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://www.citebase.org/abstract?id=oai:arXiv.org:q-bio/0501019}}
-
-@article{Wang:2005ai,
-	Abstract = {BACKGROUND: Predicting the subcellular localization of proteins is important for determining the function of proteins. Previous works focused on predicting protein localization in Gram-negative bacteria obtained good results. However, these methods had relatively low accuracies for the localization of extracellular proteins. This paper studies ways to improve the accuracy for predicting extracellular localization in Gram-negative bacteria. RESULTS: We have developed a system for predicting the subcellular localization of proteins for Gram-negative bacteria based on amino acid subalphabets and a combination of multiple support vector machines. The recall of the extracellular site and overall recall of our predictor reach 86.0% and 89.8%, respectively, in 5-fold cross-validation. To the best of our knowledge, these are the most accurate results for predicting subcellular localization in Gram-negative bacteria. CONCLUSION: Clustering 20 amino acids into a few groups by the proposed greedy algorithm provides a new way to extract features from protein sequences to cover more adjacent amino acids and hence reduce the dimensionality of the input vector of protein features. It was observed that a good amino acid grouping leads to an increase in prediction performance. Furthermore, a proper choice of a subset of complementary support vector machines constructed by different features of proteins maximizes the prediction accuracy.},
-	Address = {Bioinformatics Institute, Matrix, Singapore 138671. jiren@bii.a-star.edu.sg},
-	Au = {Wang, J and Sung, WK and Krishnan, A and Li, KB},
-	Author = {Wang, Jiren and Sung, Wing-Kin and Krishnan, Arun and Li, Kuo-Bin},
-	Da = {20050825},
-	Date-Added = {2007-06-26 10:46:39 -0700},
-	Date-Modified = {2007-06-26 10:47:03 -0700},
-	Dcom = {20060322},
-	Dep = {20050713},
-	Doi = {10.1186/1471-2105-6-174},
-	Edat = {2005/07/14 09:00},
-	Issn = {1471-2105 (Electronic)},
-	Jid = {100965194},
-	Journal = {BMC Bioinformatics},
-	Jt = {BMC bioinformatics},
-	Keywords = {Algorithms; Cluster Analysis; Gram-Negative Bacteria/*chemistry/*genetics; Models, Statistical; Predictive Value of Tests; Sequence Analysis, Protein/*methods; Statistics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/2005.pdf},
-	Mhda = {2006/03/23 09:00},
-	Own = {NLM},
-	Pages = {174},
-	Phst = {2005/02/14 {$[$}received{$]$}; 2005/07/13 {$[$}accepted{$]$}; 2005/07/13 {$[$}aheadofprint{$]$}},
-	Pii = {1471-2105-6-174},
-	Pl = {England},
-	Pmid = {16011808},
-	Pst = {epublish},
-	Pt = {Journal Article; Validation Studies},
-	Pubm = {Electronic},
-	Sb = {IM},
-	So = {BMC Bioinformatics. 2005 Jul 13;6:174.},
-	Stat = {MEDLINE},
-	Title = {Protein subcellular localization prediction for Gram-negative bacteria using amino acid subalphabets and a combination of multiple support vector machines},
-	Volume = {6},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8wxMKluwNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADCph1zAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoyMDA1LnBkZgAADgASAAgAMgAwADAANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYW5nLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dhbmcvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2105-6-174}}
-
-@article{Rackovsky:1993kl,
-	Abstract = {This paper investigates quantitatively the characteristics of the local folding code. The overlapping four-residue fragments which make up the amino acid sequences of 114 proteins are divided into classes on the basis of the physical properties of their constituent amino acids. The distribution of structural types associated with each class of sequence fragment is determined and compared with an ensemble of random structural distributions of the same size selected from the actual protein structures. A criterion is proposed, based on the relative entropies of the two types of distribution, and on a hypothesis as to the characteristics of fragments which code for local structure, that makes it possible to identify those four-residue sequence elements which encode specific time-averaged structure. It is determined that, by this criterion, only 60-70% of the four-residue fragments encode specific structures. It is suggested that the remaining sequence fragments intrinsically encode susceptibility to conformational alteration under the influence of long-range interactions and that this susceptibility is required for correct folding of the molecule. This feature introduces an inherent indeterminacy into the local folding code. The implications of this observation for the prediction of protein structure by various methods are briefly discussed.},
-	Address = {Department of Biophysics, School of Medicine and Dentistry, University of Rochester, NY 14642.},
-	Annote = {Table 1 gives the relevant reduced alphabet of 5 letters.},
-	Au = {Rackovsky, S},
-	Author = {Rackovsky, S},
-	Da = {19930218},
-	Date-Added = {2007-06-26 10:44:36 -0700},
-	Date-Modified = {2007-08-03 11:40:50 -0700},
-	Dcom = {19930218},
-	Edat = {1993/01/15},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Keywords = {Amino Acids/chemistry; Dipeptides/chemistry; Mathematical Computing; *Models, Chemical; Protein Conformation; *Protein Folding; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Rackovsky/1993.pdf},
-	Lr = {20061115},
-	Mhda = {1993/01/15 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {644--648},
-	Pl = {UNITED STATES},
-	Pmid = {8421700},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Dipeptides)},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 1993 Jan 15;90(2):644-8.},
-	Stat = {MEDLINE},
-	Title = {On the nature of the protein folding code},
-	Volume = {90},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOUIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB84C8Kl8LZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVJhY2tvdnNreQAAEAAIAADBcY1JAAAAEQAIAADCplMmAAAAAQAYAEZs5QBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UmFja292c2t5OjE5OTMucGRmAA4AEgAIADEAOQA5ADMALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUmFja292c2t5LzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUmFja292c2t5LzE5OTMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Zhang:2005qe,
-	Abstract = {Here, we report a novel protein sequence descriptor-based remote homology identification method, able to infer fold relationships without the explicit knowledge of structure. In a first phase, we have individually benchmarked 13 different descriptor types in fold identification experiments in a highly diverse set of protein sequences. The relevant descriptors were related to the fold class membership by using simple similarity measures in the descriptor spaces, such as the cosine angle. Our results revealed that the three best-performing sets of descriptors were the sequence-alignment-based descriptor using PSI-BLAST e-values, the descriptors based on the alignment of secondary structural elements (SSEA), and the descriptors based on the occurrence of PROSITE functional motifs. In a second phase, the three top-performing descriptors were combined to obtain a final method with improved performance, which we named DescFold. Class membership was predicted by Support Vector Machine (SVM) learning. In comparison with the individual PSI-BLAST-based descriptor, the rate of remote homology identification increased from 33.7% to 46.3%. We found out that the composite set of descriptors was able to identify the true remote homolog for nearly every sixth sequence at the 95% confidence level, or some 10% more than a single PSI-BLAST search. We have benchmarked the DescFold method against several other state-of-the-art fold recognition algorithms for the 172 LiveBench-8 targets, and we concluded that it was able to add value to the existing techniques by providing a confident hit for at least 10% of the sequences not identifiable by the previously known methods.},
-	Address = {Nestle Research Center, BioAnalyti-cal Science, CH-1000 Lausanne 26, Switzerland. Ziding. Zhang@rdls.nestle.com.},
-	Au = {Zhang, Z and Kochhar, S and Grigorov, MG},
-	Author = {Zhang, Ziding and Kochhar, Sunil and Grigorov, Martin G},
-	Da = {20050120},
-	Date-Added = {2007-06-26 10:43:15 -0700},
-	Date-Modified = {2007-07-20 13:31:40 -0700},
-	Dcom = {20050721},
-	Dep = {20050104},
-	Doi = {10.1110/ps.041035505},
-	Edat = {2005/01/06 09:00},
-	Group = {LitSearch; Reviewed},
-	Issn = {0961-8368 (Print)},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Algorithms; Amino Acid Motifs; Amino Acid Sequence; Artificial Intelligence; Databases, Protein; Models, Molecular; Models, Statistical; Molecular Sequence Data; Protein Conformation; Protein Folding; Protein Structure, Secondary; Protein Structure, Tertiary; Proteomics/*methods; Sequence Analysis, Protein; Sequence Homology, Amino Acid; Software; Structural Homology, Protein},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zhang/2005.pdf},
-	Mhda = {2005/07/22 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {431--444},
-	Phst = {2005/01/04 {$[$}aheadofprint{$]$}},
-	Pii = {ps.041035505},
-	Pl = {United States},
-	Pmid = {15632283},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Protein Sci. 2005 Feb;14(2):431-44. Epub 2005 Jan 4.},
-	Stat = {MEDLINE},
-	Title = {Descriptor-based protein remote homology identification},
-	Volume = {14},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVkIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB83f8Kl78lQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVpoYW5nAAAQAAgAAMFxjUkAAAARAAgAAMKmUjkAAAABABgARm1ZAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpaaGFuZzoyMDA1LnBkZgAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1poYW5nLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmhhbmcvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1110/ps.041035505}}
-
-@article{Edgar:2004eu,
-	Abstract = {Methods for discovery of local similarities and estimation of evolutionary distance by identifying k-mers (contiguous subsequences of length k) common to two sequences are described. Given unaligned sequences of length L, these methods have O(L) time complexity. The ability of compressed amino acid alphabets to extend these techniques to distantly related proteins was investigated. The performance of these algorithms was evaluated for different alphabets and choices of k using a test set of 1848 pairs of structurally alignable sequences selected from the FSSP database. Distance measures derived from k-mer counting were found to correlate well with percentage identity derived from sequence alignments. Compressed alphabets were seen to improve performance in local similarity discovery, but no evidence was found of improvements when applied to distance estimates. The performance of our local similarity discovery method was compared with the fast Fourier transform (FFT) used in MAFFT, which has O(L log L) time complexity. The method for achieving comparable coverage to FFT is revealed here, and is more than an order of magnitude faster. We suggest using k-mer distance for fast, approximate phylogenetic tree construction, and show that a speed improvement of more than three orders of magnitude can be achieved relative to standard distance methods, which require alignments.},
-	Address = {bob{\char64}drive5.com},
-	Annote = {Table 1 gives (some of) the alphabets, SE-B/V.  Created using BL62 or VTML and optimizing an information function.  Describe possible performance benefits to using a compressed alphabet!},
-	Au = {Edgar, RC},
-	Author = {Edgar, Robert C},
-	Da = {20040119},
-	Date-Added = {2007-06-26 10:39:43 -0700},
-	Date-Modified = {2007-08-03 11:09:14 -0700},
-	Dcom = {20040211},
-	Dep = {20040116},
-	Doi = {10.1093/nar/gkh180},
-	Edat = {2004/01/20 05:00},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Keywords = {Algorithms; Amino Acids/*analysis; Computational Biology/*methods; *Evolution, Molecular; Molecular Sequence Data; Phylogeny; Proteins/chemistry; Sequence Alignment/methods; *Sequence Homology, Amino Acid; Software; Time Factors},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Edgar/2004.pdf},
-	Mhda = {2004/02/12 05:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {380--385},
-	Phst = {2004 {$[$}ppublish{$]$}},
-	Pii = {32/1/380},
-	Pl = {England},
-	Pmid = {14729922},
-	Pst = {epublish},
-	Pt = {Journal Article},
-	Pubm = {Electronic-Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2004 Jan 16;32(1):380-5. Print 2004.},
-	Stat = {MEDLINE},
-	Title = {Local homology recognition and distance measures in linear time using compressed amino acid alphabets},
-	Volume = {32},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbC8IMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB81u8Kl60NQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUVkZ2FyAAAQAAgAAMFxjUkAAAARAAgAAMKmTbMAAAABABgARmwvAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpFZGdhcjoyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0VkZ2FyLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRWRnYXIvMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/nar/gkh180}}
-
-@article{Friedberg:2002nx,
-	Abstract = {Many protein pairs that share the same fold do not have any detectable sequence similarity, providing a valuable source of information for studying sequence-structure relationship. In this study, we use a stringent data set of structurally similar, sequence-dissimilar protein pairs to characterize residues that may play a role in the determination of protein structure and/or function. For each protein in the database, we identify amino-acid positions that show residue conservation within both close and distant family members. These positions are termed "persistently conserved". We then proceed to determine the "mutually" persistently conserved (MPC) positions: those structurally aligned positions in a protein pair that are persistently conserved in both pair mates. Because of their intra- and interfamily conservation, these positions are good candidates for determining protein fold and function. We find that 45% of the persistently conserved positions are mutually conserved. A significant fraction of them are located in critical positions for secondary structure determination, they are mostly buried, and many of them form spatial clusters within their protein structures. A substitution matrix based on the subset of MPC positions shows two distinct characteristics: (i) it is different from other available matrices, even those that are derived from structural alignments; (ii) its relative entropy is high, emphasizing the special residue restrictions imposed on these positions. Such a substitution matrix should be valuable for protein design experiments.},
-	Address = {Department of Molecular Genetics and Biotechnology, The Hebrew University-Hadassah Medical School, Jerusalem 91120, Israel.},
-	Au = {Friedberg, I and Margalit, H},
-	Author = {Friedberg, Iddo and Margalit, Hanah},
-	Da = {20020115},
-	Date-Added = {2007-06-26 10:35:40 -0700},
-	Date-Modified = {2007-07-20 12:50:29 -0700},
-	Dcom = {20020312},
-	Edat = {2002/01/16 10:00},
-	Group = {LitSearch; Reviewed},
-	Issn = {0961-8368 (Print)},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Amino Acid Motifs; Animals; Databases, Factual; Humans; Hydrolases/*chemistry; Lipase/*chemistry; Peptides; Protein Conformation; Protein Folding; Proteins/analysis/*chemistry/classification/genetics; Sequence Alignment/*methods; Solvents; Xanthobacter/enzymology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Friedberg/2002.pdf},
-	Lr = {20061115},
-	Mhda = {2002/03/13 10:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {350--360},
-	Pl = {United States},
-	Pmid = {11790845},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Peptides); 0 (Proteins); 0 (Solvents); EC 3.- (Hydrolases); EC 3.1.1.- (lipase B, Candida antarctica); EC 3.1.1.3 (Lipase); EC 3.8.1.5 (haloalkane dehalogenase)},
-	Sb = {IM},
-	So = {Protein Sci. 2002 Feb;11(2):350-60.},
-	Stat = {MEDLINE},
-	Title = {Persistently conserved positions in structurally similar, sequence dissimilar proteins: roles in preserving protein fold and function},
-	Volume = {11},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEAIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB82mMKl7RdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUZyaWVkYmVyZwAAEAAIAADBcY1JAAAAEQAIAADCpk+HAAAAAQAYAEZsQABGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RnJpZWRiZXJnOjIwMDIucGRmAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRnJpZWRiZXJnLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRnJpZWRiZXJnLzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Kolinski:2004wd,
-	Abstract = {Protein modeling could be done on various levels of structural details, from simplified lattice or continuous representations, through high resolution reduced models, employing the united atom representation, to all-atom models of the molecular mechanics. Here I describe a new high resolution reduced model, its force field and applications in the structural proteomics. The model uses a lattice representation with 800 possible orientations of the virtual alpha carbon-alpha carbon bonds. The sampling scheme of the conformational space employs the Replica Exchange Monte Carlo method. Knowledge-based potentials of the force field include: generic protein-like conformational biases, statistical potentials for the short-range conformational propensities, a model of the main chain hydrogen bonds and context-dependent statistical potentials describing the side group interactions. The model is more accurate than the previously designed lattice models and in many applications it is complementary and competitive in respect to the all-atom techniques. The test applications include: the ab initio structure prediction, multitemplate comparative modeling and structure prediction based on sparse experimental data. Especially, the new approach to comparative modeling could be a valuable tool of the structural proteomics. It is shown that the new approach goes beyond the range of applicability of the traditional methods of the protein comparative modeling.},
-	Address = {Faculty of Chemistry, Warsaw University, Warszawa, Poland. Kolinski@chem.uw.edu.pl},
-	Au = {Kolinski, A},
-	Author = {Kolinski, Andrzej},
-	Da = {20040625},
-	Date-Added = {2007-06-26 10:34:10 -0700},
-	Date-Modified = {2007-07-20 12:59:45 -0700},
-	Dcom = {20050225},
-	Doi = {035001349},
-	Edat = {2004/06/26 05:00},
-	Group = {LitSearch; Reviewed},
-	Issn = {0001-527X (Print)},
-	Jid = {14520300R},
-	Journal = {Acta Biochim Pol},
-	Jt = {Acta biochimica Polonica},
-	Keywords = {Amino Acid Sequence; Animals; Carbon/chemistry; Crystallography, X-Ray; Databases; Humans; Hydrogen Bonding; Mathematics; Models, Molecular; Models, Theoretical; Molecular Sequence Data; Protein Conformation; Protein Structure, Tertiary; Proteins/*chemistry; Proteomics/*methods},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kolinski/2004.pdf},
-	Lr = {20051116},
-	Mhda = {2005/02/26 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {349--371},
-	Phst = {2004/04/19 {$[$}received{$]$}; 2004/05/27 {$[$}accepted{$]$}},
-	Pl = {Poland},
-	Pmid = {15218533},
-	Pst = {ppublish},
-	Pt = {Journal Article; Review},
-	Pubm = {Print},
-	Rf = {90},
-	Rn = {0 (Proteins); 7440-44-0 (Carbon)},
-	Sb = {IM},
-	So = {Acta Biochim Pol. 2004;51(2):349-71.},
-	Stat = {MEDLINE},
-	Title = {Protein modeling and structure prediction with a reduced representation},
-	Volume = {51},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIUIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8zYMKl5LZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEtvbGluc2tpABAACAAAwXGNSQAAABEACAAAwqZHJgAAAAEAGABGbIUARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOktvbGluc2tpOjIwMDQucGRmAAAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0tvbGluc2tpLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0tvbGluc2tpLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/035001349}}
-
-@article{Doi:2005cr,
-	Abstract = {Searching for functional proteins among random-sequence libraries is a major challenge of protein engineering; the difficulties include the poor solubility of many random-sequence proteins. A library in which most of the polypeptides are soluble and stable would therefore be of great benefit. Although modern proteins consist of 20 amino acids, it has been suggested that early proteins evolved from a reduced alphabet. Here, we have constructed a library of random-sequence proteins consisting of only five amino acids, Ala, Gly, Val, Asp and Glu, which are believed to have been the most abundant in the prebiotic environment. Expression and characterization of arbitrarily chosen proteins in the library indicated that five-alphabet random-sequence proteins have higher solubility than do 20-alphabet random-sequence proteins with a similar level of hydrophobicity. The results support the reduced-alphabet hypothesis of the primordial genetic code and should also be helpful in constructing optimized protein libraries for evolutionary protein engineering.},
-	Address = {Department of Biosciences and Informatics, Keio University, 3-14-1 Hiyoshi, Kohoku-ku, Yokohama 223-8522, Japan.},
-	Au = {Doi, N and Kakukawa, K and Oishi, Y and Yanagawa, H},
-	Author = {Doi, Nobuhide and Kakukawa, Koichi and Oishi, Yuko and Yanagawa, Hiroshi},
-	Da = {20050616},
-	Date-Added = {2007-06-26 10:32:06 -0700},
-	Date-Modified = {2007-07-23 16:55:25 -0700},
-	Dcom = {20050913},
-	Dep = {20050531},
-	Doi = {10.1093/protein/gzi034},
-	Edat = {2005/06/02 09:00},
-	Group = {LitSearch; Reviewed; Printed},
-	Issn = {1741-0126 (Print)},
-	Jid = {101186484},
-	Journal = {Protein Eng Des Sel},
-	Jt = {Protein engineering, design \& selection : PEDS},
-	Keywords = {Amino Acid Sequence; Amino Acids/*chemistry; Base Sequence; Cloning, Molecular; Escherichia coli; Evolution, Molecular; *Gene Library; Hydrophobicity; Molecular Sequence Data; Protein Biosynthesis; *Protein Engineering; Proteins/*chemistry/genetics/metabolism; Solubility},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Doi/2005.pdf},
-	Lr = {20061115},
-	Mhda = {2005/09/15 09:00},
-	Number = {6},
-	Own = {NLM},
-	Pages = {279--284},
-	Phst = {2005/05/31 {$[$}aheadofprint{$]$}},
-	Pii = {gzi034},
-	Pl = {England},
-	Pmid = {15928003},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Protein Eng Des Sel. 2005 Jun;18(6):279-84. Epub 2005 May 31.},
-	Stat = {MEDLINE},
-	Title = {High solubility of random-sequence proteins consisting of five kinds of primitive amino acids},
-	Volume = {18},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCoIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB83ScKl72RQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0RvaQAAEAAIAADBcY1JAAAAEQAIAADCplHUAAAAAQAYAEZsKgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RG9pOjIwMDUucGRmAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRG9pLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRG9pLzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/protein/gzi034}}
-
-@article{Wrabl:2005rr,
-	Abstract = {Understanding of amino acid type co-occurrence in trusted multiple sequence alignments is a prerequisite for improved sequence alignment and remote homology detection algorithms. Two objective approaches were used to investigate co-occurrence, both based on variance maximization of the weighted residue frequencies in columns taken from a large alignment database. The first approach discretely grouped amino acid types, and the second approach extracted orthogonal properties of amino acids using principal components analysis. The grouping results corresponded to amino acid physical properties such as side chain hydrophobicity, size, or backbone flexibility, and an optimal arrangement of approximately eight groups was observed. However, interpretation of the orthogonal properties was more complex. Although the principal components accounting for the largest variances exhibited modest correlations with hydrophobicity and conservation of glycine, in general principal components did not correspond to physical properties of amino acids. Although not intuitive, these amino acid mathematical properties were demonstrated to be robust and to improve local pairwise alignment accuracy, relative to 20 amino acid frequencies alone, for a simple test case.},
-	Address = {Howard Hughes Medical Institute, University of Texas Southwestern Medical Center, Dallas 75390-9050, USA.},
-	Annote = {Table 1 has the groupings.  Paper says that groups 5-10 had a broad maximum, with 8 being the strongest score.},
-	Au = {Wrabl, JO and Grishin, NV},
-	Author = {Wrabl, James O and Grishin, Nick V},
-	Ci = {(c) 2005 Wiley-Liss, Inc.},
-	Da = {20051027},
-	Date-Added = {2007-06-26 10:29:21 -0700},
-	Date-Modified = {2007-08-03 11:00:38 -0700},
-	Dcom = {20060417},
-	Doi = {10.1002/prot.20648},
-	Edat = {2005/09/27 09:00},
-	Gr = {GM67165/GM/NIGMS},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acids/*chemistry; Hydrophobicity; Models, Chemical; Pliability; Principal Component Analysis; Proteins/*chemistry; *Sequence Alignment},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wrabl/2005.pdf},
-	Lr = {20061115},
-	Mhda = {2006/04/18 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {523--534},
-	Pl = {United States},
-	Pmid = {16184599},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2005 Nov 15;61(3):523-34.},
-	Stat = {MEDLINE},
-	Title = {Grouping of amino acid types and extraction of amino acid properties from multiple sequence alignments using variance maximization},
-	Volume = {61},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbU8IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8zjsKl5S1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVdyYWJsAAAQAAgAAMFxjUkAAAARAAgAAMKmR50AAAABABgARm1PAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpXcmFibDoyMDA1LnBkZgAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dyYWJsLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV3JhYmwvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.20648}}
-
-@article{Delarue:2007lr,
-	Abstract = {Aminoacyl-tRNA synthetases (aaRSs) are responsible for creating the pool of correctly charged aminoacyl-tRNAs that are necessary for the translation of genetic information (mRNA) by the ribosome. Each aaRS belongs to either one of only two classes with two different mechanisms of aminoacylation, making use of either the 2'OH (Class I) or the 3'OH (Class II) of the terminal A76 of the tRNA and approaching the tRNA either from the minor groove (2'OH) or the major groove (3'OH). Here, an asymmetric pattern typical of differentiation is uncovered in the partition of the codon repertoire, as defined by the mechanism of aminoacylation of each corresponding tRNA. This pattern can be reproduced in a unique cascade of successive binary decisions that progressively reduces codon ambiguity. The deduced order of differentiation is manifestly driven by the reduction of translation errors. A simple rule can be defined, decoding each codon sequence in its binary class, thereby providing both the code and the key to decode it. Assuming that the partition into two mechanisms of tRNA aminoacylation is a relic that dates back to the invention of the genetic code in the RNA World, a model for the assignment of amino acids in the codon table can be derived. The model implies that the stop codon was always there, as the codon whose tRNA cannot be charged with any amino acid, and makes the prediction of an ultimate differentiation step, which is found to correspond to the codon assignment of the 22nd amino acid pyrrolysine in archaebacteria.},
-	Address = {WOODBURY},
-	Af = {Delarue, Marc},
-	Author = {Delarue, M.},
-	Author-Address = {Inst Pasteur, CNRS, URA 2185, Unite Dynam Struct Macromol, F-75015 Paris, France.},
-	Author-Keywords = {genetic code; evolution; aminoacylation mechanism; codon assignment; translation errors},
-	Date = {FEB},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 11:51:21 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {delarue@pasteur.fr},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000243753500001},
-	Isi-Document-Delivery-Number = {129TR},
-	Issn = {1355-8382},
-	Journal = {RNA-A PUBLICATION OF THE RNA SOCIETY},
-	Keywords = {TRANSFER-RNA-SYNTHETASES; AMINO-ACIDS; CLASS-I; ESCHERICHIA-COLI; ACCEPTOR STEM; PROTEIN; RECOGNITION; MODEL; AMINOACYLATION; PYRROLYSINE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Delarue/2007.pdf},
-	Month = {Feb},
-	Number = {2},
-	Pages = {161--169},
-	Publication-Type = {J},
-	Publisher = {COLD SPRING HARBOR LAB PRESS, PUBLICATIONS DEPT},
-	Publisher-Address = {500 SUNNYSIDE BLVD, WOODBURY, NY 11797-2924 USA},
-	Reprint-Address = {Delarue, M, Inst Pasteur, CNRS, URA 2185, Unite Dynam Struct Macromol, 25 Rue Dr Roux, F-75015 Paris, France.},
-	Title = {An asymmetric underlying rule in the assignment of codons: Possible clue to a quick early evolution of the genetic code via successive binary choices},
-	Volume = {13},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCMIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9Q/MKmkJVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0RlbGFydWUAABAACAAAwXGNSQAAABEACAAAwqbzBQAAAAEAGABGbCMARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkRlbGFydWU6MjAwNy5wZGYADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9EZWxhcnVlLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGVsYXJ1ZS8yMDA3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Li:2006fk,
-	Abstract = {Alignment-free comparison is a recently developed method for sequence alignment, which has high computational efficiency and suitable to the low identical sequences. Alignment-free comparison was successfully applied in the DNA analysis. However, the accuracy of analysis is not high when it was applied in protein analysis because the complexity of protein is larger than DNA by consisting of 20 types of residues. Thus, residues are clustered into a few groups based on their similarity of physicochemical features. Using such simplified alphabets, the complexity of protein sequences is reduced and at the same time the key information encoded in the sequences remains. Therefore, the accuracy of alignment-free comparison is improved.},
-	Address = {BEIJING},
-	Af = {Li Jing Li Feng-Bo Wang Wei},
-	Annote = {Article is in Chinese.
-No ROC.},
-	Author = {Li, J. and Li, F. B. and Wang, W.},
-	Author-Address = {Nanjing Univ, Natl Lab Solid State Microstruct, Nanjing 210093, Peoples R China. Nanjing Univ, Dept Phys, Nanjing 210093, Peoples R China.},
-	Author-Keywords = {alignment-free comparison; grouping of amino acids; simplification of protein sequence},
-	Date = {DEC},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 10:53:26 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {wangwei@nju.edu.cn},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Isi = {ISI:000243097200010},
-	Isi-Document-Delivery-Number = {120PN},
-	Issn = {1000-3282},
-	Journal = {PROGRESS IN BIOCHEMISTRY AND BIOPHYSICS},
-	Keywords = {ALGORITHMS; DISTANCE; CLASSIFICATION; SIMILARITY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/2006.pdf},
-	Month = {Dec},
-	Number = {12},
-	Pages = {1215--1222},
-	Publication-Type = {J},
-	Publisher = {SCIENCE CHINA PRESS},
-	Publisher-Address = {16 DONGHUANGCHENGGEN NORTH ST, BEIJING 100717, PEOPLES R CHINA},
-	Reprint-Address = {Wang, W, Nanjing Univ, Natl Lab Solid State Microstruct, Nanjing 210093, Peoples R China.},
-	Title = {Simplification of protein sequence and alignment-free sequence analysis},
-	Volume = {33},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmdsKpTC1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAwqmunQAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Cheon:2005fk,
-	Abstract = {We present the clustering properties of amino acids, which are building blocks of proteins, according to their physico-chemical characters. To classify the 20 kinds of amino acids, we employ a Self-Organizing Map (SOM) analysis for the Miyazawa-Jernigan (MJ) pairwise-contact matrix, the Environment-dependent One-body energy Parameters (EOP) and the one-body energy parameters incorporating the Ramachandran angle information (EOPR) over the EOP in proteins. We provide the new result of the SOM clustering for amino acids based on the EOPR and compare that with those from the MJ and the EOP matrix. All three kinds of energy parameters capture the leading role played by the hydrophobicity and the hydrophilicity of amino acids in protein folding. Our SOM analysis generally illustrates that both the EOP and the EOPR can provide the collective clustering of amino acids by the side chain characteristics and the secondary structure information. However, EOP is better at classifying amino acids according to their side chain characteristics whereas EOPR is better with secondary structure. We show that the EOP and the EOPR matrix manifests more detailed physico-chemical classification of amino acids than those from the MJ matrix, which does not contain a local environmental information of amino acids in the protein structures.},
-	Address = {SINGAPORE},
-	Annote = {Obscure Korean article.
-No ROC.},
-	Author = {Cheon, M. and Heo, M. and Chang, I. and Kim, C.},
-	Author-Address = {Pusan Natl Univ, Natl Res Lab Computat Proteom & Biophys, Pusan 609735, South Korea. Pusan Natl Univ, Dept Phys, Pusan 609735, South Korea. Pusan Natl Univ, Dept Stat, Pusan 609735, South Korea.},
-	Author-Keywords = {Self-Organizing Map; protein energy parameters; classification of amino acids; hydrophobicity scales},
-	Date = {OCT},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:02:39 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {chang@random.phys.pusan.ac.kr},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Isi = {ISI:000234160500011},
-	Isi-Document-Delivery-Number = {996FT},
-	Issn = {0129-1831},
-	Journal = {INTERNATIONAL JOURNAL OF MODERN PHYSICS C},
-	Keywords = {STATISTICAL-MECHANICS; ALPHABETS; SEQUENCES; DESIGN},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cheon/2005.pdf},
-	Month = {Oct},
-	Number = {10},
-	Pages = {1609--1616},
-	Publication-Type = {J},
-	Publisher = {WORLD SCIENTIFIC PUBL CO PTE LTD},
-	Publisher-Address = {5 TOH TUCK LINK, SINGAPORE 596224, SINGAPORE},
-	Reprint-Address = {Chang, I, Pusan Natl Univ, Natl Res Lab Computat Proteom & Biophys, Pusan 609735, South Korea.},
-	Title = {Classifications of amino acids in proteins by the self-organizing map},
-	Volume = {16},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA8IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmf8KpTDBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNoZW9uAAAQAAgAAMFxjUkAAAARAAgAAMKprqAAAAABABgARmwPAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDaGVvbjoyMDA1LnBkZgAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0NoZW9uLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hlb24vMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Walter:2005uq,
-	Abstract = {Nature employs a set of 20 amino acids to produce a repertoire of protein structures endowed with sophisticated functions. Here, we combined design and selection to create an enzyme composed entirely from a set of only 9 amino acids that can rescue auxotrophic cells lacking chorismate mutase. The simplified protein captures key structural features of its natural counterpart but appears to be somewhat less stable and more flexible. The potential of a dramatically reduced amino acid alphabet to produce an active catalyst supports the notion that primordial enzymes may have possessed low amino acid diversity and suggests that combinatorial engineering strategies, such as the one used here, may be generally applied to create enzymes with novel structures and functions.},
-	Address = {BETHESDA},
-	Author = {Walter, K. U. and Vamvaca, K. and Hilvert, D.},
-	Author-Address = {ETH Honggerberg, Swiss Fed Inst Technol, Organ Chem Lab, CH-8093 Zurich, Switzerland.},
-	Date = {NOV 11},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:26:29 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {hilvert@org.chem.ethz.ch},
-	Group = {LitSearch; Printed; Reviewed},
-	Isi = {ISI:000233044500052},
-	Isi-Document-Delivery-Number = {980UU},
-	Issn = {0021-9258},
-	Journal = {JOURNAL OF BIOLOGICAL CHEMISTRY},
-	Keywords = {MONOFUNCTIONAL CHORISMATE MUTASE; NOVO PROTEIN DESIGN; COMBINATORIAL MUTAGENESIS; STATE; EVOLUTION; CATALYSIS; SELECTION; FOLD; SITE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Walter/2005.pdf},
-	Month = {Nov},
-	Number = {45},
-	Pages = {37742--37746},
-	Publication-Type = {J},
-	Publisher = {AMER SOC BIOCHEMISTRY MOLECULAR BIOLOGY INC},
-	Publisher-Address = {9650 ROCKVILLE PIKE, BETHESDA, MD 20814-3996 USA},
-	Reprint-Address = {Hilvert, D, ETH Honggerberg, Swiss Fed Inst Technol, Organ Chem Lab, CH-8093 Zurich, Switzerland.},
-	Title = {An active enzyme constructed from a 9-amino acid alphabet},
-	Volume = {280},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbT8IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9RxMKmkhlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABldhbHRlcgAQAAgAAMFxjUkAAAARAAgAAMKm9IkAAAABABgARm0/AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpXYWx0ZXI6MjAwNS5wZGYAAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvV2FsdGVyLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1dhbHRlci8yMDA1LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Esteve:2005fj,
-	Abstract = {In this paper we carry out an analysis of different types of potential and substitution matrices for amino acids, oriented to give a classification of the latter. The cluster decomposition is obtained, in a fully unsupervised way, from the subdominant ultrametric associated to the distance between amino acids induced by the corresponding matrix. In the comparative study, by looking at the classifications obtained from diverse matrices, we can get information on how they account for the different chemical-physical properties of the amino acids. (c) 2004 Elsevier B.V. All rights reserved.},
-	Address = {AMSTERDAM},
-	Annote = {Looking at ways of generating a dendrogram from an AA matrix.
-No ROC.},
-	Author = {Esteve, J. G. and Falceto, F.},
-	Author-Address = {Univ Zaragoza, Dept Theoret Phys, E-50009 Zaragoza, Spain.},
-	Author-Keywords = {amino acids; classification; potential and substitution matrices},
-	Date = {APR 1},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:03:40 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {esteve@unizar.es falceto@unizar.es},
-	Group = {LitSearch; Printed; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000228832600015},
-	Isi-Document-Delivery-Number = {922JF},
-	Issn = {0301-4622},
-	Journal = {BIOPHYSICAL CHEMISTRY},
-	Keywords = {CONTACT ENERGIES; POTENTIALS; PROTEINS; RECOGNITION; RESIDUES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Esteve/2005.pdf},
-	Month = {Apr},
-	Number = {2-3},
-	Pages = {177--180},
-	Publication-Type = {J},
-	Publisher = {ELSEVIER SCIENCE BV},
-	Publisher-Address = {PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS},
-	Reprint-Address = {Falceto, F, Univ Zaragoza, Dept Theoret Phys, E-50009 Zaragoza, Spain.},
-	Title = {Classification of amino acids induced by their associated matrices},
-	Volume = {115},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDYIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB81FsKl6W1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkVzdGV2ZQAQAAgAAMFxjUkAAAARAAgAAMKmS90AAAABABgARmw2AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpFc3RldmU6MjAwNS5wZGYAAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRXN0ZXZlLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0VzdGV2ZS8yMDA1LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Cheon:2004kx,
-	Abstract = {Twenty kinds of amino acids are building blocks for proteins. The classification of characters of amino acids helps to reduce the complexity of protein properties. Here, we present the classification of 20 amino acids by using hierarchical self-organizing map clustering with the Miyazawa-Jernigan pairwise-contact energy parameters. The classification not only gives the biological interpretations for each group, whose clusterings are in agreement with those of the previous works, but also provides more detailed and characteristic features of amino acid clustering. Hydrophobic, medium, loop-favoring, and polar amino acids are grouped, and each representative amino acid is identified. Hierarchical self-organizing map clustering is proven to be a good clustering tool for classifying the 20 amino acids.},
-	Address = {SEOUL},
-	Annote = {Obscure.
-No ROC.},
-	Author = {Cheon, M. Y. and Chang, I. S.},
-	Author-Address = {Pusan Natl Univ, Dept Phys, Natl Res Lab Computat Proteom & Biophys, Pusan 609735, South Korea.},
-	Author-Keywords = {self-organizing map; protein folding alphabets; classification of amino acids},
-	Date = {JUN},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:11:14 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {chang@random.phys.pusan.ac.kr},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Isi = {ISI:000222065600044},
-	Isi-Document-Delivery-Number = {829QV},
-	Issn = {0374-4884},
-	Journal = {JOURNAL OF THE KOREAN PHYSICAL SOCIETY},
-	Keywords = {STATISTICAL-MECHANICS; PRINCIPLES; MODEL},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cheon/2004.pdf},
-	Month = {Jun},
-	Number = {6},
-	Pages = {1577--1580},
-	Publication-Type = {J},
-	Publisher = {KOREAN PHYSICAL SOC},
-	Publisher-Address = {635-4, YUKSAM-DONG, KANGNAM-KU, SEOUL 135-703, SOUTH KOREA},
-	Reprint-Address = {Cheon, MY, Pusan Natl Univ, Dept Phys, Natl Res Lab Computat Proteom & Biophys, Pusan 609735, South Korea.},
-	Title = {Clustering of the protein design alphabets by using hierarchical self-organizing map},
-	Volume = {44},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA8IMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB81OcKl6cFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNoZW9uAAAQAAgAAMFxjUkAAAARAAgAAMKmTDEAAAABABgARmwPAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDaGVvbjoyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0NoZW9uLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hlb24vMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Esteve:2004yq,
-	Abstract = {In this article, we address the problem of classification of amino acids. Starting from the Miyazawa-Jernigan matrix obtained from the relative positions of amino acids in the crystal structure of globular proteins, we develop a fully unsupervised method of classification for the amino acids. The method is based in the subdominant ultrametric associated to the distance induced by the Miyazawa-Jernigan matrix and the maximum likelihood principle to determine the cluster structure. We obtain a classification consistent with the five groups used in the literature, although with some peculiarities. We also show the stability of our results against changes of the method used to classify the amino acids. (C) 2004 Wiley-Liss, Inc.},
-	Address = {HOBOKEN},
-	Annote = {Devleop a method to  cluster aa's based on the MJ matrix.
-No ROC.},
-	Author = {Esteve, J. G. and Falceto, F.},
-	Author-Address = {Univ Zaragoza, Fac Ciencias, Dept Fis Teor, E-50009 Zaragoza, Spain. Univ Zaragoza, Inst Biocomputac & Fis Sistemas Complejos, E-50009 Zaragoza, Spain.},
-	Author-Keywords = {amino acid classification; maximum likelihood; potential; principal component analysis; subdominant ultrametric; ultrametric clustering},
-	Date = {JUN 1},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:13:27 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {esteve@unizar.es},
-	Group = {LitSearch; Printed; Alphabets; Reviewed; Forward; Backward},
-	Isi = {ISI:000221802000020},
-	Isi-Document-Delivery-Number = {826AU},
-	Issn = {0887-3585},
-	Journal = {PROTEINS-STRUCTURE FUNCTION AND BIOINFORMATICS},
-	Keywords = {CONTACT ENERGIES; APPROXIMATION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Esteve/2004.pdf},
-	Month = {Jun},
-	Number = {4},
-	Pages = {999--1004},
-	Publication-Type = {J},
-	Publisher = {WILEY-LISS},
-	Publisher-Address = {DIV JOHN WILEY & SONS INC, 111 RIVER ST, HOBOKEN, NJ 07030 USA},
-	Reprint-Address = {Esteve, JG, Univ Zaragoza, Fac Ciencias, Dept Fis Teor, E-50009 Zaragoza, Spain.},
-	Title = {A general clustering approach with application to the Miyazawa-Jernigan potentials for amino acids},
-	Volume = {55},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDYIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB81WcKl6gVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkVzdGV2ZQAQAAgAAMFxjUkAAAARAAgAAMKmTHUAAAABABgARmw2AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpFc3RldmU6MjAwNC5wZGYAAA4AEgAIADIAMAAwADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRXN0ZXZlLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0VzdGV2ZS8yMDA0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Kosiol:2004vn,
-	Abstract = {It is accepted that many evolutionary changes of amino acid sequence in proteins are conservative: the replacement of one amino acid by another residue has a far greater chance of being accepted if the two residues have similar properties. It is difficult, however, to identify relevant physicochemical properties that capture this similarity. In this paper we introduce a criterion that determines similarity from an evolutionary point of view. Our criterion is based on the description of protein evolution by a Markov process and the corresponding matrix of instantaneous replacement rates. It is inspired by the conductance, a quantity that reflects the strength of mixing in a Markov process. Furthermore we introduce a method to divide the 20 amino acid residues into subsets that achieve Rood scores with our criterion. The criterion has the time-invariance property that different time distances of the same amino acid replacement rate matrix lead to the same grouping; but different rate matrices lead to different groupings. Therefore it can be used as an automated method to compare matrices derived from consideration of different types of proteins, or from parts of proteins sharing different structural or functional features. We present the groupings resulting from two standard matrices used in sequence alignment and phylogenetic tree estimation. (C) 2003 Elsevier Ltd. All rights reserved.},
-	Address = {LONDON},
-	Annote = {Another way to generate dendrograms from similarity matrices, this time by using some Markov process analysis.
-No ROC.},
-	Author = {Kosiol, C. and Goldman, N. and Buttimore, N. H.},
-	Author-Address = {EBI, EMBL, Cambridge CB10 1SD, England. Univ Dublin Trinity Coll, Sch Math, Dublin 2, Ireland.},
-	Author-Keywords = {protein evolution; groupings of amino acids; Markov models; conductance},
-	Date = {MAY 7},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:12:17 -0700},
-	Document-Type = {Article},
-	E-Mail-Address = {kosiol@ebi.ac.uk},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Isi = {ISI:000221003300008},
-	Isi-Document-Delivery-Number = {814VX},
-	Issn = {0022-5193},
-	Journal = {JOURNAL OF THEORETICAL BIOLOGY},
-	Keywords = {PROTEIN EVOLUTION; SUBSTITUTION; MODEL; DNA; SEQUENCES; PHYLOGENY; MATRICES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kosiol/2004.pdf},
-	Month = {May},
-	Number = {1},
-	Pages = {97--106},
-	Publication-Type = {J},
-	Publisher = {ACADEMIC PRESS LTD ELSEVIER SCIENCE LTD},
-	Publisher-Address = {24-28 OVAL RD, LONDON NW1 7DX, ENGLAND},
-	Reprint-Address = {Kosiol, C, EBI, EMBL, Wellcome Trust Genome Campus, Cambridge CB10 1SD, England.},
-	Title = {A new criterion and method for amino acid classification},
-	Volume = {228},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIoIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9SysKmk/hQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABktvc2lvbAAQAAgAAMFxjUkAAAARAAgAAMKm9mgAAAABABgARmyKAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLb3Npb2w6MjAwNC5wZGYAAA4AEgAIADIAMAAwADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS29zaW9sLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0tvc2lvbC8yMDA0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Leary:2004rt,
-	Abstract = {Identifying the fold class of a protein sequence of unknown structure is a fundamental problem in modern biology. We apply a supervised learning algorithm to the classification of protein sequences with low sequence identity from a library of 174 structural classes created with the Combinatorial Extension structural alignment methodology. A class of rules is considered that assigns test sequences to structural classes based on the closest match of an amino acid index profile of the test sequence to a profile centroid for each class. A mathematical optimization procedure is applied to determine an amino acid index of maximal structural discriminatory power by maximizing the ratio of between-class to within-class pro. le variation. The optimal index is computed as the solution to a generalized eigenvalue problem, and its performance for fold classification is compared to that of other published indices. The optimal index has significantly more structural discriminatory power than all currently known indices, including average surrounding hydrophobicity, which it most closely resembles. It demonstrates >70% classification accuracy over all folds and nearly 100% accuracy on several folds with distinctive conserved structural features. Finally, there is a compelling universality to the optimal index in that it does not appear to depend strongly on the specific structural classes used in its computation.},
-	Address = {BETHESDA},
-	Author = {Leary, R. H. and Rosen, J. B. and Jambeck, P.},
-	Author-Address = {Univ Calif San Diego, San Diego Supercomp Ctr, La Jolla, CA 92093 USA. Univ Calif San Diego, Dept Comp Sci & Engn, La Jolla, CA 92093 USA. Univ Calif San Diego, Dept Bioengn, La Jolla, CA 92093 USA.},
-	Date = {JAN},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:05:36 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000187565000041},
-	Isi-Document-Delivery-Number = {757MA},
-	Issn = {0006-3495},
-	Journal = {BIOPHYSICAL JOURNAL},
-	Keywords = {SUPPORT VECTOR MACHINES; GLOBULAR-PROTEINS; PREDICTION; SEQUENCES; IDENTIFICATION; DATABASE; ENERGY; SPACE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Leary/2004.pdf},
-	Month = {Jan},
-	Number = {1},
-	Pages = {411--419},
-	Part-Number = {Part 1},
-	Publication-Type = {J},
-	Publisher = {BIOPHYSICAL SOCIETY},
-	Publisher-Address = {9650 ROCKVILLE PIKE, BETHESDA, MD 20814-3998 USA},
-	Reprint-Address = {Leary, RH, Univ Calif San Diego, San Diego Supercomp Ctr, La Jolla, CA 92093 USA.},
-	Title = {An optimal structure-discriminative amino acid index for protein fold recognition},
-	Volume = {86},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJcIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9S+cKmlFNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUxlYXJ5AAAQAAgAAMFxjUkAAAARAAgAAMKm9sMAAAABABgARmyXAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMZWFyeToyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xlYXJ5LzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGVhcnkvMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Li:2003ys,
-	Abstract = {The validity of complexity simplifications for proteins with different structural features may be different. In this paper, the simplification for proteins is studied using the ratios of successful prediction of structural class under a presumed amino-acid-grouping scheme with a composition-coupled method. It is found that for the alpha-class proteins, a two-letter alphabet may cover the degree of freedom to characterize the complexity of the class; for the beta-class proteins, a 7-letter alphabet might indicate the minimal number of residue types to reconstruct the class feature of the natural proteins; for the alpha + beta-class proteins and the alpha/beta-class proteins, the redundancy of the compositions is weak and the simplification leads to a great loss of the information related to the corresponding structural classes.},
-	Address = {SINGAPORE},
-	Annote = {Table 1 is just the no interlace alphabet from the Li, Fan, Wang & Wang paper.
-No ROC.},
-	Author = {Li, T. P. and Wang, J. and Fan, K. and Wang, W.},
-	Author-Address = {Nanjing Univ, Natl Lab Solid State Microstruct, Inst Biophys, Nanjing 210093, Peoples R China. Nanjing Univ, Dept Phys, Nanjing 210093, Peoples R China.},
-	Author-Keywords = {protein classes (alpha, beta, alpha plus beta and alpha/beta); grouping of amino acids; prediction of protein classes},
-	Date = {MAR 10},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:16:16 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Alphabets; Reviewed; Printed; Forward; Backward},
-	Isi = {ISI:000182695500007},
-	Isi-Document-Delivery-Number = {675LE},
-	Issn = {0217-9849},
-	Journal = {MODERN PHYSICS LETTERS B},
-	Keywords = {SEQUENCES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/2003a.pdf},
-	Month = {Mar},
-	Number = {5-6},
-	Pages = {245--252},
-	Publication-Type = {J},
-	Publisher = {WORLD SCIENTIFIC PUBL CO PTE LTD},
-	Publisher-Address = {JOURNAL DEPT PO BOX 128 FARRER ROAD, SINGAPORE 912805, SINGAPORE},
-	Reprint-Address = {Wang, J, Nanjing Univ, Natl Lab Solid State Microstruct, Inst Biophys, Nanjing 210093, Peoples R China.},
-	Title = {How simple can the proteins be: From the prediction of the classes of protein structures},
-	Volume = {17},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAJMjAwM2EucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB81r8Kl6yZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAwqZNlgAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD5oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjIwMDNhLnBkZgAOABQACQAyADAAMAAzAGEALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGkvMjAwM2EucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGkvMjAwM2EucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Fan:2003fr,
-	Abstract = {Experimental studies have shown that the full sequence complexity of naturally occurring proteins is not required to generate rapidly folding and functional proteins, i.e. proteins can be designed with fewer than 20 letters. This raises the question of what is the minimum number of amino acid types required to encode complex protein folds? Here, we investigate this issue from three aspects. First, we study the minimum sequence complexity that can reserve the necessary structural information for detection of distantly related homologues. Second, we compare the ability of designing foldable model sequences over a wide range of reduced amino acid alphabets, which find the minimum number of letters that have the similar design ability as 20. Finally, we survey the lower bound of alphabet size of globular proteins in a non-redundant protein database. These different approaches give a remarkably consistent view, that the minimum number of letters required to fold a protein is around ten. (C) 2003 Elsevier Science Ltd. All rights reserved.},
-	Address = {LONDON},
-	Author = {Fan, K and Wang, W},
-	Author-Address = {Nanjing Univ, Natl Lab Solid State Microstruct, Nanjing 210093, Peoples R China. Nanjing Univ, Dept Phys, Nanjing 210093, Peoples R China.},
-	Author-Keywords = {sequence complexity; reduced amino acid alphabet; protein folding; protein design; protein evolution},
-	Date = {MAY 9},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2008-05-29 12:31:50 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Printed; Reviewed},
-	Isi = {ISI:000182616000012},
-	Isi-Document-Delivery-Number = {674BG},
-	Journal = {J Mol Biol},
-	Keywords = {GENETIC-CODE; AMINO-ACIDS; SEQUENCES; EVOLUTION; PRINCIPLES; COMPLEXITY; ALPHABETS; DATABASE; NUCLEUS; DESIGN},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Fan/2003.pdf},
-	Month = {May},
-	Number = {4},
-	Pages = {921--926},
-	Publication-Type = {J},
-	Publisher = {ACADEMIC PRESS LTD ELSEVIER SCIENCE LTD},
-	Publisher-Address = {24-28 OVAL RD, LONDON NW1 7DX, ENGLAND},
-	Reprint-Address = {Wang, W, Nanjing Univ, Natl Lab Solid State Microstruct, Nanjing 210093, Peoples R China.},
-	Title = {What is the minimum number of letters required to fold a protein?},
-	Volume = {328},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDgIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB83xMKl8C9QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0ZhbgAAEAAIAADBcY1JAAAAEQAIAADCplKfAAAAAQAYAEZsOABGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RmFuOjIwMDMucGRmAA4AEgAIADIAMAAwADMALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRmFuLzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRmFuLzIwMDMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Chen:2002zr,
-	Abstract = {It has been found that the 20 kinds of amino acids have different frequencies of occurrence in alpha,beta, and coil structures [P. Y. Chou and G. D. Fasman, Biochemistry 13, 211 (1974)]. Based on more known structures of proteins, frequencies for each amino acid in alpha and beta secondary structures are recalculated. Next step, under the approximation ignoring the chain connectivity of proteins, energy parameters to form alpha and beta secondary structures for each amino acid are obtained. According to the hydrophobicity and energies in alpha and beta secondary structures, 20 kinds of amino acids are classified. The results suggest that dividing amino acids to five or nine groups is desirable. At last, a protein model considering both two-body hydrophobic interaction and one-body energy to form secondary structures, hydrophobic-polar alphabeta model, is introduced. It is shown that the consistency among various energy terms makes the cooperativity of protein folding closer to the experiments.},
-	Address = {COLLEGE PK},
-	Annote = {Clustering based on looking at aa's in alpha and beta structures.  Clustering is in Table II.
-No ROC.},
-	Author = {Chen, H. and Zhou, X. and Ou-Yang, Z. C.},
-	Author-Address = {Tsing Hua Univ, Ctr Adv Study, Beijing 100084, Peoples R China. Acad Sinica, Inst Theoret Phys, Beijing 100080, Peoples R China.},
-	Date = {JUN},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:28:23 -0700},
-	Di = {ARTN 061907},
-	Document-Type = {Article},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Isi = {ISI:000176762500070},
-	Isi-Document-Delivery-Number = {572FH},
-	Issn = {1063-651X},
-	Journal = {PHYSICAL REVIEW E},
-	Keywords = {SECONDARY-STRUCTURE; GLOBULAR-PROTEINS; MODEL; SEQUENCES; CONFORMATION; PARAMETERS; COPOLYMERS; RANGE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chen/2002.pdf},
-	Month = {Jun},
-	Number = {6},
-	Pages = {061907},
-	Part-Number = {Part 1},
-	Publication-Type = {J},
-	Publisher = {AMERICAN PHYSICAL SOC},
-	Publisher-Address = {ONE PHYSICS ELLIPSE, COLLEGE PK, MD 20740-3844 USA},
-	Reprint-Address = {Chen, H, Tsing Hua Univ, Ctr Adv Study, Beijing 100084, Peoples R China.},
-	Title = {Classification of amino acids based on statistical results of known structures and cooperativity of protein folding},
-	Volume = {65},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA0IMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC6YRsLYwMtQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABENoZW4AEAAIAADBcY1JAAAAEQAIAADC2SM7AAAAAQAYAEZsDQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hlbjoyMDAyLnBkZgAADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaGVuLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0NoZW4vMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Wang:2002mz,
-	Abstract = {Based on the concept of energy landscape a grouping method of residues for reducing the sequence complexity in proteins is presented. For the Miyazawa and Jernigan matrix, rational groupings of 20 kinds of residues with minimal mismatches, under the consideration of local minima and statistics on correlation between the residues, are studied. A hierarchical tree of groupings relating to different numbers of groups N is obtained, and a plateau around N=8-10 is found, which may represent the basic degree of freedom of the sequence complexity in proteins.},
-	Address = {COLLEGE PK},
-	Annote = {The guys from Nat Struc Biol strike again, making an aa clustering based on the MJ matrix.
-No ROC.},
-	Author = {Wang, J. and Wang, W.},
-	Author-Address = {Nanjing Univ, Natl Lab Solid State Microstruct, Nanjing 210093, Peoples R China. Nanjing Univ, Dept Phys, Nanjing 210093, Peoples R China.},
-	Date = {APR},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-08-03 11:25:08 -0700},
-	Di = {ARTN 041911},
-	Document-Type = {Article},
-	Group = {LitSearch; Alphabets; Printed; Reviewed; Forward; Backward},
-	Isi = {ISI:000175146400085},
-	Isi-Document-Delivery-Number = {544EZ},
-	Issn = {1063-651X},
-	Journal = {PHYSICAL REVIEW E},
-	Keywords = {CALORIMETRIC 2-STATE COOPERATIVITY; PROTEINS; PRINCIPLES; RECOGNITION; LANDSCAPE; KINETICS; MODEL},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/2002.pdf},
-	Month = {Apr},
-	Number = {4},
-	Pages = {041911},
-	Part-Number = {Part 1},
-	Publication-Type = {J},
-	Publisher = {AMERICAN PHYSICAL SOC},
-	Publisher-Address = {ONE PHYSICS ELLIPSE, COLLEGE PK, MD 20740-3844 USA},
-	Reprint-Address = {Wang, J, Nanjing Univ, Natl Lab Solid State Microstruct, Nanjing 210093, Peoples R China.},
-	Title = {Grouping of residues based on their contact interactions},
-	Volume = {65},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC6YOMLYwLRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADC2SMkAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoyMDAyLnBkZgAADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYW5nLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dhbmcvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Muller:2002ly,
-	Abstract = {Evolution of proteins is generally modeled as a Markov process acting on each site of the sequence. Replacement frequencies need to be estimated based on sequence alignments. Here we compare three approaches: First, the original method by Dayhoff, Schwartz, and Orcutt (1978) Atlas Protein Seq. Struc. 5:345-352, secondly, the resolvent method (RV) by Muller and Vingron (2000) J. Comput. Biol. 7(6):761-776, and finally a maximum likelihood approach (ML) developed in this paper. We evaluate the methods using a highly divergent and inhomogeneous set of sequence alignments as an input to the estimation procedure. ML is the method of choice for small sets of input data. Although the RV method is computationally much less demanding it performs only slightly worse than ML. Therefore, it is perfectly appropriate for large-scale applications.},
-	Address = {LAWRENCE},
-	Author = {Muller, T. and Spang, R. and Vingron, M.},
-	Author-Address = {Deutsch Krebsforschungszentrum, D-69120 Heidelberg, Germany. Duke Univ, Inst Stat & Decis Sci, Durham, NC 27706 USA.},
-	Author-Keywords = {amino acid replacement; amino acid score matrix; maximum-likelihood; protein evolution},
-	Date = {JAN},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:09:55 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000173101100002},
-	Isi-Document-Delivery-Number = {508QV},
-	Issn = {0737-4038},
-	Journal = {MOLECULAR BIOLOGY AND EVOLUTION},
-	Keywords = {PROTEIN SEQUENCES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Muller/2002.pdf},
-	Month = {Jan},
-	Number = {1},
-	Pages = {8--13},
-	Publication-Type = {J},
-	Publisher = {SOC MOLECULAR BIOLOGY EVOLUTION},
-	Publisher-Address = {PO BOX 1897, LAWRENCE, KS 66044-8897 USA},
-	Reprint-Address = {Muller, T, Deutsch Krebsforschungszentrum, Neuenheimer Feld 280, D-69120 Heidelberg, Germany.},
-	Title = {Estimating amino acid substitution models: A comparison of Dayhoff's estimator, the resolvent approach and a maximum likelihood method},
-	Volume = {19},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMIIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9T9sKmlcZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk11bGxlcgAQAAgAAMFxjUkAAAARAAgAAMKm+DYAAAABABgARmzCAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNdWxsZXI6MjAwMi5wZGYAAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTXVsbGVyLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL011bGxlci8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Coghlan:2001gf,
-	Abstract = {Motivation: We propose representing amino acids by bit-patterns so they may be used in a filter algorithm for similarity searches over protein databases, to rapidly eliminate non-homologous regions of database sequences. The filter algorithm would be based on dynamic programming optimization. It would have the advantage over previous filter algorithms that its substitution scoring function distinguishes between conservative and non-conservative amino acid substitutions. Results: Simulated annealing was used to search for the best five-bit or three-bit patterns to represent amino acids, where similar amino acids were given similar bit-patterns. The similarity between amino acids was estimated from the BLOSUM45 matrix. Representing amino acids by these five-bit and three-bit patterns, the Escherichia coli PhoE precursor and the bacteriophage PA2 LC precursor were aligned. The alignments were nearly the same as that obtained when BLOSUM45 was used to score substitutions.},
-	Address = {OXFORD},
-	Author = {Coghlan, A. and Mac Donaill, D. A. and Buttimore, N. H.},
-	Author-Address = {Univ Dublin Trinity Coll, Dept Chem, Dublin 2, Ireland. Univ Dublin Trinity Coll, Dept Genet, Dublin 2, Ireland. Univ Dublin Trinity Coll, Sch Math, Dublin 2, Ireland.},
-	Date = {AUG},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 11:47:16 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:000171021000002},
-	Isi-Document-Delivery-Number = {473CB},
-	Issn = {1367-4803},
-	Journal = {BIOINFORMATICS},
-	Keywords = {SEQUENCE DATABASES; IDENTIFICATION; MATRICES; INSTRUCTIONS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Coghlan/2001.pdf},
-	Month = {Aug},
-	Number = {8},
-	Pages = {676--685},
-	Publication-Type = {J},
-	Publisher = {OXFORD UNIV PRESS},
-	Publisher-Address = {GREAT CLARENDON ST, OXFORD OX2 6DP, ENGLAND},
-	Reprint-Address = {Mac Donaill, DA, Univ Dublin Trinity Coll, Dept Chem, Dublin 2, Ireland.},
-	Title = {Representation of amino acids as five-bit or three-bit patterns for filtering protein databases},
-	Volume = {17},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbBgIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9UH8KmlgRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0NvZ2hsYW4AABAACAAAwXGNSQAAABEACAAAwqb4dAAAAAEAGABGbBgARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkNvZ2hsYW46MjAwMS5wZGYADgASAAgAMgAwADAAMQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Db2dobGFuLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ29naGxhbi8yMDAxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Muller:2000ve,
-	Abstract = {The estimation of amino acid replacement frequencies during molecular evolution is crucial for many applications in sequence analysis, Score matrices for database search programs or phylogenetic analysis rely on such models of protein evolution. Pioneering work was done by Dayhoff et al, (1978) who formulated a Markov model of evolution and derived the famous PAM score matrices. Her estimation procedure for amino acid exchange frequencies is restricted to pairs of proteins that have a constant and small degree of divergence. Here we present an improved estimator, called the resolvent method, that is not subject to these limitations. This extension of Dayhoff's approach enables us to estimate an amino acid substitution model from alignments of varying degree of divergence. Extensive simulations show the capability of the new estimator to recover accurately the exchange frequencies among amino acids. Based on the SYSTERS database of aligned protein families (Krause and Vingron, 1998) we recompute a series of score matrices.},
-	Address = {LARCHMONT},
-	Author = {Muller, T. and Vingron, M.},
-	Author-Address = {Deutsch Krebsforschungszentrum, D-69120 Heidelberg, Germany.},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:11:24 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Scoring Matrices; Reviewed},
-	Isi = {ISI:000168049700001},
-	Isi-Document-Delivery-Number = {421ET},
-	Issn = {1066-5277},
-	Journal = {JOURNAL OF COMPUTATIONAL BIOLOGY},
-	Keywords = {SUBSTITUTION MATRICES; PROTEIN SEQUENCES; SCORING MATRIX; BLOCKS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Muller/2000.pdf},
-	Number = {6},
-	Pages = {761--776},
-	Publication-Type = {J},
-	Publisher = {MARY ANN LIEBERT INC PUBL},
-	Publisher-Address = {2 MADISON AVENUE, LARCHMONT, NY 10538 USA},
-	Reprint-Address = {Muller, T, Deutsch Krebsforschungszentrum, Neuemheimer Feld 280, D-69120 Heidelberg, Germany.},
-	Title = {Modeling amino acid replacement},
-	Volume = {7},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMIIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9UQMKmlitQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk11bGxlcgAQAAgAAMFxjUkAAAARAAgAAMKm+JsAAAABABgARmzCAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNdWxsZXI6MjAwMC5wZGYAAA4AEgAIADIAMAAwADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTXVsbGVyLzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL011bGxlci8yMDAwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Ladunga:1997ul,
-	Abstract = {We present a comprehensive analysis of amino acid substitution patterns (sets of residues in a position of a multiple alignment) and conservation of physicochemical properties in alignments of protein sequences. Of the one million possible substitution patterns, only a few hundred account for the majority of aligned positions. Very similar distributions of substitution patterns are observed in all but one of the diverse databases of multiple alignments, In these substitution patterns we analyzed the conservation of 511 physicochemical and steric amino acid properties. Highest conservation was observed in those steric and transfer free energy-related properties that are crucial for folding. The best conserved steric properties include the minimal width of the side chains and their interactions,vith other residues, Among the hydrophobicity-related properties, charge and those properties that provide information on propensities to form secondary structures or side chain conformation, appear to be better conserved than pure hydrophobicity measures, Physicochemical sequence analysis based on the most conserved properties is expected to aid searching a protein sequence query against a database of multiple alignments, prediction of secondary and tertiary structures and protein engineering.},
-	Address = {OXFORD},
-	Author = {Ladunga, I. and Smith, R. F.},
-	Author-Address = {BAYLOR COLL MED,DEPT CELL BIOL,HOUSTON,TX 77030. LORAND EOTVOS UNIV,DEPT GENET,H-1088 BUDAPEST,HUNGARY. BAYLOR COLL MED,DEPT MOL & HUMAN GENET,CTR HUMAN GENOME,HOUSTON,TX 77030.},
-	Author-Keywords = {amino acid properties; amino acid substitution; hydrophobicity; multiple alignment; protein folding},
-	Date = {MAR},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:04:30 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1997WX73600002},
-	Isi-Document-Delivery-Number = {WX736},
-	Issn = {0269-2139},
-	Journal = {PROTEIN ENGINEERING},
-	Keywords = {SECONDARY STRUCTURE; GLOBULAR-PROTEINS; SEQUENCE ALIGNMENT; PROFILE ANALYSIS; STRUCTURE PREDICTION; PATTERN-RECOGNITION; CLUSTER-ANALYSIS; GAP PENALTIES; SEARCH TOOL; RESIDUES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Ladunga/1997.pdf},
-	Month = {Mar},
-	Number = {3},
-	Pages = {187--196},
-	Publication-Type = {J},
-	Publisher = {OXFORD UNIV PRESS},
-	Publisher-Address = {GREAT CLARENDON ST, OXFORD, ENGLAND OX2 6DP},
-	Title = {Amino acid substitutions preserve protein folding by conserving steric and hydrophobicity properties},
-	Volume = {10},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJIIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9UkcKmlqdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0xhZHVuZ2EAABAACAAAwXGNSQAAABEACAAAwqb5FwAAAAEAGABGbJIARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxhZHVuZ2E6MTk5Ny5wZGYADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9MYWR1bmdhLzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGFkdW5nYS8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Koshi:1997qf,
-	Abstract = {To investigate how the properties of individual amino acids result in proteins with particular structures and functions, we have examined the correlations between previously derived structure-dependent mutation rates and changes in various physical-chemical properties of the amino acids such as volume, charge, alpha-helical and beta-sheet propensity, and hydrophobicity. In most cases we found the Delta G of transfer from octanol to water to be the best model for evolutionary constraints, in contrast to the much weaker correlation with the Delta G of transfer from cyclohexane to water, a property found to be highly correlated to changes in stability in site-directed mutagenesis studies, This suggests that natural evolution may follow different rules than those suggested by results obtained in the laboratory. A high degree of conservation of a surface residue's relative hydrophobicity was also observed, a fact that cannot be explained by constraints on protein stability but that may reflect the consequences of the reverse-hydrophobic effect. Local propensity, especially or-helical propensity, is rather poorly conserved during evolution, indicating that non-local interactions dominate protein structure formation. We found that changes in volume were important in specific cases, most significantly in transitions among the hydrophobic residues in buried locations. To demonstrate how these techniques could be used to understand particular protein families, we derived and analyzed mutation matrices for the hypervariable and framework regions of antibody light chain V regions. We found a surprisingly high conservation of hydrophobicity in the hypervariable region, possibly indicating an important role for hydrophobicity in antigen recognition. (C) 1997 Wiley-Liss, Inc.},
-	Address = {NEW YORK},
-	Author = {Koshi, J. M. and Goldstein, R. A.},
-	Author-Address = {UNIV MICHIGAN,DEPT CHEM,ANN ARBOR,MI 48109. UNIV MICHIGAN,DIV BIOPHYS RES,ANN ARBOR,MI 48109.},
-	Author-Keywords = {hydrophobicity; molecular evolution; local propensities; reverse hydrophobic effect; protein stability},
-	Date = {MAR},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:01:06 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1997WQ44200002},
-	Isi-Document-Delivery-Number = {WQ442},
-	Issn = {0887-3585},
-	Journal = {PROTEINS-STRUCTURE FUNCTION AND GENETICS},
-	Keywords = {AMINO-ACID SUBSTITUTIONS; HYDROPHOBIC CORE PACKING; PROTEIN STABILITY; BACTERIOPHAGE-T4 LYSOZYME; FORMING PROPENSITIES; STRUCTURAL-ANALYSIS; SCORING MATRIX; CYTOCHROME-C; EVOLUTION; SEQUENCES},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Koshi/1997.pdf},
-	Month = {Mar},
-	Number = {3},
-	Pages = {336--344},
-	Publication-Type = {J},
-	Publisher = {WILEY-LISS},
-	Publisher-Address = {DIV JOHN WILEY & SONS INC, 605 THIRD AVE, NEW YORK, NY 10158-0012},
-	Title = {Mutation matrices and physical-chemical properties: Correlations and implications},
-	Volume = {27},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIkIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9UysKmlvJQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUtvc2hpAAAQAAgAAMFxjUkAAAARAAgAAMKm+WIAAAABABgARmyJAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLb3NoaToxOTk3LnBkZgAOABIACAAxADkAOQA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0tvc2hpLzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS29zaGkvMTk5Ny5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Agrafiotis:1997pd,
-	Abstract = {Recent advances in gene sequencing and rational drug design have re-emphasized the need for new methods for protein analysis, classification, and structure and function prediction. In this article, we introduce a new method for analyzing protein sequences based on Sammon's non-linear mapping algorithm. When applied to a family of homologous sequences, the method is able to capture the essential features of the similarity matrix, and provides a faithful representation of chemical or evolutionary distance in a simple and intuitive way. The merits of the new algorithm are demonstrated using examples from the protein kinase family.},
-	Address = {NEW YORK},
-	Author = {Agrafiotis, D. K.},
-	Author-Keywords = {exchange matrix; multi-dimensional scaling; non-linear mapping; protein kinase; Sammon; sequence analysis, sequence similarity},
-	Date = {FEB},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 11:28:43 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1997WH56600003},
-	Isi-Document-Delivery-Number = {WH566},
-	Issn = {0961-8368},
-	Journal = {PROTEIN SCIENCE},
-	Keywords = {AMINO-ACID SUBSTITUTIONS; SCORING MATRIX; SIMILARITY; EVOLUTION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Agrafiotis/1997.pdf},
-	Month = {Feb},
-	Number = {2},
-	Pages = {287--293},
-	Publication-Type = {J},
-	Publisher = {CAMBRIDGE UNIV PRESS},
-	Publisher-Address = {40 WEST 20TH STREET, NEW YORK, NY 10011-4211},
-	Reprint-Address = {Agrafiotis, DK, 3 DIMENS PHARMACEUT INC,665 STOCKTON DR,SUITE 104,EXTON,PA 19341.},
-	Title = {A new method for analyzing protein sequence relationships based on Sammon maps},
-	Volume = {6},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa9oIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9U+sKmlxtQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACkFncmFmaW90aXMAEAAIAADBcY1JAAAAEQAIAADCpvmLAAAAAQAYAEZr2gBGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6QWdyYWZpb3RpczoxOTk3LnBkZgAADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9BZ3JhZmlvdGlzLzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL0FncmFmaW90aXMvMTk5Ny5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS}}
-
-@article{VOGT:1995bh,
-	Abstract = {The sensitivity of most protein sequence alignment methods depends strongly on the quality of the comparison matrices used. These matrices, which assign weights or similarity scores to every possible amino acid substitution pair, are utilized to differentiate amongst the various possible alignments of two or more sequences. There are many ways to generate these exchange weights and new matrices are constantly published. There has been no overall assessment of these various matrices when applied in different alignment techniques and over many protein folds and families, both close and distant and with the use of several gap penalty values. In this work, a set of amino acid sequences matched by superposition of known protein tertiary topologies is used to test the alignment accuracy of the different method/matrix/penalty combinations. The comparisons show relatively similar results for the top scoring matrices, a preference for the global alignment method of Needleman and Wunsch, and the importance of matrix modification and optimized gap penalties. The relationship between the percentage identity in a resulting alignment and the level of correctness to be expected are given for the top-performing matrix, resulting in a better definition of the so-called ''twilight zone''. Estimates are made for the probability that two sequences, aligned at a certain level of residue percentage identity, are in fact unrelated.},
-	Address = {LONDON},
-	Author = {VOGT, G. and ETZOLD, T. and ARGOS, P.},
-	Author-Address = {EUROPEAN MOLEC BIOL LAB,D-69012 HEIDELBERG,GERMANY.},
-	Author-Keywords = {SEQUENCE ALIGNMENT; RESIDUE EXCHANGE WEIGHTS; GAP PENALTIES; PROTEIN FAMILIES},
-	Date = {JUN 16},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 13:25:37 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Printed; Reviewed},
-	Isi = {ISI:A1995RD99500012},
-	Isi-Document-Delivery-Number = {RD995},
-	Issn = {0022-2836},
-	Journal = {JOURNAL OF MOLECULAR BIOLOGY},
-	Keywords = {DATA-BANK; SUBSTITUTION MATRICES; SCORING MATRIX; ALIGNMENTS; ALGORITHM; EVOLUTION},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/VOGT/1995.pdf},
-	Month = {Jun},
-	Number = {4},
-	Pages = {816--831},
-	Publication-Type = {J},
-	Publisher = {ACADEMIC PRESS (LONDON) LTD},
-	Publisher-Address = {24-28 OVAL RD, LONDON, ENGLAND NW1 7DX},
-	Title = {AN ASSESSMENT OF AMINO-ACID EXCHANGE MATRICES IN ALIGNING PROTEIN SEQUENCES - THE TWILIGHT ZONE REVISITED},
-	Volume = {249},
-	Year = {1995},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbTwIMTk5NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB9VG8Kml1FQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFZPR1QAEAAIAADBcY1JAAAAEQAIAADCpvnBAAAAAQAYAEZtPABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Vk9HVDoxOTk1LnBkZgAADgASAAgAMQA5ADkANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9WT0dULzE5OTUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1ZPR1QvMTk5NS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{GRACY:1993lq,
-	Abstract = {Protein sequence alignments can be improved when at least one of the proteins to be aligned has a known 3-D structure. In this work, geometrical constraints extracted from the target fold are evaluated in independent units that deal with complementary structural features. This information is used to set up mutation tables specific to the locally observed structural environments. The resulting partial evaluations are then combined linearly into a global function which is optimized by dynamic programming. Eventually, a score based on tertiary interactions can be used as a selection criterion to discriminate among a set of suboptimal alignments. The relevance of the scores given by each unit is tested on a representative set of protein families. Finally, a method for combining the different scores is described and its efficiency is evaluated on a few pairs of weakly homologous proteins.},
-	Address = {OXFORD},
-	Author = {GRACY, J. and CHICHE, L. and SALLANTIN, J.},
-	Author-Address = {CTR PHARMACOL ENDOCRINOL,CNRS,INSERM,F-34094 MONTPELLIER,FRANCE.},
-	Author-Keywords = {DYNAMIC PROGRAMMING; HOMOLOGY MODELING; SEQUENCE STRUCTURE COMPATIBILITY},
-	Date = {NOV},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 12:58:16 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1993MJ45200004},
-	Isi-Document-Delivery-Number = {MJ452},
-	Issn = {0269-2139},
-	Journal = {PROTEIN ENGINEERING},
-	Keywords = {KNOWLEDGE-BASED PREDICTION; AMINO-ACID-SEQUENCE; SECONDARY STRUCTURE; PATTERN-RECOGNITION; TERTIARY TEMPLATES; MEAN FORCE; IDENTIFICATION; SIMILARITIES; POTENTIALS; ENERGY},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/GRACY/1993.pdf},
-	Month = {Nov},
-	Number = {8},
-	Pages = {821--829},
-	Publication-Type = {J},
-	Publisher = {OXFORD UNIV PRESS UNITED KINGDOM},
-	Publisher-Address = {WALTON ST JOURNALS DEPT, OXFORD, ENGLAND OX2 6DP},
-	Reprint-Address = {GRACY, J, LAB INFORMAT ROBOT & MICROELECTR MONTPELLIER,161 RUE ADA,F-34090 MONTPELLIER,FRANCE.},
-	Title = {IMPROVED ALIGNMENT OF WEAKLY HOMOLOGOUS PROTEIN SEQUENCES USING STRUCTURAL INFORMATION},
-	Volume = {6},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbE8IMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmj8KpTEtQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUdSQUNZAAAQAAgAAMFxjUkAAAARAAgAAMKprrsAAAABABgARmxPAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHUkFDWToxOTkzLnBkZgAOABIACAAxADkAOQAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dSQUNZLzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR1JBQ1kvMTk5My5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{ALTSCHUL:1991dq,
-	Address = {LONDON},
-	Author = {ALTSCHUL, S. F.},
-	Author-Keywords = {HOMOLOGY; SEQUENCE COMPARISON; STATISTICAL SIGNIFICANCE; ALIGNMENT ALGORITHMS; PATTERN RECOGNITION},
-	Date = {JUN 5},
-	Date-Added = {2007-06-26 10:17:53 -0700},
-	Date-Modified = {2007-07-20 11:31:03 -0700},
-	Document-Type = {Article},
-	Group = {LitSearch; Reviewed},
-	Isi = {ISI:A1991FR40400016},
-	Isi-Document-Delivery-Number = {FR404},
-	Issn = {0022-2836},
-	Journal = {JOURNAL OF MOLECULAR BIOLOGY},
-	Keywords = {DISTANTLY RELATED PROTEINS; PATTERN-RECOGNITION; NUCLEOTIDE-SEQUENCE; ESCHERICHIA-COLI; SALMONELLA-TYPHIMURIUM; MOLECULAR-CLONING; MEMBRANE-PROTEIN; BINDING-PROTEIN; SCORING MATRIX; GENERAL-METHOD},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/ALTSCHUL/1991.pdf},
-	Month = {Jun},
-	Number = {3},
-	Pages = {555--565},
-	Publication-Type = {J},
-	Publisher = {ACADEMIC PRESS LTD},
-	Publisher-Address = {24-28 OVAL RD, LONDON, ENGLAND NW1 7DX},
-	Reprint-Address = {ALTSCHUL, SF, NIH,NATL CTR BIOTECHNOL INFORMAT,NATL LIB MED,BETHESDA,MD 20892.},
-	Title = {AMINO-ACID SUBSTITUTION MATRICES FROM AN INFORMATION THEORETIC PERSPECTIVE},
-	Volume = {219},
-	Year = {1991},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa94IMTk5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmksKpTE5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFsdHNjaHVsABAACAAAwXGNSQAAABEACAAAwqmuvgAAAAEAGABGa94ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFsdHNjaHVsOjE5OTEucGRmAAAOABIACAAxADkAOQAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FsdHNjaHVsLzE5OTEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FsdHNjaHVsLzE5OTEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Sunyaev:1998lr,
-	Abstract = {The parametric description of residue environments through solvent accessibility, backbone conformation, or pairwise residue-residue distances is the key to the comparison between amino acid types at protein sequence positions and residue locations in structural templates (condition of protein sequence-structure match). For the first time, the research results presented in this study clarify and allow to quantify, on a rigorous statistical basis, to what extent the amino acid type-specific distributions of commonly used environment parameters are discriminative with respect to the 20 amino acid types. Relying on the Bahadur theory, we estimate the probability of error in a single-sequence-structure alignment based on weak or absent discriminative power in a learning database of protein structure. We present the results for many residue environment variables and demonstrate that each fold description parameter is sensitive with respect to only a few amino acid types while indifferent to most of the other amino acid types. Even complex structural characteristics combining solvent-accessible surface area, backbone conformation, and pairwise distances distinguish only some amino acid types, whereas the others remain nondiscriminated. We find that the knowledge-based potentials currently in use treat especially Ala, Asp, Gln, His, Ser, Thr, and Tyr as essentially "average" amino acids. Thus, highly discriminative amino acid types define the alignment register in gapless sequence-structure alignments. The introduction of gaps leads to alignment ambiguities at sequence positions occupied by nondiscriminated amino acid types. Therefore, local sequence-structure alignments produced by techniques with gaps cannot be reliable. Conceptionally new and more sensitive environment parameters must be invented.},
-	Address = {European Molecular Biology Laboratory, Heidelberg, Germany.},
-	Au = {Sunyaev, SR and Eisenhaber, F and Argos, P and Kuznetsov, EN and Tumanyan, VG},
-	Author = {Sunyaev, S R and Eisenhaber, F and Argos, P and Kuznetsov, E N and Tumanyan, V G},
-	Da = {19980701},
-	Date-Added = {2007-06-25 18:33:09 -0700},
-	Date-Modified = {2007-06-25 18:33:09 -0700},
-	Dcom = {19980701},
-	Edat = {1998/05/21 02:03},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acids/*chemistry; Chemistry, Physical; Databases, Factual; Mathematics; *Protein Conformation; Protein Folding; Protein Structure, Secondary; Protein Structure, Tertiary; Sequence Alignment; Solvents; Templates, Genetic},
-	Language = {eng},
-	Lr = {20061115},
-	Mhda = {2000/06/20 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {225--246},
-	Pii = {10.1002/(SICI)1097-0134(19980515)31:3<225::AID-PROT1>3.0.CO;2-I},
-	Pl = {UNITED STATES},
-	Pmid = {9593195},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Review},
-	Pubm = {Print},
-	Rf = {62},
-	Rn = {0 (Amino Acids); 0 (Solvents)},
-	Sb = {IM},
-	So = {Proteins. 1998 May 15;31(3):225-46.},
-	Stat = {MEDLINE},
-	Title = {Are knowledge-based potentials derived from protein structure sets discriminative with respect to amino acid types?},
-	Volume = {31},
-	Year = {1998}}
-
-@article{cieplak:1420,
-	Annote = {Fig. 1 gives a couple 5 and 2 letter alphabets.  5 letters might be worth running.
-
-CHMB},
-	Author = {Cieplak, M and Holter, N S and Maritan, A and Banavar, J R},
-	Date-Added = {2007-06-25 17:32:27 -0700},
-	Date-Modified = {2008-05-30 02:50:11 -0700},
-	Group = {Alphabets; Reviewed; Forward; Printed; Backward; ROC},
-	Journal = {J Chem Phys},
-	Keywords = {proteins; molecular biophysics; thermodynamic properties; matrix algebra; organic compounds},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cieplak/2001.pdf},
-	Number = {3},
-	Pages = {1420--1423},
-	Publisher = {AIP},
-	Title = {Amino acid classes and the protein folding problem},
-	Volume = {114},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbBUIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8s/sKlrFdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0NpZXBsYWsAABAACAAAwXGNSQAAABEACAAAwqYOxwAAAAEAGABGbBUARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkNpZXBsYWs6MjAwMS5wZGYADgASAAgAMgAwADAAMQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaWVwbGFrLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2llcGxhay8yMDAxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://link.aip.org/link/?JCP/114/1420/1},
-	Bdsk-Url-2 = {http://dx.doi.org/10.1063/1.1333025}}
-
-@article{Honig:2007lr,
-	Annote = {10.1038/nsmb0607-458},
-	Author = {Honig, Barry},
-	Date-Added = {2007-06-25 15:15:01 -0700},
-	Date-Modified = {2007-06-25 15:15:29 -0700},
-	Group = {Fold Space; Reviewed},
-	Isbn = {1545-9993},
-	Journal = {Nat Struct Mol Biol},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Honig/2007.pdf},
-	M3 = {10.1038/nsmb0607-458},
-	Number = {6},
-	Pages = {458--458},
-	Title = {Protein structure space is much more than the sum of its folds},
-	Ty = {JOUR},
-	Url = {http://dx.doi.org/10.1038/nsmb0607-458},
-	Volume = {14},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGoIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8iicKlea5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUhvbmlnAAAQAAgAAMFxjUkAAAARAAgAAMKl3B4AAAABABgARmxqAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIb25pZzoyMDA3LnBkZgAOABIACAAyADAAMAA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0hvbmlnLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSG9uaWcvMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/nsmb0607-458}}
-
-@article{Tan:2006lr,
-	Abstract = {Aligning distantly related protein sequences is a long-standing problem in bioinformatics, and a key for successful protein structure prediction. Its importance is increasing recently in the context of structural genomics projects because more and more experimentally solved structures are available as templates for protein structure modeling. Toward this end, recent structure prediction methods employ profile-profile alignments, and various ways of aligning two profiles have been developed. More fundamentally, a better amino acid similarity matrix can improve a profile itself; thereby resulting in more accurate profile-profile alignments. Here we have developed novel amino acid similarity matrices from knowledge-based amino acid contact potentials. Contact potentials are used because the contact propensity to the other amino acids would be one of the most conserved features of each position of a protein structure. The derived amino acid similarity matrices are tested on benchmark alignments at three different levels, namely, the family, the superfamily, and the fold level. Compared to BLOSUM45 and the other existing matrices, the contact potential-based matrices perform comparably in the family level alignments, but clearly outperform in the fold level alignments. The contact potential-based matrices perform even better when suboptimal alignments are considered. Comparing the matrices themselves with each other revealed that the contact potential-based matrices are very different from BLOSUM45 and the other matrices, indicating that they are located in a different basin in the amino acid similarity matrix space.},
-	Address = {Department of Computer Sciences, College of Science, Purdue University, West Lafayette, Indiana 47907, USA. dkihara@purdue.edu},
-	Au = {Tan, YH and Huang, H and Kihara, D},
-	Author = {Tan, Yen Hock and Huang, He and Kihara, Daisuke},
-	Da = {20060821},
-	Date-Added = {2007-06-22 12:54:46 -0700},
-	Date-Modified = {2007-06-22 12:54:52 -0700},
-	Dcom = {20061004},
-	Doi = {10.1002/prot.21020},
-	Edat = {2006/06/27 09:00},
-	Gr = {R01 GM-075004/GM/NIGMS},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acids/*chemistry/genetics; Computational Biology/methods; Databases, Protein/statistics \& numerical data; Protein Folding; Proteins/*chemistry/genetics; Reproducibility of Results; Sequence Alignment/*methods/statistics \& numerical data; Software},
-	Language = {eng},
-	Lr = {20061115},
-	Mhda = {2006/10/05 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {587--600},
-	Pl = {United States},
-	Pmid = {16799934},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2006 Aug 15;64(3):587-600.},
-	Stat = {MEDLINE},
-	Title = {Statistical potential-based amino acid similarity matrices for aligning distantly related protein sequences},
-	Volume = {64},
-	Year = {2006},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.21020}}
-
-@article{Johnson:1993lr,
-	Abstract = {A residue-exchange matrix has been derived that is suitable for comparison of amino acid sequences. This matrix is based on the tabulation of 207,795 amino acid replacements observed in 65 homologous sets of structurally aligned three-dimensional structures (235 proteins). The majority of the data is from structural comparisons where there is between 15 and 40% sequence identity. As a result, a scoring matrix such as the one devised here should provide a sensitive basis for the comparison of amino acid sequences and the search for homologous sequences in amino acid databases. In order to assess the value of this matrix we have made a comparative analysis with 12 other published scoring matrices that have been used for the alignment of protein amino acid sequences. We find that the matrix derived here is among the better performers in terms of alignment significance, detection of homologous sequences and the accuracy of alignments.},
-	Address = {Department of Crystallography, Birkbeck College, University of London, U.K.},
-	Annote = {Figs. 2 & 4 has the aa dendrograms.
-Let's just test the MM alphabet from JO matrix.
-No ROC.},
-	Au = {Johnson, MS and Overington, JP},
-	Author = {Johnson, M S and Overington, J P},
-	Da = {19931119},
-	Date-Added = {2007-06-20 14:26:49 -0700},
-	Date-Modified = {2008-05-29 14:21:36 -0700},
-	Dcom = {19931119},
-	Edat = {1993/10/20},
-	Group = {Alphabets; Forward; Backward},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acid Sequence; Animals; Cytochrome c Group/chemistry; Endopeptidases/chemistry; Globins/chemistry; Humans; Information Systems; Molecular Sequence Data; Reproducibility of Results; Retroviridae/enzymology; Sequence Analysis/*methods; *Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Johnson/1993.pdf},
-	Lr = {20061115},
-	Mhda = {1993/10/20 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {716--738},
-	Pii = {S0022-2836(83)71548-2},
-	Pl = {ENGLAND},
-	Pmid = {8411177},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Cytochrome c Group); 9004-22-2 (Globins); EC 3.4.- (Endopeptidases)},
-	Sb = {IM},
-	So = {J Mol Biol. 1993 Oct 20;233(4):716-38.},
-	Stat = {MEDLINE},
-	Title = {A structural basis for sequence comparisons. {An} evaluation of scoring methodologies},
-	Volume = {233},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHQIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB43E8Ke6ZZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0pvaG5zb24AABAACAAAwXGNSQAAABEACAAAwp9MBgAAAAEAGABGbHQARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkpvaG5zb246MTk5My5wZGYADgASAAgAMQA5ADkAMwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Kb2huc29uLzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSm9obnNvbi8xOTkzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.1993.1548}}
-
-@article{Bork:1990fk,
-	Abstract = {Consensus sequence patterns for beta-alpha-beta folds binding FAD, NAD and GTP were constructed on the basis of 11 steric and physicochemical properties. These property patterns permit detection and distinction of the respective nucleotide-binding sites on the basis of amino acid sequence analysis alone. The SWISS-PROT database (release 9) was screened with the three calculated patterns, and nucleotide-binding sites identified are presented. They correspond to existing structure data (if known). For the detected sequence segments we are able to predict the beta-alpha-beta motif as well as the respective binding sites. For some of the proteins so detected a nucleotide-binding capacity has not previously been reported.},
-	Address = {Department of Biomathematics, Central Institute of Molecular Biology, Academy of Sciences of German Democratic Republic, Berlin-Buch.},
-	Annote = {This paper explains how the motifs were derived in the Bork, Sander, Valencia 1992 PNAS paper that identified all the homologs of actin.  Describes the pattern based approach to finding homologs, also using 11 amino acid traits.  Used in a 1992 PNAS article to identify MreB as a homolog of actin.},
-	Au = {Bork, P and Grunwald, C},
-	Author = {Bork, P and Grunwald, C},
-	Da = {19900914},
-	Date-Added = {2007-06-19 16:02:42 -0700},
-	Date-Modified = {2007-06-21 16:03:23 -0700},
-	Dcom = {19900914},
-	Edat = {1990/07/31},
-	Issn = {0014-2956 (Print)},
-	Jid = {0107600},
-	Journal = {Eur J Biochem},
-	Jt = {European journal of biochemistry / FEBS},
-	Keywords = {*Algorithms; Amino Acid Sequence; Amino Acids/analysis; Base Sequence; Binding Sites/genetics; Flavin-Adenine Dinucleotide/genetics; Guanosine Triphosphate/genetics; Information Systems; Molecular Conformation; Molecular Sequence Data; NAD/genetics; Nucleotides/*genetics; *Pattern Recognition, Automated; Protein Conformation; Structure-Activity Relationship},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bork/1990a.pdf},
-	Lr = {20041117},
-	Mhda = {1990/07/31 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {347--358},
-	Pl = {GERMANY, WEST},
-	Pmid = {2384083},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Nucleotides); 146-14-5 (Flavin-Adenine Dinucleotide); 53-84-9 (NAD); 86-01-1 (Guanosine Triphosphate)},
-	Sb = {IM},
-	So = {Eur J Biochem. 1990 Jul 31;191(2):347-58.},
-	Stat = {MEDLINE},
-	Title = {Recognition of different nucleotide-binding sites in primary structures using a property-pattern approach},
-	Volume = {191},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/gJMTk5MGEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB33zsKdVhlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEJvcmsAEAAIAADBcY1JAAAAEQAIAADCnbiJAAAAAQAYAEZr+ABGa88ARmrVAEZqGwBGZGgAQIlDAAIAQGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Qm9yazoxOTkwYS5wZGYADgAUAAkAMQA5ADkAMABhAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0JvcmsvMTk5MGEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQm9yay8xOTkwYS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@url{dali-download,
-	Date-Added = {2007-06-18 18:11:49 -0700},
-	Date-Modified = {2007-06-18 18:12:38 -0700},
-	Title = {Dali Downloads},
-	Url = {http://ekhidna.biocenter.helsinki.fi/dali/downloads},
-	Bdsk-Url-1 = {http://ekhidna.biocenter.helsinki.fi/dali/downloads}}
-
-@article{Andersen:2004rt,
-	Annote = {AB},
-	Author = {Andersen, C A F and Brunak, S},
-	Date = {SPR},
-	Date-Added = {2007-05-31 22:51:20 -0700},
-	Date-Modified = {2008-05-29 12:20:23 -0700},
-	Group = {Alphabets; ROC; Reviewed; Forward; Printed; Backward},
-	Isi = {ISI:000220410600009},
-	Journal = {AI MAGAZINE},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Andersen/2004.pdf},
-	Month = {Spr},
-	Number = {1},
-	Pages = {97--104},
-	Publication-Type = {J},
-	Title = {Representation of protein-sequence information by amino acid subalphabets},
-	Volume = {25},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+AIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABtuAsKFAToAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFuZGVyc2VuABAACAAAwXGNSQAAABEACAAAwoVjqgAAAAEAGABGa+AARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFuZGVyc2VuOjIwMDQucGRmAAAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FuZGVyc2VuLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FuZGVyc2VuLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Kuznetsov:2002vn,
-	Abstract = {We present a novel method designed to analyze the discriminative ability of knowledge-based potentials with respect to the 20 residue types. The method is based on the preference of amino acids for specific types of protein environment, and uses a virtual mutagenesis experiment to estimate how much information a given potential can provide about environments of each amino acid type. This allows one to test and optimize the performance of real potentials at the level of individual amino acids, using actual data on residue environments from a dataset of known protein structures. We have applied our method to long-range and medium-range pairwise distance-dependent potentials. The results of our study indicate that these potentials are only able to discriminate between a very limited number of residue types, and that discriminative ability is extremely sensitive to the choice of parameters used to construct the potentials, and even to the size of the training dataset. We also show that different types of pairwise distance potentials are dominated by different types of interactions. These dominant interactions strongly depend on the type of approximation used to define residue position. For each potential, our methodology is able to identify a potential-specific amino acid distance matrix and a reduced amino acid alphabet of any specified size, which may have implications for sequence alignment and multibody models.},
-	Address = {Department of Biomathematical Sciences, Mount Sinai School of Medicine, New York, New York 10029, USA.},
-	Annote = {Use the 1st long-range dendrogram, which has "the same information as substitution matrices" and was the best performer (not the C-beta long-range one.)
-
-KR
-
-No ROC.},
-	Au = {Kuznetsov, IB and Rackovsky, S},
-	Author = {Kuznetsov, I B and Rackovsky, S},
-	Ci = {Copyright 2002 Wiley-Liss, Inc.},
-	Da = {20020904},
-	Date-Added = {2007-05-31 22:29:20 -0700},
-	Date-Modified = {2008-05-29 12:31:14 -0700},
-	Dcom = {20021003},
-	Edat = {2002/09/05 10:00},
-	Gr = {1R01 LM06789/LM/NLM},
-	Group = {Alphabets; Reviewed; Forward; Printed; Backward},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Substitution; Amino Acids/*chemistry; Cluster Analysis; Computer Simulation; Hydrophobicity; Molecular Structure; Protein Structure, Secondary; Proteins/*chemistry/genetics; Sample Size; Sequence Analysis, Protein/*methods},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kuznetsov/2002.pdf},
-	Lr = {20061115},
-	Mhda = {2002/10/04 04:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {266--284},
-	Pl = {United States},
-	Pmid = {12211006},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Evaluation Studies; Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2002 Nov 1;49(2):266-84.},
-	Stat = {MEDLINE},
-	Title = {Discriminative ability with respect to amino acid types: assessing the performance of knowledge-based potentials without threading},
-	Volume = {49},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJEIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABttKcKE/K1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUt1em5ldHNvdgAAEAAIAADBcY1JAAAAEQAIAADChV8dAAAAAQAYAEZskQBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6S3V6bmV0c292OjIwMDIucGRmAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS3V6bmV0c292LzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS3V6bmV0c292LzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.10211}}
-
-@article{Trinquier:1998yq,
-	Abstract = {Simple procedures are proposed to quantify how much an effective property embodied in a given ranking of the twenty amino acids can be affected by random point mutations at nucleotide bases. As expected, of the various orderings tested, rankings based on most hydrophobicity scales exhibit low scores, thus offering better immunity towards such single-base mutations. This, however, occurs to different extents and the method allows sharp discriminations between the scales. Hydrophobicity scales based on global properties such as spatial environment data of proteins residues, or mutation matrices of amino acid replacements, generally behave better than those based on pure physicochemical properties of isolated residues. An averaged scale built from the available hydrophobicity scales exhibits one of the most favorable scores. A systematic search for the best amino acid order has been carried out across all possible scales. Optimized scales are characterized by the existence of a clustering scheme into three zones, within which permutations are more or less tolerated, depending on the zone and on the summation procedure used in the score calculation. The first cluster corresponds to the hydrophobic side, and includes the ten amino acids WMCFILVGRS. Next follows the ATP triad. The third cluster coincides with the hydrophilic side and includes, in the last seven positions, the amino acids EDKNQHY. Interpretation of these optimized scales in terms of codon positions in the genetic code further suggests a clustering scheme composed of four groups, WMCFILV-GRS-ATP-EDKNQHY, emphasizing the role of the second base as the main driving parameter. As a consequence, the conserved character of the genetic code is better reflected when it is displayed in UGCA ordering rather than in the commonly used UCAG ordering. The present a priori classification of the amino acids could find potential use in protein sequence homology and structure prediction.},
-	Address = {Laboratoire de Physique Quantique, IRSAMC-CNRS, Universite Paul-Sabatier, Toulouse, France.},
-	Annote = {the mother of all hydrophobicity scales paper},
-	Au = {Trinquier, G and Sanejouand, YH},
-	Author = {Trinquier, G and Sanejouand, Y H},
-	Da = {19980807},
-	Date-Added = {2007-05-31 22:23:58 -0700},
-	Date-Modified = {2007-06-19 14:30:03 -0700},
-	Dcom = {19980807},
-	Edat = {1998/06/05},
-	Issn = {0269-2139 (Print)},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering},
-	Keywords = {Amino Acid Sequence; Amino Acids/*chemistry/*genetics; Base Sequence; Biometry; Codon, Terminator/chemistry/genetics; *Genetic Code; Molecular Sequence Data; Point Mutation/genetics; Protein Engineering; Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Trinquier/1998.pdf},
-	Lr = {20001218},
-	Mhda = {1998/06/05 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {153--169},
-	Pl = {ENGLAND},
-	Pmid = {9613840},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Codon, Terminator)},
-	Sb = {IM},
-	So = {Protein Eng. 1998 Mar;11(3):153-69.},
-	Stat = {MEDLINE},
-	Title = {Which effective property of amino acids is best preserved by the genetic code?},
-	Volume = {11},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbTQIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABtr9MKE9QVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVRyaW5xdWllcgAAEAAIAADBcY1JAAAAEQAIAADChVd1AAAAAQAYAEZtNABGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6VHJpbnF1aWVyOjE5OTgucGRmAA4AEgAIADEAOQA5ADgALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVHJpbnF1aWVyLzE5OTgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVHJpbnF1aWVyLzE5OTgucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Maiorov:1992kx,
-	Abstract = {We have devised a continuous function of interresidue contacts in globular proteins such that the X-ray crystal structure has a lower function value than that of thousands of protein-like alternative conformations. Although we fit the adjustable parameters of the potential using only 10,000 alternative structures for a selected training set of 37 proteins, a grand total of 530,000 constraints was satisfied, derived from 73 proteins and their numerous alternative conformations. In every case where the native conformation is adequately globular and compact, according to objective criteria we have developed, the potential function always favors the native over all alternatives by a substantial margin. This is true even for an additional three proteins never used in any way in the fitting procedure. Conformations differing only slightly from the native, such as those coming from crystal structures of the same protein complexed with different ligands or from crystal structures of point mutants, have function values very similar to the native's and always less than those of alternatives derived from substantially different crystal structures. This holds for all 95 structures that are homologous to one or another of various proteins we used. Realizing that this potential should be useful for modeling the conformation of new protein sequences from the body of protein crystal structures, we suggest a test for deciding whether a nearly correct approximation to the native conformation has been found.},
-	Address = {College of Pharmacy, University of Michigan, Ann Arbor 48109.},
-	Au = {Maiorov, VN and Crippen, GM},
-	Author = {Maiorov, V N and Crippen, G M},
-	Da = {19921030},
-	Date-Added = {2007-05-31 18:15:42 -0700},
-	Date-Modified = {2007-05-31 21:44:24 -0700},
-	Dcom = {19921030},
-	Edat = {1992/10/05},
-	Gr = {DA06746/DA/NIDA; GM37123/GM/NIGMS},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Animals; Humans; Mathematics; *Protein Conformation; *Protein Folding; *Protein Structure, Tertiary; X-Ray Diffraction},
-	Language = {eng},
-	Lr = {20061115},
-	Mhda = {1992/10/05 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {876--888},
-	Pl = {ENGLAND},
-	Pmid = {1404392},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {J Mol Biol. 1992 Oct 5;227(3):876-88.},
-	Stat = {MEDLINE},
-	Title = {Contact potential that recognizes the correct folding of globular proteins},
-	Volume = {227},
-	Year = {1992}}
-
-@article{Santibanez:1987fj,
-	Abstract = {A program for the multiple alignment of protein sequences is presented. The program is an extension of the fast alignment program by Wilbur et al. (1984) into higher dimensions. The use of hash procedures on fragments of the protein sequences increases the speed of calculation. Thereby we also take into account fragments which are present in some, but not in all, sequences considered. The results of some multiple alignments are given.},
-	Address = {Department of Biomathematics, Academy of Sciences, Berlin-Buch, GDR.},
-	Au = {Santibanez, M and Rohde, K},
-	Author = {Santibanez, M and Rohde, K},
-	Da = {19880818},
-	Date-Added = {2007-05-31 16:26:26 -0700},
-	Date-Modified = {2007-06-25 18:21:40 -0700},
-	Dcom = {19880818},
-	Edat = {1987/06/01},
-	Issn = {0266-7061 (Print)},
-	Jid = {8511758},
-	Journal = {Comput Appl Biosci},
-	Jt = {Computer applications in the biosciences : CABIOS},
-	Keywords = {*Amino Acid Sequence; Molecular Sequence Data; *Proteins; *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Santibanez/1987.pdf},
-	Lr = {20001218},
-	Mhda = {1987/06/01 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {111--114},
-	Pl = {ENGLAND},
-	Pmid = {3453217},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Comput Appl Biosci. 1987 Jun;3(2):111-4.},
-	Stat = {MEDLINE},
-	Title = {A multiple alignment program for protein sequences},
-	Volume = {3},
-	Year = {1987},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbP4IMTk4Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABt0NMKFjzlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAClNhbnRpYmFuZXoAEAAIAADBcY1JAAAAEQAIAADChfGpAAAAAQAYAEZs/gBGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U2FudGliYW5lejoxOTg3LnBkZgAADgASAAgAMQA5ADgANwAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TYW50aWJhbmV6LzE5ODcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL1NhbnRpYmFuZXovMTk4Ny5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS}}
-
-@article{Risler:1988uq,
-	Abstract = {Amino acid substitutions in evolutionarily related proteins have been studied from a structural point of view. We consider here that an amino acid al in a protein p1 has been replaced by the amino acid a2 in the structurally similar protein p2 if, after superposition of the p1 and p2 structures, the a1 and a2 C alpha atoms are no more than 1.2 A apart. Thirty-two proteins, grouped in 11 classes, have been analysed by this method. This produced 2860 amino acid pairs (substitutions), which were analysed by multi-dimensional statistical methods. The main results are as follows: (1) according to the observed exchangeability of amino acid side-chains, only four groups (strong clusters) could be delineated; (i) Ile and Val, (ii) Leu and Met, (iii) Lys, Arg and Gln, and (iv) Tyr and Phe. The other residues could not be classified. (2) The matrix of distances between amino acids, or scoring matrix, determined from this study, is different from any other published matrix. (3) Except for the distance matrices based on the chemical properties of amino acid side-chains, which can be grouped together, all other published matrices are different from one another. (4) The distance matrix determined in this study seems to be very efficient for aligning distantly related protein sequences.},
-	Address = {Centre National de la Recherche Scientifique Centre de Genetique Moleculaire, Gif sur Yvette, France.},
-	Annote = {The idea of figuring out subsitution scores based on structural alignments was used later by the SDM/HSDM guys.
-No ROC.},
-	Au = {Risler, JL and Delorme, MO and Delacroix, H and Henaut, A},
-	Author = {Risler, J L and Delorme, M O and Delacroix, H and Henaut, A},
-	Da = {19890323},
-	Date-Added = {2007-05-31 16:23:33 -0700},
-	Date-Modified = {2007-07-20 17:59:54 -0700},
-	Dcom = {19890323},
-	Edat = {1988/12/20},
-	Group = {Alphabets; Reviewed; Forward; Printed; Backward},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acid Sequence; Amino Acids/classification/*metabolism; Animals; Bacterial Proteins/metabolism; Evolution; Humans; Molecular Sequence Data; Pattern Recognition, Automated; Proteins/*metabolism; Statistics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Risler/1988.pdf},
-	Lr = {20041117},
-	Mhda = {1988/12/20 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {1019--1029},
-	Pii = {0022-2836(88)90058-7},
-	Pl = {ENGLAND},
-	Pmid = {3221397},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Bacterial Proteins); 0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 1988 Dec 20;204(4):1019-29.},
-	Stat = {MEDLINE},
-	Title = {Amino acid substitutions in structurally related proteins. A pattern recognition approach. Determination of a new and efficient scoring matrix.},
-	Volume = {204},
-	Year = {1988},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPIIMTk4OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABt0LcKFjydQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlJpc2xlcgAQAAgAAMFxjUkAAAARAAgAAMKF8ZcAAAABABgARmzyAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSaXNsZXI6MTk4OC5wZGYAAA4AEgAIADEAOQA4ADgALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUmlzbGVyLzE5ODgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1Jpc2xlci8xOTg4LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Miyata:1979qy,
-	Abstract = {The frequency of amino acid substitutions, relative to the frequency expected by chance, decreases linearly with the increase in physico-chemical differences between amino acid pairs involved in a substitution. This correlation does not apply to abnormal human hemoglobins. Since abnormal hemoglobins mostly reflect the process of mutation rather than selection, the correlation manifest during protein evolution between substitution frequency and physico-chemical difference in amino acids can be attributed to natural selection. Outside of 'abnormal' proteins, the correlation also does not apply to certain regions of proteins characterized by rapid rates of substitution. In these cases again, except for the largest physico-chemical differences between amino acid pairs, the substitution frequencies seem to be independent of the physico-chemical parameters. The limination of the substituents involving the largest physico-chemical differences can once more be attributed to natural selection. For smaller physico-chemical differences, natural selection, if it is operating in the polypeptide regions, must be based on parameters other than those examined.},
-	Annote = {this reference is a study of regions that are constrained in their evolution in a protein (conservative mutation) and those that are not; related to finding motifs
-
-see Table 2 for the aa partition
-
-No ROC.},
-	Au = {Miyata, T and Miyazawa, S and Yasunaga, T},
-	Author = {Miyata, T and Miyazawa, S and Yasunaga, T},
-	Da = {19790716},
-	Date-Added = {2007-05-31 16:19:15 -0700},
-	Date-Modified = {2007-08-03 11:33:25 -0700},
-	Dcom = {19790716},
-	Edat = {1979/03/15},
-	Group = {Alphabets; Forward; Backward},
-	Issn = {0022-2844 (Print)},
-	Jid = {0360051},
-	Journal = {J Mol Evol},
-	Jt = {Journal of molecular evolution},
-	Keywords = {Amino Acid Sequence; *Amino Acids; *Evolution; *Proteins},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Miyata/1979.pdf},
-	Lr = {20061115},
-	Mhda = {1979/03/15 00:01},
-	Number = {3},
-	Oid = {NASA: 79175356},
-	Own = {NLM},
-	Pages = {219--236},
-	Pl = {GERMANY, WEST},
-	Pmid = {439147},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM; S},
-	So = {J Mol Evol. 1979 Mar 15;12(3):219-36.},
-	Stat = {MEDLINE},
-	Title = {Two types of amino acid substitutions in protein evolution},
-	Volume = {12},
-	Year = {1979},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbL4IMTk3OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABuA58KGW9BQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk1peWF0YQAQAAgAAMFxjUkAAAARAAgAAMKGvkAAAAABABgARmy+AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNaXlhdGE6MTk3OS5wZGYAAA4AEgAIADEAOQA3ADkALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTWl5YXRhLzE5NzkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL01peWF0YS8xOTc5LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Landes:1994fk,
-	Abstract = {Fast sequence databanks search algorithms generally make use of hash tables and look for exactly matching words. An increased sensitivity--at the expense of a decreased selectivity--can be attained in the case of proteins by using a reduced amino acid alphabet. We propose here an alphabet reduced to 10 symbols, that we used in modified versions of the FASTP and SCAN programs. An application to the aminoacyl-tRNA synthetases shows that this technique may be useful in detecting distant relationships between proteins.},
-	Address = {Centre de Genetique Moleculaire du CNRS, Gif sur Yvette, France.},
-	Annote = {LR},
-	Au = {Landes, C and Risler, JL},
-	Author = {Landes, C and Risler, J L},
-	Da = {19950130},
-	Date-Added = {2007-05-31 16:04:18 -0700},
-	Date-Modified = {2008-05-29 12:22:28 -0700},
-	Dcom = {19950130},
-	Edat = {1994/07/01},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {8511758},
-	Journal = {Comput Appl Biosci},
-	Jt = {Computer applications in the biosciences : CABIOS},
-	Keywords = {Algorithms; Amino Acid Sequence; Amino Acyl-tRNA Synthetases/genetics; *Databases, Factual; Escherichia coli/enzymology/genetics; Molecular Sequence Data; Oligopeptides/genetics; Proteins/*genetics; Sequence Alignment/methods; *Software; Terminology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Landes/1994.pdf},
-	Lr = {20061115},
-	Mhda = {1994/07/01 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {453--454},
-	Pl = {ENGLAND},
-	Pmid = {7804879},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Oligopeptides); 0 (Proteins); EC 6.1.1.- (Amino Acyl-tRNA Synthetases)},
-	Sb = {IM},
-	So = {Comput Appl Biosci. 1994 Jul;10(4):453-4.},
-	Stat = {MEDLINE},
-	Title = {Fast databank searching with a reduced amino-acid alphabet},
-	Volume = {10},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJMIMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABt0KsKFjyFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkxhbmRlcwAQAAgAAMFxjUkAAAARAAgAAMKF8ZEAAAABABgARmyTAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMYW5kZXM6MTk5NC5wZGYAAA4AEgAIADEAOQA5ADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGFuZGVzLzE5OTQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0xhbmRlcy8xOTk0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Thompson:1996lr,
-	Abstract = {Using an information theoretic formalism, we optimize classes of amino acid substitution to be maximally indicative of local protein structure. Our statistically-derived classes are loosely identifiable with the heuristic constructions found in previously published work. However, while these other methods provide a more rigid idealization of physicochemically constrained residue substitution, our classes provide substantially more structural information with many fewer parameters. Moreover, these substitution classes are consistent with the paradigmatic view of the sequence-to-structure relationship in globular proteins which holds that the three-dimensional architecture is predominantly determined by the arrangement of hydrophobic and polar side chains with weak constraints on the actual amino acid identities. More specific constraints are imposed on the placement of prolines, glycines, and the charged residues. These substitution classes have been used in highly accurate predictions of residue solvent accessibility. They could also be used in the identification of homologous proteins, the construction and refinement of multiple sequence alignments, and as a means of condensing and codifying the information in multiple sequence alignments for secondary structure prediction and tertiary fold recognition.},
-	Address = {Biophysics Research Division, University of Michigan, Ann Arbor 48109-1055, USA.},
-	Annote = {Define 28 classes based on MSAs.  In applicable to pairwise ROC generation with DALI.
-Might be useful for "fishing".},
-	Au = {Thompson, MJ and Goldstein, RA},
-	Author = {Thompson, M J and Goldstein, R A},
-	Da = {19961105},
-	Date-Added = {2007-05-31 15:36:28 -0700},
-	Date-Modified = {2007-06-19 14:23:35 -0700},
-	Dcom = {19961105},
-	Doi = {10.1002/(SICI)1097-0134(199605)25:1{$<$}28::AID-PROT3{$>$}3.0.CO;2-G},
-	Edat = {1996/05/01},
-	Gr = {R29 LM05770/LM/NLM},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Sequence; Amino Acids/*chemistry; Databases, Factual; Information Theory; Molecular Sequence Data; *Protein Conformation; Protein Structure, Secondary; Protein Structure, Tertiary; Proteins/*chemistry; Sequence Alignment; Solvents},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Thompson/1996.pdf},
-	Lr = {20061115},
-	Mhda = {2000/06/20 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {28--37},
-	Pii = {10.1002/(SICI)1097-0134(199605)25:1<28::AID-PROT3>3.0.CO;2-G},
-	Pl = {UNITED STATES},
-	Pmid = {8727317},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins); 0 (Solvents)},
-	Sb = {IM},
-	So = {Proteins. 1996 May;25(1):28-37.},
-	Stat = {MEDLINE},
-	Title = {Constructing amino acid residue substitution classes maximally indicative of local protein structure},
-	Volume = {25},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbS8IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABtdicKEnLEAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFRob21wc29uABAACAAAwXGNSQAAABEACAAAwoT/IQAAAAEAGABGbS8ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlRob21wc29uOjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1Rob21wc29uLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1Rob21wc29uLzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/(SICI)1097-0134(199605)25:1%7B$%3C$%7D28::AID-PROT3%7B$%3E$%7D3.0.CO;2-G}}
-
-@article{Przybylski:2007lr,
-	Abstract = {Sequence alignments may be the most fundamental computational resource for molecular biology. The best methods that identify sequence relatedness through profile-profile comparisons are much slower and more complex than sequence-sequence and sequence-profile comparisons such as, respectively, BLAST and PSI-BLAST. Families of related genes and gene products (proteins) can be represented by consensus sequences that list the nucleic/amino acid most frequent at each sequence position in that family. Here, we propose a novel approach for consensus-sequence-based comparisons. This approach improved searches and alignments as a standard add-on to PSI-BLAST without any changes of code. Improvements were particularly significant for more difficult tasks such as the identification of distant structural relations between proteins and their corresponding alignments. Despite the fact that the improvements were higher for more divergent relations, they were consistent even at high accuracy/low error rates for non-trivially related proteins. The improvements were very easy to achieve; no parameter used by PSI-BLAST was altered and no single line of code changed. Furthermore, the consensus sequence add-on required relatively little additional CPU time. We discuss how advanced users of PSI-BLAST can immediately benefit from using consensus sequences on their local computers. We have also made the method available through the Internet (http://www.rostlab.org/services/consensus/).},
-	Address = {Department of Biochemistry and Molecular Biophysics, Columbia University, New York, NY 10032, USA. dsp23@columbia.edu},
-	Au = {Przybylski, D and Rost, B},
-	Author = {Przybylski, Dariusz and Rost, Burkhard},
-	Da = {20070502},
-	Date-Added = {2007-05-09 16:26:36 -0700},
-	Date-Modified = {2007-05-09 16:26:47 -0700},
-	Dep = {20070316},
-	Doi = {10.1093/nar/gkm107},
-	Edat = {2007/03/21 09:00},
-	Gr = {R01-LM07329-01/LM/NLM; U54-GM074958-01/GM/NIGMS},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Przybylski/2007.pdf},
-	Mhda = {2007/03/21 09:00},
-	Number = {7},
-	Own = {NLM},
-	Pages = {2238--2246},
-	Phst = {2007/03/16 {$[$}aheadofprint{$]$}},
-	Pii = {gkm107},
-	Pl = {England},
-	Pmid = {17369271},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2007;35(7):2238-46. Epub 2007 Mar 16.},
-	Stat = {In-Process},
-	Title = {Consensus sequences improve PSI-BLAST through mimicking profile-profile alignments},
-	Volume = {35},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOIIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABj8XcJnpklQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAClByenlieWxza2kAEAAIAADBcY1JAAAAEQAIAADCaAi5AAAAAQAYAEZs4gBGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UHJ6eWJ5bHNraToyMDA3LnBkZgAADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Qcnp5Ynlsc2tpLzIwMDcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL1ByenlieWxza2kvMjAwNy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/nar/gkm107}}
-
-@article{Balter:2000fk,
-	Au = {Balter, M},
-	Author = {Balter, M},
-	Da = {20000927},
-	Date-Added = {2007-05-01 10:13:06 -0700},
-	Date-Modified = {2007-05-01 10:13:37 -0700},
-	Dcom = {20000927},
-	Edat = {2000/09/30 11:00},
-	Issn = {0036-8075 (Print)},
-	Jid = {0404511},
-	Journal = {Science},
-	Jt = {Science (New York, N.Y.)},
-	Keywords = {Animals; *Biogenesis; *Evolution; Evolution, Molecular; Viral Physiology; Viral Proteins/chemistry; *Viruses/chemistry/genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Balter/2000.pdf},
-	Lr = {20070319},
-	Mhda = {2000/09/30 11:01},
-	Number = {5486},
-	Own = {NLM},
-	Pages = {1866--1867},
-	Pl = {UNITED STATES},
-	Pmid = {11012352},
-	Pst = {ppublish},
-	Pt = {News},
-	Pubm = {Print},
-	Rn = {0 (Viral Proteins)},
-	Sb = {IM; S},
-	So = {Science. 2000 Sep 15;289(5486):1866-7.},
-	Stat = {MEDLINE},
-	Title = {Virology: Evolution on life's fringes},
-	Volume = {289},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+kIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABf2lcJcwpoAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkJhbHRlcgAQAAgAAMFxjUkAAAARAAgAAMJdJQoAAAABABgARmvpAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCYWx0ZXI6MjAwMC5wZGYAAA4AEgAIADIAMAAwADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQmFsdGVyLzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0JhbHRlci8yMDAwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Melo:2006lr,
-	Abstract = {Reduced or simplified amino acid alphabets group the 20 naturally occurring amino acids into a smaller number of representative protein residues. To date, several reduced amino acid alphabets have been proposed, which have been derived and optimized by a variety of methods. The resulting reduced amino acid alphabets have been applied to pattern recognition, generation of consensus sequences from multiple alignments, protein folding, and protein structure prediction. In this work, amino acid substitution matrices and statistical potentials were derived based on several reduced amino acid alphabets and their performance assessed in a large benchmark for the tasks of sequence alignment and fold assessment of protein structure models, using as a reference frame the standard alphabet of 20 amino acids. The results showed that a large reduction in the total number of residue types does not necessarily translate into a significant loss of discriminative power for sequence alignment and fold assessment. Therefore, some definitions of a few residue types are able to encode most of the relevant sequence/structure information that is present in the 20 standard amino acids. Based on these results, we suggest that the use of reduced amino acid alphabets may allow to increasing the accuracy of current substitution matrices and statistical potentials for the prediction of protein structure of remote homologs.},
-	Address = {Departamento de Genetica Molecular y Microbiologia, Facultad de Ciencias Biologicas, Pontificia Universidad Catolica de Chile, Santiago, Chile. fmelo@bio.puc.cl},
-	Annote = {MM5},
-	Au = {Melo, F and Marti-Renom, MA},
-	Author = {Melo, F and Marti-Renom, M A},
-	Ci = {2006 Wiley-Liss, Inc.},
-	Da = {20060516},
-	Date-Added = {2007-05-01 09:58:02 -0700},
-	Date-Modified = {2008-05-29 12:23:09 -0700},
-	Dcom = {20060720},
-	Edat = {2006/03/01 09:00},
-	Group = {Alphabets; Reviewed; Forward; Printed; Backward; ROC},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Sequence; Amino Acids/*chemistry/classification/*metabolism; Consensus Sequence; Molecular Sequence Data; Oxidation-Reduction; *Protein Folding; Proteins/*chemistry/*metabolism; Sequence Alignment/*methods; Structural Homology, Protein},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Melo/2006.pdf},
-	Lr = {20061115},
-	Mhda = {2006/07/21 09:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {986--995},
-	Pl = {United States},
-	Pmid = {16506243},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Read = {Yes},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2006 Jun 1;63(4):986-95.},
-	Stat = {MEDLINE},
-	Title = {Accuracy of sequence alignment and fold assessment using reduced amino acid alphabets},
-	Volume = {63},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbLYIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABf1s8Jcv7NQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABE1lbG8AEAAIAADBcY1JAAAAEQAIAADCXSIjAAAAAQAYAEZstgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TWVsbzoyMDA2LnBkZgAADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9NZWxvLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL01lbG8vMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.20881}}
-
-@article{Karev:2002zr,
-	Abstract = {BACKGROUND: Power distributions appear in numerous biological, physical and other contexts, which appear to be fundamentally different. In biology, power laws have been claimed to describe the distributions of the connections of enzymes and metabolites in metabolic networks, the number of interactions partners of a given protein, the number of members in paralogous families, and other quantities. In network analysis, power laws imply evolution of the network with preferential attachment, i.e. a greater likelihood of nodes being added to pre-existing hubs. Exploration of different types of evolutionary models in an attempt to determine which of them lead to power law distributions has the potential of revealing non-trivial aspects of genome evolution. RESULTS: A simple model of evolution of the domain composition of proteomes was developed, with the following elementary processes: i) domain birth (duplication with divergence), ii) death (inactivation and/or deletion), and iii) innovation (emergence from non-coding or non-globular sequences or acquisition via horizontal gene transfer). This formalism can be described as a birth, death and innovation model (BDIM). The formulas for equilibrium frequencies of domain families of different size and the total number of families at equilibrium are derived for a general BDIM. All asymptotics of equilibrium frequencies of domain families possible for the given type of models are found and their appearance depending on model parameters is investigated. It is proved that the power law asymptotics appears if, and only if, the model is balanced, i.e. domain duplication and deletion rates are asymptotically equal up to the second order. It is further proved that any power asymptotic with the degree not equal to -1 can appear only if the hypothesis of independence of the duplication/deletion rates on the size of a domain family is rejected. Specific cases of BDIMs, namely simple, linear, polynomial and rational models, are considered in details and the distributions of the equilibrium frequencies of domain families of different size are determined for each case. We apply the BDIM formalism to the analysis of the domain family size distributions in prokaryotic and eukaryotic proteomes and show an excellent fit between these empirical data and a particular form of the model, the second-order balanced linear BDIM. Calculation of the parameters of these models suggests surprisingly high innovation rates, comparable to the total domain birth (duplication) and elimination rates, particularly for prokaryotic genomes. CONCLUSIONS: We show that a straightforward model of genome evolution, which does not explicitly include selection, is sufficient to explain the observed distributions of domain family sizes, in which power laws appear as asymptotic. However, for the model to be compatible with the data, there has to be a precise balance between domain birth, death and innovation rates, and this is likely to be maintained by selection. The developed approach is oriented at a mathematical description of evolution of domain composition of proteomes, but a simple reformulation could be applied to models of other evolving networks with preferential attachment.},
-	Address = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA. karev@ncbi.nlm.nih.gov},
-	Au = {Karev, GP and Wolf, YI and Rzhetsky, AY and Berezovskaya, FS and Koonin, EV},
-	Author = {Karev, Georgy P and Wolf, Yuri I and Rzhetsky, Andrey Y and Berezovskaya, Faina S and Koonin, Eugene V},
-	Da = {20050721},
-	Date-Added = {2007-04-25 11:24:17 -0700},
-	Date-Modified = {2007-04-26 17:02:40 -0700},
-	Dcom = {20060207},
-	Dep = {20021014},
-	Edat = {2002/10/16 04:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {1471-2148 (Electronic)},
-	Jid = {100966975},
-	Journal = {BMC Evol Biol},
-	Jt = {BMC evolutionary biology},
-	Keywords = {Animals; Arabidopsis/genetics; Bacillus subtilis/genetics; Caenorhabditis elegans/genetics; Computer Simulation; Death; Drosophila melanogaster/genetics; Escherichia coli/genetics; *Evolution; Humans; *Mathematical Computing; Methanobacteriaceae/genetics; *Models, Biological; Parturition; Protein Structure, Tertiary/*genetics; Saccharomyces cerevisiae/genetics; Sulfolobus solfataricus/genetics; Thermotoga maritima/genetics; Variation (Genetics)/genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Karev/2002.pdf},
-	Mhda = {2006/02/08 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {18},
-	Phst = {2002/09/03 {$[$}received{$]$}; 2002/10/14 {$[$}accepted{$]$}; 2002/10/14 {$[$}aheadofprint{$]$}},
-	Pl = {England},
-	Pmid = {12379152},
-	Pst = {epublish},
-	Pt = {Journal Article},
-	Pubm = {Electronic},
-	Sb = {IM},
-	So = {BMC Evol Biol. 2002 Oct 14;2(1):18.},
-	Stat = {MEDLINE},
-	Title = {Birth and death of protein domains: a simple model of evolution explains power law behavior},
-	Volume = {2},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHsIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeHcMJWi5dQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUthcmV2AAAQAAgAAMFxjUkAAAARAAgAAMJW7gcAAAABABgARmx7AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLYXJldjoyMDAyLnBkZgAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0thcmV2LzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FyZXYvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Karev:2003fr,
-	Abstract = {MOTIVATION: The distributions of many genome-associated quantities, including the membership of paralogous gene families can be approximated with power laws. We are interested in developing mathematical models of genome evolution that adequately account for the shape of these distributions and describe the evolutionary dynamics of their formation. RESULTS: We show that simple stochastic models of genome evolution lead to power-law asymptotics of protein domain family size distribution. These models, called Birth, Death and Innovation Models (BDIM), represent a special class of balanced birth-and-death processes, in which domain duplication and deletion rates are asymptotically equal up to the second order. The simplest, linear BDIM shows an excellent fit to the observed distributions of domain family size in diverse prokaryotic and eukaryotic genomes. However, the stochastic version of the linear BDIM explored here predicts that the actual size of large paralogous families is reached on an unrealistically long timescale. We show that introduction of non-linearity, which might be interpreted as interaction of a particular order between individual family members, allows the model to achieve genome evolution rates that are much better compatible with the current estimates of the rates of individual duplication/loss events.},
-	Address = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.},
-	Annote = {interesting little review of power laws in various contexts
-in this article and Karev 2002 they estimate the number of families of protein sequences, not the number of folds},
-	Au = {Karev, GP and Wolf, YI and Koonin, EV},
-	Author = {Karev, Georgy P and Wolf, Yuri I and Koonin, Eugene V},
-	Da = {20031013},
-	Date-Added = {2007-04-25 11:23:48 -0700},
-	Date-Modified = {2007-04-26 17:06:44 -0700},
-	Dcom = {20040629},
-	Edat = {2003/10/14 05:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Keywords = {Computer Simulation; *Evolution, Molecular; Gene Deletion; Gene Duplication; *Genome; *Models, Genetic; *Models, Statistical; Mutation; Protein Structure, Tertiary/*genetics; Proteins/*genetics; *Stochastic Processes; Variation (Genetics)/*genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Karev/2003.pdf},
-	Mhda = {2004/06/30 05:00},
-	Number = {15},
-	Own = {NLM},
-	Pages = {1889--1900},
-	Pl = {England},
-	Pmid = {14555621},
-	Pst = {ppublish},
-	Pt = {Evaluation Studies; Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Bioinformatics. 2003 Oct 12;19(15):1889-900.},
-	Stat = {MEDLINE},
-	Title = {Simple stochastic birth and death models of genome evolution: was there enough time for us to evolve?},
-	Volume = {19},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHsIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeGZcJWiMNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUthcmV2AAAQAAgAAMFxjUkAAAARAAgAAMJW6zMAAAABABgARmx7AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLYXJldjoyMDAzLnBkZgAOABIACAAyADAAMAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0thcmV2LzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FyZXYvMjAwMy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Luscombe:2002ys,
-	Abstract = {BACKGROUND: The sequencing of genomes provides us with an inventory of the 'molecular parts' in nature, such as protein families and folds, and their functions in living organisms. Through the analysis of such inventories, it has been shown that different genomes have very different usage of parts; for example, the common folds in the worm are very different from those in Escherichia coli. RESULTS: Despite these differences, we find that the genomic occurrence of generalized parts follows a well-known mathematical framework called the power law, with a few parts occurring many times and most occurring only a few times. This observation is true in a wide variety of genomic contexts. Earlier studies found power laws in a few specific cases, such as the occurrence of protein families. Here, we find many further cases of power-law behavior, for example in the occurrence of pseudogenes and in levels of gene expression. We show comprehensively that this behavior applies across many different genomes, for many different types of parts (DNA words, InterPro families, protein superfamilies and folds, pseudogene families and pseudomotifs), and for the many disparate attributes associated with these parts (their functions, interactions and expression levels). CONCLUSIONS: Power-law behavior provides a concise mathematical description of an important biological feature: the sheer dominance of a few members over the overall population. We present this behavior in a unified framework and propose that all these observations are connected to an underlying DNA duplication process as genomes evolved to their current state.},
-	Address = {Department of Molecular Biophysics and Biochemistry, Yale University, New Haven, CT 06520-8114, USA.},
-	Annote = {no estimate of N
-
-review of power law behavior in all sorts of things (folds, superfamilies, families, n-mers, etc. etc.)},
-	Au = {Luscombe, NM and Qian, J and Zhang, Z and Johnson, T and Gerstein, M},
-	Author = {Luscombe, Nicholas M and Qian, Jiang and Zhang, Zhaolei and Johnson, Ted and Gerstein, Mark},
-	Da = {20020820},
-	Date-Added = {2007-04-25 11:23:26 -0700},
-	Date-Modified = {2007-04-26 17:11:52 -0700},
-	Dcom = {20021010},
-	Dep = {20020725},
-	Edat = {2002/08/21 10:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {1465-6914 (Electronic)},
-	Jid = {100960660},
-	Journal = {Genome Biol},
-	Jt = {Genome biology},
-	Keywords = {Animals; Caenorhabditis elegans/genetics; Computational Biology/methods/*statistics \& numerical data; DNA, Helminth/genetics; Gene Frequency/genetics; Genes, Dominant/*genetics; Genes, Duplicate/genetics; Genes, Helminth/genetics; Genetics, Behavioral; Genome; Multigene Family/genetics; Oligonucleotides/genetics; Protein Binding/genetics; Protein Folding; Protein Interaction Mapping/statistics \& numerical data; Saccharomyces cerevisiae/genetics; Saccharomyces cerevisiae Proteins/biosynthesis/physiology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Luscombe/2002.pdf},
-	Lr = {20061115},
-	Mhda = {2002/10/11 04:00},
-	Number = {8},
-	Own = {NLM},
-	Pages = {RESEARCH0040},
-	Phst = {2002/03/05 {$[$}received{$]$}; 2002/04/19 {$[$}revised{$]$}; 2002/05/21 {$[$}accepted{$]$}; 2002/07/25 {$[$}aheadofprint{$]$}},
-	Pl = {England},
-	Pmid = {12186647},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Rn = {0 (DNA, Helminth); 0 (Oligonucleotides); 0 (Saccharomyces cerevisiae Proteins)},
-	Sb = {IM},
-	So = {Genome Biol. 2002 Jul 25;3(8):RESEARCH0040. Epub 2002 Jul 25.},
-	Stat = {MEDLINE},
-	Title = {The dominance of the population by a selected few: power-law behaviour applies to a wide variety of genomic properties},
-	Volume = {3},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKsIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeHn8JWjQNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEx1c2NvbWJlABAACAAAwXGNSQAAABEACAAAwlbvcwAAAAEAGABGbKsARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkx1c2NvbWJlOjIwMDIucGRmAAAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0x1c2NvbWJlLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0x1c2NvbWJlLzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Qian:2001rt,
-	Abstract = {Global surveys of genomes measure the usage of essential molecular parts, defined here as protein families, superfamilies or folds, in different organisms. Based on surveys of the first 20 completely sequenced genomes, we observe that the occurrence of these parts follows a power-law distribution. That is, the number of distinct parts (F) with a given genomic occurrence (V) decays as F=aV(-b), with a few parts occurring many times and most occurring infrequently. For a given organism, the distributions of families, superfamilies and folds are nearly identical, and this is reflected in the size of the decay exponent b. Moreover, the exponent varies between different organisms, with those of smaller genomes displaying a steeper decay (i.e. larger b). Clearly, the power law indicates a preference to duplicate genes that encode for molecular parts which are already common. Here, we present a minimal, but biologically meaningful model that accurately describes the observed power law. Although the model performs equally well for all three protein classes, we focus on the occurrence of folds in preference to families and superfamilies. This is because folds are comparatively insensitive to the effects of point mutations that can cause a family member to diverge beyond detectable similarity. In the model, genomes evolve through two basic operations: (i) duplication of existing genes; (ii) net flow of new genes. The flow term is closely related to the exponent b and can accommodate considerable gene loss; however, we demonstrate that the observed data is reproduced best with a net inflow, i.e. with more gene gain than loss. Moreover, we show that prokaryotes have much higher rates of gene acquisition than eukaryotes, probably reflecting lateral transfer. A further natural outcome from our model is an estimation of the fold composition of the initial genome, which potentially relates to the common ancestor for modern organisms. Supplementary material pertaining to this work is available from www.partslist.org/powerlaw.},
-	Address = {Department of Molecular Biophysics and Biochemistry, Yale University, 266 Whitney Avenue, New Haven, CT 06520-8114, USA.},
-	Annote = {no estimate of N
-
-they are looking at how folds occur in genomes, and how folds are transferred between them},
-	Au = {Qian, J and Luscombe, NM and Gerstein, M},
-	Author = {Qian, J and Luscombe, N M and Gerstein, M},
-	Ci = {Copyright 2001 Academic Press.},
-	Da = {20011107},
-	Date-Added = {2007-04-25 11:22:52 -0700},
-	Date-Modified = {2007-04-27 11:26:19 -0700},
-	Dcom = {20011205},
-	Doi = {10.1006/jmbi.2001.5079},
-	Edat = {2001/11/08 10:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Animals; Computational Biology; Computer Simulation; *Evolution, Molecular; Genes, Duplicate/genetics; *Genome; Humans; Models, Genetic; *Multigene Family/genetics; *Protein Folding; Proteins/*chemistry/classification/*genetics/metabolism; Proteome},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Qian/2001.pdf},
-	Lr = {20061115},
-	Mhda = {2002/01/05 10:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {673--681},
-	Pii = {S0022-2836(01)95079-X},
-	Pl = {England},
-	Pmid = {11697896},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Proteins); 0 (Proteome)},
-	Sb = {IM},
-	So = {J Mol Biol. 2001 Nov 2;313(4):673-81.},
-	Stat = {MEDLINE},
-	Title = {Protein family and fold occurrence in genomes: power-law behaviour and evolutionary model},
-	Volume = {313},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOQIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeeGMJXjYNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFFpYW4AEAAIAADBcY1JAAAAEQAIAADCV+/zAAAAAQAYAEZs5ABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UWlhbjoyMDAxLnBkZgAADgASAAgAMgAwADAAMQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9RaWFuLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1FpYW4vMjAwMS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.2001.5079}}
-
-@article{Govindarajan:1996vn,
-	Abstract = {Many biological proteins are observed to fold into one of a limited number of structural motifs. By considering the requirements imposed on proteins by their need to fold rapidly, and the ease with which such requirements can be fulfilled as a function of the native structure, we can explain why certain structures are repeatedly observed among proteins with negligible sequence similarity. This work has implications for the understanding of protein sequence structure relationships as well as protein evolution.},
-	Address = {Department of Chemistry, University of Michigan, Ann Arbor 48109-1055, USA.},
-	Annote = {no estimate of N
-
-nice little theory on how "foldability" might determine why we see some folds much more commonly},
-	Au = {Govindarajan, S and Goldstein, RA},
-	Author = {Govindarajan, S and Goldstein, R A},
-	Da = {19960618},
-	Date-Added = {2007-04-25 11:20:56 -0700},
-	Date-Modified = {2007-04-27 12:02:27 -0700},
-	Dcom = {19960618},
-	Edat = {1996/04/16},
-	Gr = {1R29 LM05770-01/LM/NLM},
-	Group = {Fold Space; Reviewed},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Keywords = {Evolution, Molecular; *Models, Chemical; Molecular Structure; Protein Folding; Proteins/*chemistry/genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Govindarajan/1996.pdf},
-	Lr = {20061115},
-	Mhda = {1996/04/16 00:01},
-	Number = {8},
-	Own = {NLM},
-	Pages = {3341--3345},
-	Pl = {UNITED STATES},
-	Pmid = {8622938},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 1996 Apr 16;93(8):3341-5.},
-	Stat = {MEDLINE},
-	Title = {Why are some proteins structures so common?},
-	Volume = {93},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGgAAAAAAGgAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbE4IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABegSMJXlkFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAADEdvdmluZGFyYWphbgAQAAgAAMFxjUkAAAARAAgAAMJX+LEAAAABABgARmxOAEZrzwBGatUARmobAEZkaABAiUMAAgBHaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHb3ZpbmRhcmFqYW46MTk5Ni5wZGYAAA4AEgAIADEAOQA5ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAENVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvR292aW5kYXJhamFuLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKi4uLy4uLy4uLy4uL0FydGljbGVzL0dvdmluZGFyYWphbi8xOTk2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCXQJiAmsCdgJ6AogCjwKYAsUCygLNAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAto=}}
-
-@article{Yan:2005yq,
-	Abstract = {A major goal of structural genomics is the provision of a structural template for a large fraction of protein domains. The magnitude of this task depends on the number and nature of protein sequence families. With a large number of bacterial genomes now fully sequenced, it is possible to obtain improved estimates of the number and diversity of families in that kingdom. We have used an automated clustering procedure to group all sequences in a set of genomes into protein families. Bench-marking shows the clustering method is sensitive at detecting remote family members, and has a low level of false positives. This comprehensive protein family set has been used to address the following questions. (1) What is the structure coverage for currently known families? (2) How will the number of known apparent families grow as more genomes are sequenced? (3) What is a practical strategy for maximizing structure coverage in future? Our study indicates that approximately 20% of known families with three or more members currently have a representative structure. The study indicates also that the number of apparent protein families will be considerably larger than previously thought: We estimate that, by the criteria of this work, there will be about 250,000 protein families when 1000 microbial genomes have been sequenced. However, the vast majority of these families will be small, and it will be possible to obtain structural templates for 70-80% of protein domains with an achievable number of representative structures, by systematically sampling the larger families.},
-	Address = {Center for Advanced Research in Biotechnology, University of Maryland Biotechnology Institute, 9600 Gudelsky Drive, Rockville, MD 20850, USA.},
-	Annote = {no estimate of N (directly)
-Liu et al. fit of N(M) would give approx N=30,000 for M=250k
-
-they assume that we'll want a representative structure for each FAMILY, instead of FOLD
-estimate (from 140 genomes) that when 1000 have been sequenced, 250k protein families will be known},
-	Au = {Yan, Y and Moult, J},
-	Author = {Yan, Yongpan and Moult, John},
-	Da = {20051010},
-	Date-Added = {2007-04-25 10:31:41 -0700},
-	Date-Modified = {2007-05-01 03:10:53 -0700},
-	Dcom = {20060125},
-	Dep = {20050909},
-	Doi = {10.1016/j.jmb.2005.08.058},
-	Edat = {2005/09/28 09:00},
-	Gr = {GM57890/GM/NIGMS},
-	Group = {Fold Space; Reviewed},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {*Genomics; Protein Conformation; Proteins/*chemistry/*genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Yan/2005.pdf},
-	Lr = {20061115},
-	Mhda = {2006/01/26 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {744--759},
-	Phst = {2005/03/25 {$[$}received{$]$}; 2005/08/18 {$[$}revised{$]$}; 2005/08/24 {$[$}accepted{$]$}; 2005/09/09 {$[$}aheadofprint{$]$}},
-	Pii = {S0022-2836(05)01012-0},
-	Pl = {England},
-	Pmid = {16185712},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 2005 Oct 28;353(3):744-59. Epub 2005 Sep 9.},
-	Stat = {MEDLINE},
-	Title = {Protein family clustering for structural genomics},
-	Volume = {353},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVQIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABdn6MJU3ylQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1lhbgAAEAAIAADBcY1JAAAAEQAIAADCVUGZAAAAAQAYAEZtVABGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WWFuOjIwMDUucGRmAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvWWFuLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWWFuLzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.jmb.2005.08.058}}
-
-@article{Ubarretxena-Belandia:2001kx,
-	Abstract = {During the past year, research on helical membrane proteins has brought insights into the use of deviations from canonical alpha-helical conformation to support function and the further investigation of the sequestration of protein regions from the lipid bilayer to enhance these structural alternatives. Also, the structural roles of polar sidechains, the identification of motifs in helix interactions and the significance of certain topologies on a genome-wide scale have been further explored.},
-	Address = {Department of Molecular Biophysics and Biochemistry, Yale University, 266 Whitney Avenue, PO Box 208114, New Haven, CT 06520-8114, USA.},
-	Annote = {doesn't look too relevant},
-	Au = {Ubarretxena-Belandia, I and Engelman, DM},
-	Author = {Ubarretxena-Belandia, I and Engelman, D M},
-	Da = {20010614},
-	Date-Added = {2007-04-25 10:26:38 -0700},
-	Date-Modified = {2007-04-27 11:28:23 -0700},
-	Dcom = {20010823},
-	Edat = {2001/06/19 10:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {0959-440X (Print)},
-	Jid = {9107784},
-	Journal = {Curr Opin Struct Biol},
-	Jt = {Current opinion in structural biology},
-	Keywords = {Membrane Lipids/chemistry/metabolism; Membrane Proteins/*chemistry/*metabolism; Protein Conformation; *Protein Folding},
-	Language = {eng},
-	Lr = {20061115},
-	Mhda = {2001/08/24 10:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {370--376},
-	Pii = {S0959-440X(00)00217-7},
-	Pl = {England},
-	Pmid = {11406389},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.; Review},
-	Pubm = {Print},
-	Rf = {59},
-	Rn = {0 (Membrane Lipids); 0 (Membrane Proteins)},
-	Sb = {IM},
-	So = {Curr Opin Struct Biol. 2001 Jun;11(3):370-6.},
-	Stat = {MEDLINE},
-	Title = {Helical membrane proteins: diversity of functions in the context of simple architecture},
-	Volume = {11},
-	Year = {2001}}
-
-@article{Norvell:2000fj,
-	Address = {norvellj{\char64}nigms.nih.gov},
-	Au = {Norvell, JC and Machalek, AZ},
-	Author = {Norvell, J C and Machalek, A Z},
-	Da = {20001204},
-	Date-Added = {2007-04-25 10:09:46 -0700},
-	Date-Modified = {2007-04-27 11:38:18 -0700},
-	Dcom = {20001214},
-	Doi = {10.1038/80694},
-	Edat = {2000/12/05 11:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {1072-8368 (Print)},
-	Jid = {9421566},
-	Journal = {Nat Struct Biol},
-	Jt = {Nature structural biology},
-	Keywords = {Animals; Computational Biology/*economics/methods; Crystallography, X-Ray; Genomics/*economics/methods; Humans; International Cooperation; Models, Molecular; *National Institutes of Health (U.S.)/economics; Proteins/*chemistry/economics/genetics/*metabolism; Structure-Activity Relationship; United States},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Norvell/2000.pdf},
-	Lr = {20041117},
-	Mhda = {2001/02/28 10:01},
-	Own = {NLM},
-	Pages = {931},
-	Pl = {UNITED STATES},
-	Pmid = {11103990},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Nat Struct Biol. 2000 Nov;7 Suppl:931.},
-	Stat = {MEDLINE},
-	Title = {Structural genomics programs at the US National Institute of General Medical Sciences},
-	Volume = {7 Suppl},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMoIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeeycJXkQdQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB05vcnZlbGwAABAACAAAwXGNSQAAABEACAAAwlfzdwAAAAEAGABGbMoARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOk5vcnZlbGw6MjAwMC5wZGYADgASAAgAMgAwADAAMAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ob3J2ZWxsLzIwMDAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTm9ydmVsbC8yMDAwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/80694}}
-
-@article{Wong:2006uq,
-	Abstract = {Fold designability has been estimated by the number of families contained in that fold. Here, we show that among orthologous proteins, sequence divergence is higher for folds with greater numbers of families. Folds with greater numbers of families also tend to have families that appear more often in the proteome and greater promiscuity (the number of unique "partner" folds that the fold is found with within the same protein). We also find that many disease-related proteins have folds with relatively few families. In particular, a number of these proteins are associated with diseases occurring at high frequency. These results suggest that family counts reflect how certain structures are distributed in nature and is an important characteristic associated with many human diseases.},
-	Address = {Institute for Bioinformatics, GSF--National Research Center for Environment and Health, Neuherberg, Germany.},
-	Annote = {no estimate of N
-
-they use SCOP to assign folds
-find that ancient folds have more families and are more designable, more widespread in proteomes
-hereditary disease associated proteins have lower designability},
-	Au = {Wong, P and Frishman, D},
-	Author = {Wong, Philip and Frishman, Dmitrij},
-	Da = {20060605},
-	Date-Added = {2007-04-25 10:09:16 -0700},
-	Date-Modified = {2007-04-25 10:19:11 -0700},
-	Dep = {20060505},
-	Doi = {10.1371/journal.pcbi.0020040},
-	Edat = {2006/05/09 09:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {1553-7358 (Electronic)},
-	Jid = {101238922},
-	Journal = {PLoS Comput Biol},
-	Jt = {PLoS computational biology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wong/2006.pdf},
-	Mhda = {2006/05/09 09:00},
-	Number = {5},
-	Own = {NLM},
-	Pages = {e40},
-	Phst = {2005/11/02 {$[$}received{$]$}; 2006/03/17 {$[$}accepted{$]$}; 2006/05/05 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {16680196},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {PLoS Comput Biol. 2006 May;2(5):e40. Epub 2006 May 5.},
-	Stat = {In-Process},
-	Title = {Fold designability, distribution, and disease},
-	Volume = {2},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbU0IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABdnWMJU2hVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdvbmcAEAAIAADBcY1JAAAAEQAIAADCVTyFAAAAAQAYAEZtTQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V29uZzoyMDA2LnBkZgAADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Xb25nLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dvbmcvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1371/journal.pcbi.0020040}}
-
-@article{Thornton:1999qy,
-	Abstract = {The evolution of proteins and their functions is reviewed from a structural perspective in the light of the current database. Protein domain families segregate unequally between the three major classes, the 32 different architectures and almost 700 folds observed to date. We find that the number of new topologies is still increasing, although 25 new structures are now determined for each new topology. The corresponding analysis and classification of function is only just beginning, fuelled by the genome data. The structural data revealed unexpected conservations and divergence of function both within and between families. The next five years will see the compilation of a definitive dictionary of protein families and their related functions, based on structural data which reveals relationships hidden at the sequence level. Such information will provide the foundation to build a better understanding of the molecular basis of biological complexity and hopefully to facilitate rational molecular design.},
-	Address = {Biochemistry and Molecular Biology Department, University College London, University of London, Gower Street, London, WC1E 6BT, UK. thornton@biochem.ucl.ac.uk},
-	Annote = {no estimate of N
-
-nice overview of the diversity of families and folds},
-	Au = {Thornton, JM and Orengo, CA and Todd, AE and Pearl, FM},
-	Author = {Thornton, J M and Orengo, C A and Todd, A E and Pearl, F M},
-	Ci = {Copyright 1999 Academic Press.},
-	Da = {19991119},
-	Date-Added = {2007-04-25 10:08:41 -0700},
-	Date-Modified = {2007-04-27 11:47:55 -0700},
-	Dcom = {19991119},
-	Doi = {10.1006/jmbi.1999.3054},
-	Edat = {1999/10/26},
-	Group = {Fold Space; Reviewed},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Animals; Databases, Factual; Enzymes/chemistry/classification/metabolism; *Evolution, Molecular; Genome; Humans; Phylogeny; *Protein Folding; Protein Structure, Secondary; Proteins/*chemistry/classification/*metabolism; Structure-Activity Relationship},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Thornton/1999.pdf},
-	Lr = {20061115},
-	Mhda = {1999/10/26 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {333--342},
-	Pii = {S0022-2836(99)93054-1},
-	Pl = {ENGLAND},
-	Pmid = {10529349},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Review},
-	Pubm = {Print},
-	Rf = {35},
-	Rn = {0 (Enzymes); 0 (Proteins)},
-	Sb = {IM; S},
-	So = {J Mol Biol. 1999 Oct 22;293(2):333-42.},
-	Stat = {MEDLINE},
-	Title = {Protein folds, functions and evolution},
-	Volume = {293},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbTAIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABefx8JXkspQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFRob3JudG9uABAACAAAwXGNSQAAABEACAAAwlf1OgAAAAEAGABGbTAARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlRob3JudG9uOjE5OTkucGRmAAAOABIACAAxADkAOQA5AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1Rob3JudG9uLzE5OTkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1Rob3JudG9uLzE5OTkucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.1999.3054}}
-
-@article{Stevens:2001fk,
-	Abstract = {A worldwide initiative in structural genomics aims to capitalize on the recent successes of the genome projects. Substantial new investments in structural genomics in the past 2 years indicate the high level of support for these international efforts. Already, enormous progress has been made on high-throughput methodologies and technologies that will speed up macromolecular structure determinations. Recent international meetings have resulted in the formation of an International Structural Genomics Organization to formulate policy and foster cooperation between the public and private efforts.},
-	Address = {Joint Center for Structural Genomics, Scripps Research Institute, 10550 North Torrey Pines Road, La Jolla, CA 92037, USA.},
-	Annote = {no estimate of N
-
-they think 16k structures will cover most of fold space},
-	Au = {Stevens, RC and Yokoyama, S and Wilson, IA},
-	Author = {Stevens, R C and Yokoyama, S and Wilson, I A},
-	Da = {20011005},
-	Date-Added = {2007-04-25 10:07:57 -0700},
-	Date-Modified = {2007-04-27 11:31:19 -0700},
-	Dcom = {20011025},
-	Doi = {10.1126/science.1066011},
-	Edat = {2001/10/06 10:00},
-	Gr = {P50 GM62411/GM/NIGMS},
-	Group = {Fold Space; Reviewed},
-	Issn = {0036-8075 (Print)},
-	Jid = {0404511},
-	Journal = {Science},
-	Jt = {Science (New York, N.Y.)},
-	Keywords = {Animals; *Computational Biology; Congresses; Costs and Cost Analysis; Crystallography, X-Ray; Databases, Factual; *Genomics; Guidelines; Humans; Information Management; Information Services; International Cooperation; Internet; Nuclear Magnetic Resonance, Biomolecular; Patents; Private Sector; *Protein Conformation; Protein Folding; Proteins/*chemistry; *Proteome; Public Sector; Publishing; Technology Transfer},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Stevens/2001.pdf},
-	Lr = {20070319},
-	Mhda = {2001/10/26 10:01},
-	Number = {5540},
-	Own = {NLM},
-	Pages = {89--92},
-	Pii = {294/5540/89},
-	Pl = {United States},
-	Pmid = {11588249},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins); 0 (Proteome)},
-	Sb = {IM},
-	So = {Science. 2001 Oct 5;294(5540):89-92.},
-	Stat = {MEDLINE},
-	Title = {Global efforts in structural genomics},
-	Volume = {294},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbSEIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeeYMJXjxBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1N0ZXZlbnMAABAACAAAwXGNSQAAABEACAAAwlfxgAAAAAEAGABGbSEARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlN0ZXZlbnM6MjAwMS5wZGYADgASAAgAMgAwADAAMQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TdGV2ZW5zLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU3RldmVucy8yMDAxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1126/science.1066011}}
-
-@article{Chothia:1992lr,
-	Annote = {N = 1000
-
-Actually estimated 1440 (~1500) = 3 * 4 * 120, then says that's an overestimate and goes down to 1000.
-THE original paper on this stuff that everybody cites.},
-	Au = {Chothia, C},
-	Author = {Chothia, C},
-	Da = {19920721},
-	Date-Added = {2007-04-25 09:34:58 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {19920721},
-	Doi = {10.1038/357543a0},
-	Edat = {1992/06/18},
-	Group = {Fold Space; Reviewed; Cited},
-	Issn = {0028-0836 (Print)},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature},
-	Keywords = {Animals; Base Sequence; Databases, Factual; Evolution; Humans; Proteins/*classification/*genetics; Sequence Homology, Nucleic Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chothia/1992.pdf},
-	Lr = {20041117},
-	Mhda = {1992/06/18 00:01},
-	Number = {6379},
-	Own = {NLM},
-	Pages = {543--544},
-	Pl = {ENGLAND},
-	Pmid = {1608464},
-	Pst = {ppublish},
-	Pt = {News},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Nature. 1992 Jun 18;357(6379):543-4.},
-	Stat = {MEDLINE},
-	Title = {Proteins. One thousand families for the molecular biologist},
-	Volume = {357},
-	Year = {1992},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbBMIMTk5Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfBbMJbeENQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0Nob3RoaWEAABAACAAAwXGNSQAAABEACAAAwlvaswAAAAEAGABGbBMARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkNob3RoaWE6MTk5Mi5wZGYADgASAAgAMQA5ADkAMgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaG90aGlhLzE5OTIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hvdGhpYS8xOTkyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/357543a0}}
-
-@article{Richardson:1981kx,
-	Au = {Richardson, JS},
-	Author = {Richardson, J S},
-	Da = {19810922},
-	Date-Added = {2007-04-25 09:29:09 -0700},
-	Date-Modified = {2007-04-30 13:27:33 -0700},
-	Dcom = {19810922},
-	Edat = {1981/01/01},
-	Gr = {GM-15000/GM/NIGMS},
-	Group = {Fold Space; Reviewed},
-	Issn = {0065-3233 (Print)},
-	Jid = {0116732},
-	Journal = {Adv Protein Chem},
-	Jt = {Advances in protein chemistry},
-	Keywords = {Amino Acid Sequence; Animals; Enzymes; Humans; Macromolecular Substances; Models, Molecular; *Protein Conformation; *Proteins; Terminology},
-	Language = {eng},
-	Lr = {20061115},
-	Mhda = {1981/01/01 00:01},
-	Own = {NLM},
-	Pages = {167--339},
-	Pl = {UNITED STATES},
-	Pmid = {7020376},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.; Review},
-	Pubm = {Print},
-	Rf = {299},
-	Rn = {0 (Enzymes); 0 (Macromolecular Substances); 0 (Proteins)},
-	Sb = {IM},
-	So = {Adv Protein Chem. 1981;34:167-339.},
-	Stat = {MEDLINE},
-	Title = {The anatomy and taxonomy of protein structure},
-	Volume = {34},
-	Year = {1981}}
-
-@article{Ptitsyn:1980fj,
-	Abstract = {Another review of basic protein structure.},
-	Au = {Ptitsyn, OB and Finkelstein, AV},
-	Author = {Ptitsyn, O B and Finkelstein, A V},
-	Da = {19810623},
-	Date-Added = {2007-04-25 09:27:58 -0700},
-	Date-Modified = {2007-04-30 16:10:08 -0700},
-	Dcom = {19810623},
-	Edat = {1980/08/01},
-	Group = {Fold Space; Reviewed},
-	Issn = {0033-5835 (Print)},
-	Jid = {0144032},
-	Journal = {Q Rev Biophys},
-	Jt = {Quarterly reviews of biophysics},
-	Keywords = {Amino Acid Sequence; Chemistry, Physical; *Evolution; *Protein Conformation; *Proteins},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Ptitsyn/1980.pdf},
-	Lr = {20061115},
-	Mhda = {1980/08/01 00:01},
-	Number = {3},
-	Oid = {NASA: 81175636},
-	Own = {NLM},
-	Pages = {339--386},
-	Pl = {ENGLAND},
-	Pmid = {7012894},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Review},
-	Pubm = {Print},
-	Rf = {127},
-	Rn = {0 (Proteins)},
-	Sb = {IM; S},
-	So = {Q Rev Biophys. 1980 Aug;13(3):339-86.},
-	Stat = {MEDLINE},
-	Title = {Similarities of protein topologies: evolutionary divergence, functional convergence or principles of folding?},
-	Volume = {13},
-	Year = {1980},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOMIMTk4MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfMv8JbxTxQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1B0aXRzeW4AABAACAAAwXGNSQAAABEACAAAwlwnrAAAAAEAGABGbOMARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlB0aXRzeW46MTk4MC5wZGYADgASAAgAMQA5ADgAMAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9QdGl0c3luLzE5ODAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUHRpdHN5bi8xOTgwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Levitt:1976uq,
-	Abstract = {A simple diagrammatic representation has been used to show the arrangement of alpha helices and beta sheets in 31 globular proteins, which are classified into four clearly separated classes. The observed arrangements are significantly non-random in that pieces of secondary structure adjacent in sequence along the polypeptide chain are also often in contact in three dimensions.},
-	Annote = {Nice look at protein topologies.
-
-Separate proteins into 4 classes:
-(1) all alpha
-(2) all beta
-(3) alpha + beta (don't mix)
-(4) alpha/beta (alpha and beta interact)
-
-Is this the basis of the SCOP classification?},
-	Au = {Levitt, M and Chothia, C},
-	Author = {Levitt, M and Chothia, C},
-	Da = {19760901},
-	Date-Added = {2007-04-25 09:26:56 -0700},
-	Date-Modified = {2007-04-30 13:39:26 -0700},
-	Dcom = {19760901},
-	Edat = {1976/06/17},
-	Group = {Fold Space; Reviewed},
-	Issn = {0028-0836 (Print)},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature},
-	Keywords = {Models, Structural; *Protein Conformation},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Levitt/1976.pdf},
-	Lr = {20001218},
-	Mhda = {1976/06/17 00:01},
-	Number = {5561},
-	Own = {NLM},
-	Pages = {552--558},
-	Pl = {ENGLAND},
-	Pmid = {934293},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Nature. 1976 Jun 17;261(5561):552-8.},
-	Stat = {MEDLINE},
-	Title = {Structural patterns in globular proteins},
-	Volume = {261},
-	Year = {1976},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJ8IMTk3Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfIgcJboVJQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkxldml0dAAQAAgAAMFxjUkAAAARAAgAAMJcA8IAAAABABgARmyfAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMZXZpdHQ6MTk3Ni5wZGYAAA4AEgAIADEAOQA3ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGV2aXR0LzE5NzYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0xldml0dC8xOTc2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Berman:2002qy,
-	Abstract = {The Protein Data Bank {$[$}PDB; Berman, Westbrook et al. (2000), Nucleic Acids Res. 28, 235-242; http://www.pdb.org/] is the single worldwide archive of primary structural data of biological macromolecules. Many secondary sources of information are derived from PDB data. It is the starting point for studies in structural bioinformatics. This article describes the goals of the PDB, the systems in place for data deposition and access, how to obtain further information and plans for the future development of the resource. The reader should come away with an understanding of the scope of the PDB and what is provided by the resource.},
-	Address = {RCSB, Department of Chemistry, Rutgers, The State University of New Jersey, 610 Taylor Road, Piscataway, NJ, USA. berman@rcsb.rutgers.edu},
-	Au = {Berman, HM and Battistuz, T and Bhat, TN and Bluhm, WF and Bourne, PE and Burkhardt, K and Feng, Z and Gilliland, GL and Iype, L and Jain, S and Fagan, P and Marvin, J and Padilla, D and Ravichandran, V and Schneider, B and Thanki, N and Weissig, H and Westbrook, JD and Zardecki, C},
-	Author = {Berman, Helen M and Battistuz, Tammy and Bhat, T N and Bluhm, Wolfgang F and Bourne, Philip E and Burkhardt, Kyle and Feng, Zukang and Gilliland, Gary L and Iype, Lisa and Jain, Shri and Fagan, Phoebe and Marvin, Jessica and Padilla, David and Ravichandran, Veerasamy and Schneider, Bohdan and Thanki, Narmada and Weissig, Helge and Westbrook, John D and Zardecki, Christine},
-	Da = {20020530},
-	Date-Added = {2007-04-25 09:25:10 -0700},
-	Date-Modified = {2007-04-26 17:13:38 -0700},
-	Dcom = {20021127},
-	Dep = {20020529},
-	Edat = {2002/05/31 10:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {0907-4449 (Print)},
-	Jid = {9305878},
-	Journal = {Acta Crystallogr D Biol Crystallogr},
-	Jt = {Acta crystallographica. Section D, Biological crystallography},
-	Keywords = {*Databases, Protein/standards; Information Storage and Retrieval; Internet},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Berman/2002.pdf},
-	Lr = {20061115},
-	Mhda = {2002/11/28 04:00},
-	Number = {Pt 6 No 1},
-	Own = {NLM},
-	Pages = {899--907},
-	Phst = {2001/12/01 {$[$}received{$]$}; 2002/02/21 {$[$}accepted{$]$}; 2002/05/29 {$[$}epublish{$]$}},
-	Pii = {S0907444902003451},
-	Pl = {Denmark},
-	Pmid = {12037327},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Acta Crystallogr D Biol Crystallogr. 2002 Jun;58(Pt 6 No 1):899-907. Epub 2002 May 29.},
-	Stat = {MEDLINE},
-	Title = {The Protein Data Bank},
-	Volume = {58},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+8IMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeH1MJWjjJQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkJlcm1hbgAQAAgAAMFxjUkAAAARAAgAAMJW8KIAAAABABgARmvvAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCZXJtYW46MjAwMi5wZGYAAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQmVybWFuLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0Jlcm1hbi8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Orengo:1994fk,
-	Abstract = {As the protein sequence and structure databases expand rapidly a better understanding of the relationships between proteins is required. A classification is considered that extends the sequence-based superfamilies to include proteins with similar function and three-dimensional structures but no sequence similarity. So far there are only nine protein folds known to recur in proteins having neither sequence nor functional similarity. These folds dominate the structure database, representing more than 30 per cent of all determined structures. This observation has implications for protein-fold recognition.},
-	Address = {Biochemistry and Molecular Biology Department, University College London, UK.},
-	Annote = {N = 7920
-
-They claim this is an overestimate, use same method as Chothia.},
-	Au = {Orengo, CA and Jones, DT and Thornton, JM},
-	Author = {Orengo, C A and Jones, D T and Thornton, J M},
-	Da = {19950112},
-	Date-Added = {2007-04-25 09:17:20 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {19950112},
-	Doi = {10.1038/372631a0},
-	Edat = {1994/12/15},
-	Group = {Fold Space; Cited; Reviewed},
-	Issn = {0028-0836 (Print)},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature},
-	Keywords = {Algorithms; Databases, Factual; Protein Conformation; *Protein Folding; Proteins/*chemistry/*classification; Sequence Alignment; Structure-Activity Relationship},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Orengo/1994.pdf},
-	Lr = {20001218},
-	Mhda = {1994/12/15 00:01},
-	Number = {6507},
-	Own = {NLM},
-	Pages = {631--634},
-	Pl = {ENGLAND},
-	Pmid = {7990952},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Nature. 1994 Dec 15;372(6507):631-4.},
-	Stat = {MEDLINE},
-	Title = {Protein superfamilies and domain superfolds},
-	Volume = {372},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbM8IMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfBBMJbdTBQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk9yZW5nbwAQAAgAAMFxjUkAAAARAAgAAMJb16AAAAABABgARmzPAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpPcmVuZ286MTk5NC5wZGYAAA4AEgAIADEAOQA5ADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvT3JlbmdvLzE5OTQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL09yZW5nby8xOTk0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/372631a0}}
-
-@article{Coulson:2002lr,
-	Abstract = {As more and more protein structures are determined, there is increasing interest in the question of how many different folds have been used in biology. The history of the rate of discovery of new folds and the distribution of sequence families among known folds provide a means of estimating the underlying distribution of fold use. Previous models exploiting these data have led to rather different conclusions on the total number of folds. We present a new model, based on the notion that the folds used in biology fall naturally into three classes: unifolds, that is, folds found only in a single narrow sequence family; mesofolds, found in an intermediate number of families; and the previously noted superfolds, found in many protein families. We show that this model fits the available data well and has predicted the development of SCOP over the past 2 years. The principle implications of the model are as follows: (1) The vast majority of folds will be found in only a single sequence family; (2) the total number of folds is at least 10,000; and (3) 80% of sequence families have one of about 400 folds, most of which are already known.},
-	Address = {Institute of Cell and Molecular Biology, University of Edinburgh, Edinburgh, Scotland. a.coulson@ed.ac.uk},
-	Annote = {N = 2279, 4596, 9703 for M = 10k, 23k, 50k
-
-(1) the vast majority of folds will be found in only a single sequence family
-(2) the total number of folds is at least 10,000
-(3) 80% of sequence families have one of about 400folds, most of which are already known},
-	Au = {Coulson, AF and Moult, J},
-	Author = {Coulson, Andrew F W and Moult, John},
-	Ci = {Copyright 2001 Wiley-Liss, Inc.},
-	Da = {20011217},
-	Date-Added = {2007-04-25 09:16:16 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {20020305},
-	Edat = {2001/12/18 10:00},
-	Gr = {P01 GM5790/GM/NIGMS},
-	Group = {Fold Space; Reviewed; Cited},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Evolution, Molecular; Models, Chemical; Protein Conformation; *Protein Folding},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Coulson/2002},
-	Lr = {20061115},
-	Mhda = {2002/03/07 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {61--71},
-	Pii = {10.1002/prot.10011},
-	Pl = {United States},
-	Pmid = {11746703},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Proteins. 2002 Jan 1;46(1):61-71.},
-	Stat = {MEDLINE},
-	Title = {A unifold, mesofold, and superfold model of protein fold use},
-	Volume = {46},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGAAAAAAAGAAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbBsEMjAwMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABedFMJXi5NQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0NvdWxzb24AABAACAAAwXGNSQAAABEACAAAwlfuAwAAAAEAGABGbBsARmvPAEZq1QBGahsARmRoAECJQwACAD5oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkNvdWxzb246MjAwMgAOAAoABAAyADAAMAAyAA8ACAADAGgAcwByABIAOlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Db3Vsc29uLzIwMDIAEwABLwAAFQACABP//wAA0h4fICFYJGNsYXNzZXNaJGNsYXNzbmFtZaMhIiNdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfECEuLi8uLi8uLi8uLi9BcnRpY2xlcy9Db3Vsc29uLzIwMDLSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5Aj0CQgJLAlYCWgJoAm8CeAKcAqECpAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAKx}}
-
-@article{Wang:1996fk,
-	Abstract = {Many protein structures have now been determined and reveal that protein molecules can adopt the same fold despite having very different sequences. It has been suggested that, owing to different stereochemical constraints, the number of ways that a sequence can fold may be limited. Therefore, it is reasonable to ask how many fold types exist in nature. Several groups have tackled this problem with very different results. In the present study, a novel statistical sampling approach is used to reestimate this number. The results suggest that the number of protein folds in nature is probably several hundreds.},
-	Address = {National Laboratory of Biomacromolecules, Institute of Biophysics, Academia Sinica, Beijing, People's Republic of China.},
-	Annote = {N = ~350
-
-Uses a simple statistical estimate, and clearly this number is a bit too small since more folds than this are already documented in SCOP.},
-	Au = {Wang, ZX},
-	Author = {Wang, Z X},
-	Da = {19970218},
-	Date-Added = {2007-04-25 09:08:34 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {19970218},
-	Doi = {10.1002/(SICI)1097-0134(199610)26:2<186::AID-PROT8>3.0.CO;2-E},
-	Edat = {1996/10/01},
-	Group = {Fold Space; Reviewed; Cited},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Likelihood Functions; *Protein Folding; Stereoisomerism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/1996.pdf},
-	Lr = {20061115},
-	Mhda = {2000/06/20 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {186--191},
-	Pii = {10.1002/(SICI)1097-0134(199610)26:2<186::AID-PROT8>3.0.CO;2-E},
-	Pl = {UNITED STATES},
-	Pmid = {8916226},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Proteins. 1996 Oct;26(2):186-91.},
-	Stat = {MEDLINE},
-	Title = {How many fold types of protein are there in nature?},
-	Volume = {26},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABe+38Jbc4JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADCW9XyAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoxOTk2LnBkZgAADgASAAgAMQA5ADkANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYW5nLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dhbmcvMTk5Ni5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/(SICI)1097-0134(199610)26:2%3C186::AID-PROT8%3E3.0.CO;2-E}}
-
-@article{Zhang:2001lr,
-	Abstract = {Advances in methods of structure determination have led to the accumulation of large amounts of protein structural data. Some 500 distinct protein folds have now been characterized, representing one-third of all globular folds that exist. The range of known structural types and the relatively large fraction of the protein universe that has already been sampled have greatly facilitated the discovery of some unifying principles governing protein structure and evolutionary relationships. These include a highly skewed distribution of topological arrangements of secondary-structure elements that favors a few very common connectivities and a highly skewed distribution in the capacity of folds to accommodate unrelated sequences. These and other observations suggest that the number of folds is far fewer than the number of genes, and that the fold universe is dominated by a small number of giant attractors that accommodate large numbers of unrelated sequences. Thus all basic protein folds will likely be determined in the near future, laying the foundation for a comprehensive understanding of the biochemical and cellular functions of whole organisms.},
-	Address = {Department of Chemistry and E. O. Lawrence Berkeley National Laboratory, University of California, Berkeley 94720, USA.},
-	Annote = {N = 1300
-
-assume a geometrical distribution
-assume uniform sampling from Nature's folds, exclude superfolds from theoretical model,use SCOP 1.48},
-	Au = {Zhang, C and DeLisi, C},
-	Author = {Zhang, C and DeLisi, C},
-	Da = {20010302},
-	Date-Added = {2007-04-25 09:07:39 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {20010315},
-	Edat = {2001/03/07 10:00},
-	Group = {Fold Space; Reviewed; Cited},
-	Issn = {1420-682X (Print)},
-	Jid = {9705402},
-	Journal = {Cell Mol Life Sci},
-	Jt = {Cellular and molecular life sciences : CMLS},
-	Keywords = {Amino Acid Motifs; Databases, Factual; Models, Molecular; *Protein Folding; Protein Structure, Secondary; Protein Structure, Tertiary; Proteins/*chemistry/metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zhang/2001.pdf},
-	Lr = {20051116},
-	Mhda = {2001/03/17 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {72--79},
-	Pl = {Switzerland},
-	Pmid = {11229818},
-	Pst = {ppublish},
-	Pt = {Journal Article; Review},
-	Pubm = {Print},
-	Rf = {60},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Cell Mol Life Sci. 2001 Jan;58(1):72-9.},
-	Stat = {MEDLINE},
-	Title = {Protein folds: molecular systematics in three dimensions},
-	Volume = {58},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVkIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABeemcJXj/RQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVpoYW5nAAAQAAgAAMFxjUkAAAARAAgAAMJX8mQAAAABABgARm1ZAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpaaGFuZzoyMDAxLnBkZgAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1poYW5nLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmhhbmcvMjAwMS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Chothia:1990lr,
-	Address = {MRC Laboratory of Molecular Biology, Cambridge, England.},
-	Annote = {No estimate of N.
-
-Nice tutorial on protein folding and folding patterns.},
-	Au = {Chothia, C and Finkelstein, AV},
-	Author = {Chothia, C and Finkelstein, A V},
-	Da = {19900830},
-	Date-Added = {2007-04-25 08:40:37 -0700},
-	Date-Modified = {2007-04-30 13:14:12 -0700},
-	Dcom = {19900830},
-	Doi = {10.1146/annurev.bi.59.070190.005043},
-	Edat = {1990/01/01},
-	Group = {Fold Space; Reviewed},
-	Issn = {0066-4154 (Print)},
-	Jid = {2985150R},
-	Journal = {Annu Rev Biochem},
-	Jt = {Annual review of biochemistry},
-	Keywords = {Amino Acid Sequence; Chemistry, Physical; Molecular Sequence Data; *Protein Conformation; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chothia/1990.pdf},
-	Lr = {20061115},
-	Mhda = {1990/01/01 00:01},
-	Own = {NLM},
-	Pages = {1007--1039},
-	Pl = {UNITED STATES},
-	Pmid = {2197975},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Review},
-	Pubm = {Print},
-	Rf = {95},
-	Sb = {IM},
-	So = {Annu Rev Biochem. 1990;59:1007-39.},
-	Stat = {MEDLINE},
-	Title = {The classification and origins of protein folding patterns},
-	Volume = {59},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbBMIMTk5MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABfBxcJbev9QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0Nob3RoaWEAABAACAAAwXGNSQAAABEACAAAwlvdbwAAAAEAGABGbBMARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkNob3RoaWE6MTk5MC5wZGYADgASAAgAMQA5ADkAMAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaG90aGlhLzE5OTAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hvdGhpYS8xOTkwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1146/annurev.bi.59.070190.005043}}
-
-@article{Holm:1997zr,
-	Abstract = {The FSSP database presents a continuously updated structural classification of three-dimensional protein folds. It is derived using an automatic structure comparison program (Dali) for the all-against-all comparison of over 6000 three-dimensional coordinate sets in the Protein Data Bank (PDB). Sequence-related protein families are covered by a representative set of 813 protein chains. Hierachical clustering based on structural similarities yields a fold tree that defines 253 fold classes. For each representative protein chain, there is a database entry containing structure-structure alignments with its structural neighbours in the PDB. The database is accessible online through World Wide Web browsers and by anonymous ftp (file transfer protocol). The overview of fold space and the individual data sets provide a rich source of information for the study of both divergent and convergent aspects of molecular evolution, and define useful test sets and a standard of truth for assessing the correctness of sequence-sequence or sequence-structure alignments.},
-	Address = {European Molecular Biology Laboratory - European Bioinformatics Institute, Wellcome Trust Genome Campus, Cambridge CB10 1SD, UK.},
-	Au = {Holm, L and Sander, C},
-	Author = {Holm, L and Sander, C},
-	Da = {19970228},
-	Date-Added = {2007-04-18 11:27:06 -0700},
-	Date-Modified = {2007-04-18 11:27:19 -0700},
-	Dcom = {19970228},
-	Edat = {1997/01/01},
-	Issn = {0305-1048 (Print)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Keywords = {Amino Acid Sequence; Animals; *Databases, Factual; Molecular Sequence Data; *Protein Folding; Proteins/*chemistry/genetics; Sequence Alignment},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1997.pdf},
-	Lr = {20031114},
-	Mhda = {1997/01/01 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {231--234},
-	Pii = {gka046},
-	Pl = {ENGLAND},
-	Pmid = {9016542},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 1997 Jan 1;25(1):231-4.},
-	Stat = {MEDLINE},
-	Title = {Dali/FSSP classification of three-dimensional protein folds},
-	Volume = {25},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbVLcJLoJRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADCTAMEAAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTk3LnBkZgAADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0hvbG0vMTk5Ny5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Day:2003fr,
-	Abstract = {We have determined consensus protein-fold classifications on the basis of three classification methods, SCOP, CATH, and Dali. These classifications make use of different methods of defining and categorizing protein folds that lead to different views of protein-fold space. Pairwise comparisons of domains on the basis of their fold classifications show that much of the disagreement between the classification systems is due to differing domain definitions rather than assigning the same domain to different folds. However, there are significant differences in the fold assignments between the three systems. These remaining differences can be explained primarily in terms of the breadth of the fold classifications. Many structures may be defined as having one fold in one system, whereas far fewer are defined as having the analogous fold in another system. By comparing these folds for a nonredundant set of proteins, the consensus method breaks up broad fold classifications and combines restrictive fold classifications into metafolds, creating, in effect, an averaged view of fold space. This averaged view requires that the structural similarities between proteins having the same metafold be recognized by multiple classification systems. Thus, the consensus map is useful for researchers looking for fold similarities that are relatively independent of the method used to compare proteins. The 30 most populated metafolds, representing the folds of about half of a nonredundant subset of the PDB, are presented here. The full list of metafolds is presented on the Web.},
-	Address = {Biomolecular Structure and Design Program and Department of Medicinal Chemistry, University of Washington, Seattle, Washington 98195, USA.},
-	Annote = {no estimate of N
-consensus folds ("metafolds") number 1130
-
-nice comparison of CATH, SCOP and Dali
-},
-	Au = {Day, R and Beck, DA and Armen, RS and Daggett, V},
-	Author = {Day, Ryan and Beck, David A C and Armen, Roger S and Daggett, Valerie},
-	Da = {20030922},
-	Date-Added = {2007-04-18 11:25:22 -0700},
-	Date-Modified = {2007-04-25 11:27:52 -0700},
-	Dcom = {20040519},
-	Edat = {2003/09/23 05:00},
-	Gr = {5 T32 GM08268/GM/NIGMS; GM 50789/GM/NIGMS},
-	Group = {Fold Space; Reviewed},
-	Issn = {0961-8368 (Print)},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Amino Acid Motifs; Computational Biology/methods; Computer Graphics; *Databases, Protein; Models, Molecular; Protein Conformation; *Protein Folding; Protein Structure, Tertiary/*genetics; Proteins/*chemistry/classification/genetics; Structural Homology, Protein},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Day/2003.pdf},
-	Lr = {20061115},
-	Mhda = {2004/05/20 05:00},
-	Number = {10},
-	Own = {NLM},
-	Pages = {2150--2160},
-	Pl = {United States},
-	Pmid = {14500873},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Protein Sci. 2003 Oct;12(10):2150-60.},
-	Stat = {MEDLINE},
-	Title = {A consensus view of fold space: combining SCOP, CATH, and the Dali Domain Dictionary},
-	Volume = {12},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCAIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbV+8JLofNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0RheQAAEAAIAADBcY1JAAAAEQAIAADCTARjAAAAAQAYAEZsIABGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RGF5OjIwMDMucGRmAA4AEgAIADIAMAAwADMALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRGF5LzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGF5LzIwMDMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Zhang:1998ys,
-	Abstract = {A number of fundamental questions in structural biology concern the diversity of protein architectures (or folds). Here, we address two of them, the size of the universe of folds, and the distribution of sequence families among them, using an analysis based on a new and rigorous statistical sampling method. In particular we show that the number of known non-transmembrane protein folds is approximately one half of the total that exist, and that certain superfolds should exist, which accommodate dozens of non-homologous sequence families.},
-	Address = {Department of Biomedical Engineering, Boston University College of Engineering, Boston, MA, 02215, USA.},
-	Annote = {N = 850
-
-use SCOP
-the usual thing; fitting some fitting function; they revisited their estimate later and adjusted it up by a factor of 2},
-	Au = {Zhang, C and DeLisi, C},
-	Author = {Zhang, C and DeLisi, C},
-	Ci = {Copyright 1998 Academic Press},
-	Da = {19990312},
-	Date-Added = {2007-04-18 11:21:14 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {19990312},
-	Doi = {10.1006/jmbi.1998.2282},
-	Edat = {1999/01/08},
-	Group = {Fold Space; Cited; Reviewed},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Databases, Factual; *Models, Biological; Models, Molecular; Models, Statistical; *Protein Folding; Proteins/*chemistry; Selection Bias},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zhang/1998.pdf},
-	Lr = {20061115},
-	Mhda = {1999/01/08 00:01},
-	Number = {5},
-	Own = {NLM},
-	Pages = {1301--1305},
-	Pii = {S0022-2836(98)92282-3},
-	Pl = {ENGLAND},
-	Pmid = {9878351},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 1998 Dec 18;284(5):1301-5.},
-	Stat = {MEDLINE},
-	Title = {Estimating the number of protein folds},
-	Volume = {284},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVkIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbTe8JLngJQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVpoYW5nAAAQAAgAAMFxjUkAAAARAAgAAMJMAHIAAAABABgARm1ZAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpaaGFuZzoxOTk4LnBkZgAOABIACAAxADkAOQA4AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1poYW5nLzE5OTgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmhhbmcvMTk5OC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmbi.1998.2282}}
-
-@article{Yee:1993rt,
-	Abstract = {Protein structures come in families. Are families {\tt{}"{}}closely knit{\tt{}"{}} or "loosely knit" entities? We describe a measure of relatedness among polymer conformations. Based on weighted distance maps, this measure differs from existing measures mainly in two respects: (1) it is computationally fast, and (2) it can compare any two proteins, regardless of their relative chain lengths or degree of similarity. It does not require finding relative alignments. The measure is used here to determine the dissimilarities between all 12,403 possible pairs of 158 diverse protein structures from the Brookhaven Protein Data Bank (PDB). Combined with minimal spanning trees and hierarchical clustering methods, this measure is used to define structural families. It is also useful for rapidly searching a dataset of protein structures for specific substructural motifs. By using an analogy to distributions of Euclidean distances, we find that protein families are not tightly knit entities.},
-	Address = {Department of Pharmaceutical Chemistry, University of California, San Francisco 94143-1204.},
-	Annote = {Fold Space
-kinda old, argues that protein families are not "tightly-knit"
-can't find a direct estimate of the number of folds},
-	Au = {Yee, DP and Dill, KA},
-	Author = {Yee, D P and Dill, K A},
-	Da = {19930805},
-	Date-Added = {2007-04-18 11:17:54 -0700},
-	Date-Modified = {2007-04-30 10:40:39 -0700},
-	Dcom = {19930805},
-	Edat = {1993/06/01},
-	Group = {Fold Space; Reviewed},
-	Issn = {0961-8368 (Print)},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Algorithms; Animals; Calcium-Binding Proteins/chemistry; DNA-Binding Proteins/chemistry; Databases, Factual; Humans; Models, Molecular; Molecular Structure; Protein Conformation; Proteins/*chemistry/classification; Sequence Alignment},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Yee/1993.pdf},
-	Lr = {20061115},
-	Mhda = {1993/06/01 00:01},
-	Number = {6},
-	Own = {NLM},
-	Pages = {884--899},
-	Pl = {UNITED STATES},
-	Pmid = {8318894},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Calcium-Binding Proteins); 0 (DNA-Binding Proteins); 0 (Proteins)},
-	Sb = {IM},
-	So = {Protein Sci. 1993 Jun;2(6):884-99.},
-	Stat = {MEDLINE},
-	Title = {Families and the structural relatedness among globular proteins},
-	Volume = {2},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVYIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbQLMJLlXRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1llZQAAEAAIAADBcY1JAAAAEQAIAADCS/fkAAAAAQAYAEZtVgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WWVlOjE5OTMucGRmAA4AEgAIADEAOQA5ADMALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvWWVlLzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWWVlLzE5OTMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Wolf:2000vn,
-	Annote = {N = 1000
-
-use SCOP, detailed model of how distribution is sampled
-binomial distribution of families among folds},
-	Author = {Wolf, Yuri I. and Grishin, Nick V. and Koonin, Eugene V.},
-	Date-Added = {2007-04-18 11:12:17 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Group = {Fold Space; Reviewed; Cited},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {protein structure classification; structural genomics; sampling; logarithmic distribution},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wolf/2000.pdf},
-	Number = {4},
-	Pages = {897--905},
-	Title = {Estimating the number of protein folds and families from complete genome data},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-45F51HT-FW/2/c3fb8376cad58b34cf6b9782e3b38a6f},
-	Volume = {299},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUsIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbQW8JLldRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdvbGYAEAAIAADBcY1JAAAAEQAIAADCS/hEAAAAAQAYAEZtSwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V29sZjoyMDAwLnBkZgAADgASAAgAMgAwADAAMAAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Xb2xmLzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dvbGYvMjAwMC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-45F51HT-FW/2/c3fb8376cad58b34cf6b9782e3b38a6f}}
-
-@article{Koonin:2002yq,
-	Abstract = {Despite the practically unlimited number of possible protein sequences, the number of basic shapes in which proteins fold seems not only to be finite, but also to be relatively small, with probably no more than 10,000 folds in existence. Moreover, the distribution of proteins among these folds is highly non-homogeneous -- some folds and superfamilies are extremely abundant, but most are rare. Protein folds and families encoded in diverse genomes show similar size distributions with notable mathematical properties, which also extend to the number of connections between domains in multidomain proteins. All these distributions follow asymptotic power laws, such as have been identified in a wide variety of biological and physical systems, and which are typically associated with scale-free networks. These findings suggest that genome evolution is driven by extremely general mechanisms based on the preferential attachment principle.},
-	Address = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, Maryland 20894, USA. koonin@ncbi.nih.gov},
-	Annote = {no estimate of N
-cites estimates from 650-10k
-
-NICE review of how estimates are made of families/folds
-several references given with estimates from 650 - 10e3},
-	Au = {Koonin, EV and Wolf, YI and Karev, GP},
-	Author = {Koonin, Eugene V and Wolf, Yuri I and Karev, Georgy P},
-	Da = {20021114},
-	Date-Added = {2007-04-18 11:10:30 -0700},
-	Date-Modified = {2007-04-25 11:27:45 -0700},
-	Dcom = {20021212},
-	Doi = {10.1038/nature01256},
-	Edat = {2002/11/15 04:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {0028-0836 (Print)},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature},
-	Keywords = {Databases, Protein; *Evolution, Molecular; *Genome; Models, Genetic; *Protein Folding; Protein Structure, Tertiary; Proteins/*chemistry/classification/genetics; Proteome; Proteomics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Koonin/2002.pdf},
-	Lr = {20051116},
-	Mhda = {2002/12/13 04:00},
-	Number = {6912},
-	Own = {NLM},
-	Pages = {218--223},
-	Pii = {nature01256},
-	Pl = {England},
-	Pmid = {12432406},
-	Pst = {ppublish},
-	Pt = {Journal Article; Review},
-	Pubm = {Print},
-	Rf = {77},
-	Rn = {0 (Proteins); 0 (Proteome)},
-	Sb = {IM},
-	So = {Nature. 2002 Nov 14;420(6912):218-23.},
-	Stat = {MEDLINE},
-	Title = {The structure of the protein universe and genome evolution},
-	Volume = {420},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIcIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbUT8JLn0wAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABktvb25pbgAQAAgAAMFxjUkAAAARAAgAAMJMAbwAAAABABgARmyHAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLb29uaW46MjAwMi5wZGYAAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS29vbmluLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0tvb25pbi8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/nature01256}}
-
-@article{Leonov:2003kx,
-	Abstract = {The estimation of the number of protein folds in nature is a matter of considerable interest. In this study, a Monte Carlo method employing the broken stick model is used to assign a given number of proteins into a given number of folds. Subsequently, random, integer, non-repeating numbers are generated in order to simulate the process of fold discovery. With this conceptual framework at hand, the effects of two factors upon the fold identification process were investigated: (1) the nature of folds distributions and (2) preferential sampling bias of previously identified folds. Depending on the type of distribution, dividing 100,000 proteins into 1,000 folds resulted in 10-30% of the folds having 10 proteins or less per fold, approximately 10% of the folds having 10-20 proteins per fold, 31-45% having 20-100 proteins per fold, and >30% of the folds having more than 100 proteins per fold. After randomly sampling one tenth of the proteins, 68-96% of the folds were identified. These percentages depend both on folds distribution and biased/non-biased sampling. Only upon increasing the sampling bias for previously identified folds to 1,000, did the model result in a reduction of the number of proteins identified by an order of magnitude (approximately 9%). Thus, assuming the structures of one tenth of the population of proteins in nature have been solved, the results of the Monte Carlo simulation are more consistent with recent lower estimates of the number of folds, <or=1,000. Any deviation from this estimate would reflect significant bias in the experimental sampling of protein structure, and/or substantially nonuniform folds distribution, manifested in a large number of single-fold proteins.},
-	Address = {School of Computer Science and Engineering. The Hebrew University, Givat-Ram, Jerusalem, Israel.},
-	Annote = {N = 1000
-assuming structures of 1/10th of proteins have been solved (!!)
-probably not true, b/c of bias in sampling (structure and sequence) AND non-uniform fold distribution
-
-NICE statistical study on how folds may have been sampled by researchers},
-	Au = {Leonov, H and Mitchell, JS and Arkin, IT},
-	Author = {Leonov, Hadas and Mitchell, Joseph S B and Arkin, Isaiah T},
-	Ci = {Copyright 2003 Wiley-Liss, Inc.},
-	Da = {20030415},
-	Date-Added = {2007-04-18 11:06:53 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {20030616},
-	Doi = {10.1002/prot.10336},
-	Edat = {2003/04/16 05:00},
-	Group = {Fold Space; Reviewed; Cited},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Models, Theoretical; *Monte Carlo Method; *Protein Folding; Proteins/*chemistry/classification; Reproducibility of Results},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Leonov/2003.pdf},
-	Lr = {20061115},
-	Mhda = {2003/06/17 05:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {352--359},
-	Pl = {United States},
-	Pmid = {12696047},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2003 May 15;51(3):352-9.},
-	Stat = {MEDLINE},
-	Title = {Monte Carlo estimation of the number of possible protein folds: effects of sampling bias and folds distributions},
-	Volume = {51},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJwIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbULMJLnyFQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkxlb25vdgAQAAgAAMFxjUkAAAARAAgAAMJMAZEAAAABABgARmycAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMZW9ub3Y6MjAwMy5wZGYAAA4AEgAIADIAMAAwADMALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGVvbm92LzIwMDMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0xlb25vdi8yMDAzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1002/prot.10336}}
-
-@article{Oberai:2006fj,
-	Abstract = {One of the goals of structural genomics is to obtain a structural representative of almost every fold in nature. A recent estimate suggests that 70%-80% of soluble protein domains identified in the first 1000 genome sequences should be covered by about 25,000 structures-a reasonably achievable goal. As no current estimates exist for the number of membrane protein families, however, it is not possible to know whether family coverage is a realistic goal for membrane proteins. Here we find that virtually all polytopic helical membrane protein families are present in the already known sequences so we can make an estimate of the total number of families. We find that only approximately 700 polytopic membrane protein families account for 80% of structured residues and approximately 1700 cover 90% of structured residues. While apparently a finite and reachable goal, we estimate that it will likely take more than three decades to obtain the structures needed for 90% residue coverage, if current trends continue.},
-	Address = {Department of Chemistry and Biochemistry, UCLA-DOE Institute for Genomics and Proteomics, Los Angeles, CA 90095-1570, USA.},
-	Annote = {N = 300 to 550 (MEMBRANE proteins)
-assuming same distribution for membrane proteins as soluble
-
-SCOP already has 898 folds (v1.69)
-quote an estimate of 10 times fewer membrane protein families than soluble proteins
-
-quote a study by Yan and Moult estimating 25,000 representative folds},
-	Au = {Oberai, A and Ihm, Y and Kim, S and Bowie, JU},
-	Author = {Oberai, Amit and Ihm, Yungok and Kim, Sanguk and Bowie, James U},
-	Da = {20060703},
-	Date-Added = {2007-04-18 11:00:28 -0700},
-	Date-Modified = {2007-04-25 10:28:20 -0700},
-	Dcom = {20061017},
-	Doi = {10.1110/ps.062109706},
-	Edat = {2006/07/04 09:00},
-	Gr = {GM3919/GM/NIGMS},
-	Group = {Fold Space; Reviewed},
-	Issn = {0961-8368 (Print)},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Algorithms; Cluster Analysis; Genomics; Hydrophobicity; Membrane Proteins/*chemistry; Protein Folding; Sequence Alignment; Solubility},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Oberai/2006.pdf},
-	Lr = {20061115},
-	Mhda = {2006/10/18 09:00},
-	Number = {7},
-	Own = {NLM},
-	Pages = {1723--1734},
-	Pii = {15/7/1723},
-	Pl = {United States},
-	Pmid = {16815920},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Membrane Proteins)},
-	Sb = {IM},
-	So = {Protein Sci. 2006 Jul;15(7):1723-34.},
-	Stat = {MEDLINE},
-	Title = {A limited universe of membrane protein families and folds},
-	Volume = {15},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbM0IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbUBsJLnvIAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk9iZXJhaQAQAAgAAMFxjUkAAAARAAgAAMJMAWIAAAABABgARmzNAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpPYmVyYWk6MjAwNi5wZGYAAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvT2JlcmFpLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL09iZXJhaS8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1110/ps.062109706}}
-
-@article{Grant:2004uq,
-	Abstract = {Although the precise aims differ between the various international structural genomics initiatives currently aiming to illuminate the universe of protein folds, many selectively target protein families for which the fold is unknown. How well can the current set of known protein families and folds be used to estimate the total number of folds in nature, and will structural genomics initiatives yield representatives for all the major protein families within a reasonable time scale?},
-	Address = {Department of Biochemistry and Molecular Biology, University College, Gower Street, London WC1E 6BT, UK. grant@biochem.ucl.ac.uk},
-	Annote = {N = 4684 (for 120 genomes)
-
-"[All] fold estimates are unsatisfying, in that they necessitate simplified 
-models of fold usage and optimism regarding lack of bias in 
-the databases; whilst our sampling of `species' space remains 
-so sparse, calculations on the numbers of folds in all of 
-nature seem rather esoteric."
-
-nice review of the state of things in 2004 w/ references
-1e3 - 1e4 estimates quoted},
-	Au = {Grant, A and Lee, D and Orengo, C},
-	Author = {Grant, A and Lee, D and Orengo, C},
-	Da = {20040506},
-	Date-Added = {2007-04-18 10:57:12 -0700},
-	Date-Modified = {2008-05-29 12:10:24 -0700},
-	Dcom = {20041007},
-	Dep = {20040429},
-	Edat = {2004/05/07 05:00},
-	Group = {Fold Space; Reviewed; Cited},
-	Jid = {100960660},
-	Journal = {Genome Biol},
-	Jt = {Genome biology},
-	Keywords = {Chromosome Mapping; Databases, Protein; Genome; *Protein Folding; Proteins/chemistry/classification/genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Grant/2004.pdf},
-	Mhda = {2004/10/08 09:00},
-	Number = {5},
-	Own = {NLM},
-	Pages = {107},
-	Phst = {2004/04/29 {$[$}aheadofprint{$]$}},
-	Pii = {gb-2004-5-5-107},
-	Pl = {England},
-	Pmid = {15128436},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Genome Biol. 2004;5(5):107. Epub 2004 Apr 29.},
-	Stat = {MEDLINE},
-	Title = {Progress towards mapping the universe of protein folds},
-	Volume = {5},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbFEIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbTvsJLnptQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUdyYW50AAAQAAgAAMFxjUkAAAARAAgAAMJMAQsAAAABABgARmxRAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHcmFudDoyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dyYW50LzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR3JhbnQvMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1186/gb-2004-5-5-107}}
-
-@article{Shindyalov:2000qy,
-	Abstract = {Comparing and subsequently classifying protein structures information has received significant attention concurrent with the increase in the number of experimentally derived 3-dimensional structures. Classification schemes have focused on biological function found within protein domains and on structure classification based on topology. Here an alternative view is presented that groups substructures. Substructures are long (50-150 residue) highly repetitive near-contiguous pieces of polypeptide chain that occur frequently in a set of proteins from the PDB defined as structurally non-redundant over the complete polypeptide chain. The substructure classification is based on a previously reported Combinatorial Extension (CE) algorithm that provides a significantly different set of structure alignments than those previously described, having, for example, only a 40% overlap with FSSP. Qualitatively the algorithm provides longer contiguous aligned segments at the price of a slightly higher root-mean-square deviation (rmsd). Clustering these alignments gives a discreet and highly repetitive set of substructures not detectable by sequence similarity alone. In some cases different substructures represent all or different parts of well known folds indicative of the Russian doll effect--the continuity of protein fold space. In other cases they fall into different structure and functional classifications. It is too early to determine whether these newly classified substructures represent new insights into the evolution of a structural framework important to many proteins. What is apparent from on-going work is that these substructures have the potential to be useful probes in finding remote sequence homology and in structure prediction studies. The characteristics of the complete all-by-all comparison of the polypeptide chains present in the PDB and details of the filtering procedure by pair-wise structure alignment that led to the emergent substructure gallery are discussed. Substructure classification, alignments, and tools to analyze them are available at http://cl.sdsc.edu/ce.html.},
-	Address = {San Diego Supercomputer Center, California, USA.},
-	Annote = {Fold Space
-breaks up proteins into small structural motifs, sort of one step down from domains},
-	Au = {Shindyalov, IN and Bourne, PE},
-	Author = {Shindyalov, I N and Bourne, P E},
-	Da = {20000411},
-	Date-Added = {2007-04-18 10:53:38 -0700},
-	Date-Modified = {2007-04-27 11:42:30 -0700},
-	Dcom = {20000411},
-	Edat = {2000/03/14 09:00},
-	Group = {Fold Space; Reviewed},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Algorithms; Amino Acid Sequence; Databases, Factual; Internet; Models, Molecular; Molecular Sequence Data; *Protein Folding; Protein Structure, Secondary; Proteins/*chemistry/classification; Sequence Alignment/*methods},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Shindyalov/2000.pdf},
-	Lr = {20061115},
-	Mhda = {2000/04/15 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {247--260},
-	Pii = {10.1002/(SICI)1097-0134(20000215)38:3<247::AID-PROT2>3.0.CO;2-T},
-	Pl = {UNITED STATES},
-	Pmid = {10713986},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2000 Feb 15;38(3):247-60.},
-	Stat = {MEDLINE},
-	Title = {An alternative view of protein fold space},
-	Volume = {38},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGaAAAAAAGaAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbREIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbU2sJLoA1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAClNoaW5keWFsb3YAEAAIAADBcY1JAAAAEQAIAADCTAJ9AAAAAQAYAEZtEQBGa88ARmrVAEZqGwBGZGgAQIlDAAIARWhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6U2hpbmR5YWxvdjoyMDAwLnBkZgAADgASAAgAMgAwADAAMAAuAHAAZABmAA8ACAADAGgAcwByABIAQVVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TaGluZHlhbG92LzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKC4uLy4uLy4uLy4uL0FydGljbGVzL1NoaW5keWFsb3YvMjAwMC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlcCXAJlAnACdAKCAokCkgK9AsICxQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALS}}
-
-@article{Friedberg:2007fk,
-	Annote = {Fold Space
-no explicit estimate of N
-interesting: state that sequence/structure weakly correlate with function
-authors argue that functional considerations should be accounted for when selecting targets for structural genomics
-implies that the number of folds in somewhere in the thousands to 10,000 range},
-	Author = {Friedberg, Iddo and Godzik, Adam},
-	Date-Added = {2007-04-18 10:49:03 -0700},
-	Date-Modified = {2007-04-25 09:59:50 -0700},
-	Group = {Fold Space; Reviewed},
-	Journal = {Structure},
-	Keywords = {PROTEINS},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Friedberg/2007.pdf},
-	Number = {4},
-	Pages = {405--415},
-	Title = {Functional Differentiation of Proteins: Implications for Structural Genomics},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VSR-4NHDSTT-3/2/ab802760ae9e1e1b7fe57e721885ed91},
-	Volume = {15},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEAIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbKL8JLgmNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUZyaWVkYmVyZwAAEAAIAADBcY1JAAAAEQAIAADCS+TTAAAAAQAYAEZsQABGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RnJpZWRiZXJnOjIwMDcucGRmAA4AEgAIADIAMAAwADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRnJpZWRiZXJnLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRnJpZWRiZXJnLzIwMDcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VSR-4NHDSTT-3/2/ab802760ae9e1e1b7fe57e721885ed91}}
-
-@article{Zhang:1997lr,
-	Abstract = {The relations among the numbers of protein sequences, families and folds have been studied theoretically. It is found that the number of families is related to the natural logarithm of the number of sequences. The logarithmic relation should not be changed regardless of what value of the homology threshold is applied in the protein sequence comparison routines. To study the relation between the numbers of families and folds, the degenerate degree of a fold has been introduced. The degenerate degree of a fold is the number of protein families which adopt the same fold. The distribution of the degenerate degrees of folds has been found to be very likely exponential. Based on the distribution, the average degenerate degree d is calculated. The number of folds is simply equal to that of families divided by the average degenerate degree of folds. It is shown that d is an increasing function of time. The current value of d is about 2. It will continue to increase and reach the value of at least 3.3 in some years. By using the above result, the numbers of protein folds for four species have been estimated. In particular, the number of folds for human proteins is estimated to be < or =5200.},
-	Address = {Department of Physics, Tianjin University, China.},
-	Annote = {N = ~5200 folds in humans
-
-this paper kinda sucks b/c it assumes the degenerate degree is the same for all folds, which is bogus
-},
-	Au = {Zhang, CT},
-	Author = {Zhang, C T},
-	Da = {19971124},
-	Date-Added = {2007-04-18 10:40:52 -0700},
-	Date-Modified = {2007-04-30 14:49:53 -0700},
-	Dcom = {19971124},
-	Edat = {1997/07/01 00:00},
-	Group = {Fold Space; Reviewed; Cited},
-	Issn = {0269-2139 (Print)},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering},
-	Keywords = {Animals; Bacterial Proteins/chemistry; Caenorhabditis elegans/chemistry; Escherichia coli/chemistry; Fungal Proteins/chemistry; Helminth Proteins/chemistry; Humans; Models, Chemical; Protein Conformation; Protein Engineering; Protein Folding; Proteins/*chemistry/classification; Saccharomyces cerevisiae/chemistry; Species Specificity},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zhang/1997.pdf},
-	Lr = {20061115},
-	Mhda = {1997/10/28 00:01},
-	Number = {7},
-	Own = {NLM},
-	Pages = {757--761},
-	Pl = {ENGLAND},
-	Pmid = {9342141},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (Fungal Proteins); 0 (Helminth Proteins); 0 (Proteins)},
-	Sb = {IM},
-	So = {Protein Eng. 1997 Jul;10(7):757-61.},
-	Stat = {MEDLINE},
-	Title = {Relations of the numbers of protein sequences, families and folds},
-	Volume = {10},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVkIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABbUe8JLn5ZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVpoYW5nAAAQAAgAAMFxjUkAAAARAAgAAMJMAgYAAAABABgARm1ZAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpaaGFuZzoxOTk3LnBkZgAOABIACAAxADkAOQA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1poYW5nLzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmhhbmcvMTk5Ny5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Wong:1975uq,
-	Au = {Wong, JT},
-	Author = {Wong, J T},
-	Da = {19751108},
-	Date-Added = {2007-03-15 11:54:41 -0400},
-	Date-Modified = {2007-03-15 11:54:59 -0400},
-	Dcom = {19751108},
-	Edat = {1975/05/01},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Keywords = {Amino Acids/biosynthesis; Anticodon; Codon; *Evolution; *Genetic Code; Models, Biological; Probability; Protein Biosynthesis; RNA, Transfer/metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wong/1975.pdf},
-	Lr = {20041117},
-	Mhda = {1975/05/01 00:01},
-	Number = {5},
-	Oid = {NASA: 75217864},
-	Own = {NLM},
-	Pages = {1909--1912},
-	Pl = {UNITED STATES},
-	Pmid = {1057181},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Anticodon); 0 (Codon); 9014-25-9 (RNA, Transfer)},
-	Sb = {IM; S},
-	So = {Proc Natl Acad Sci U S A. 1975 May;72(5):1909-12.},
-	Stat = {MEDLINE},
-	Title = {A co-evolution theory of the genetic code},
-	Volume = {72},
-	Year = {1975}}
-
-@article{Wolynes:1997qy,
-	Annote = {10.1038/nsb1197-871},
-	Author = {Wolynes, Peter G.},
-	Date-Added = {2007-03-15 11:54:18 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Nat Struct Mol Biol},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wolynes/1997.pdf},
-	M3 = {10.1038/nsb1197-871},
-	Number = {11},
-	Pages = {871--874},
-	Title = {As simple as can be?},
-	Ty = {JOUR},
-	Url = {http://dx.doi.org/10.1038/nsb1197-871},
-	Volume = {4},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUwIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQ1PsIcmHZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1dvbHluZXMAABAACAAAwXGNSQAAABEACAAAwhz65gAAAAEAGABGbUwARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOldvbHluZXM6MTk5Ny5wZGYADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Xb2x5bmVzLzE5OTcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV29seW5lcy8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/nsb1197-871}}
-
-@article{Osawa:1992fk,
-	Abstract = {The genetic code, formerly thought to be frozen, is now known to be in a state of evolution. This was first shown in 1979 by Barrell et al. (G. Barrell, A. T. Bankier, and J. Drouin, Nature [London] 282:189-194, 1979), who found that the universal codons AUA (isoleucine) and UGA (stop) coded for methionine and tryptophan, respectively, in human mitochondria. Subsequent studies have shown that UGA codes for tryptophan in Mycoplasma spp. and in all nonplant mitochondria that have been examined. Universal stop codons UAA and UAG code for glutamine in ciliated protozoa (except Euplotes octacarinatus) and in a green alga, Acetabularia. E. octacarinatus uses UAA for stop and UGA for cysteine. Candida species, which are yeasts, use CUG (leucine) for serine. Other departures from the universal code, all in nonplant mitochondria, are CUN (leucine) for threonine (in yeasts), AAA (lysine) for asparagine (in platyhelminths and echinoderms), UAA (stop) for tyrosine (in planaria), and AGR (arginine) for serine (in several animal orders) and for stop (in vertebrates). We propose that the changes are typically preceded by loss of a codon from all coding sequences in an organism or organelle, often as a result of directional mutation pressure, accompanied by loss of the tRNA that translates the codon. The codon reappears later by conversion of another codon and emergence of a tRNA that translates the reappeared codon with a different assignment. Changes in release factors also contribute to these revised assignments. We also discuss the use of UGA (stop) as a selenocysteine codon and the early history of the code.},
-	Address = {Department of Biology, Nagoya University, Japan.},
-	Au = {Osawa, S and Jukes, TH and Watanabe, K and Muto, A},
-	Author = {Osawa, S and Jukes, T H and Watanabe, K and Muto, A},
-	Da = {19920610},
-	Date-Added = {2007-03-15 11:53:54 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19920610},
-	Edat = {1992/03/01},
-	Fir = {Jukes, T H},
-	Gr = {R01 HG00312/HG/NHGRI},
-	Ir = {Jukes TH},
-	Irad = {U CA, Berkeley},
-	Issn = {0146-0749 (Print)},
-	Jid = {7806086},
-	Journal = {Microbiol Rev},
-	Jt = {Microbiological reviews},
-	Keywords = {Base Sequence; Cell Nucleus/chemistry; Codon/*classification; *Evolution; *Genetic Code; Mitochondria/chemistry; Molecular Sequence Data; Nucleic Acid Conformation},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Osawa/1992.pdf},
-	Lr = {20061115},
-	Mhda = {1992/03/01 00:01},
-	Number = {1},
-	Ot = {NASA Discipline Exobiology; Non-NASA Center},
-	Oto = {NASA},
-	Own = {NLM},
-	Pages = {229--264},
-	Pl = {UNITED STATES},
-	Pmid = {1579111},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.; Review},
-	Pubm = {Print},
-	Rf = {252},
-	Rn = {0 (Codon)},
-	Sb = {IM; S},
-	So = {Microbiol Rev. 1992 Mar;56(1):229-64.},
-	Stat = {MEDLINE},
-	Title = {Recent evidence for evolution of the genetic code},
-	Volume = {56},
-	Year = {1992},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNAIMTk5Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQ0/cIcl3FQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABU9zYXdhAAAQAAgAAMFxjUkAAAARAAgAAMIc+eEAAAABABgARmzQAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpPc2F3YToxOTkyLnBkZgAOABIACAAxADkAOQAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL09zYXdhLzE5OTIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvT3Nhd2EvMTk5Mi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Crick:1968lr,
-	Au = {Crick, FH},
-	Author = {Crick, F H},
-	Da = {19690520},
-	Date-Added = {2007-03-15 11:53:18 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19690520},
-	Edat = {1968/12/01},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology},
-	Keywords = {Amino Acids/metabolism; Escherichia coli; Evolution; *Genetic Code; Nucleosides/physiology; Protein Biosynthesis; RNA, Messenger/physiology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Crick/1968.pdf},
-	Lr = {20041117},
-	Mhda = {1968/12/01 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {367--379},
-	Pii = {0022-2836(68)90392-6},
-	Pl = {ENGLAND},
-	Pmid = {4887876},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Nucleosides); 0 (RNA, Messenger)},
-	Sb = {IM},
-	So = {J Mol Biol. 1968 Dec;38(3):367-79.},
-	Stat = {MEDLINE},
-	Title = {The origin of the genetic code},
-	Volume = {38},
-	Year = {1968},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbB0IMTk2OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABRyasIeucFQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNyaWNrAAAQAAgAAMFxjUkAAAARAAgAAMIfHDEAAAABABgARmwdAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDcmljazoxOTY4LnBkZgAOABIACAAxADkANgA4AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0NyaWNrLzE5NjgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ3JpY2svMTk2OC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@misc{zheng-2001,
-	Annote = {This is clearly the pre-print fo the later PRE by Liu, Liu, Qi and Zheng.  I will just use the peer reviewed PRE alphabets.
-No ROC.},
-	Author = {Wei-mou Zheng},
-	Date-Added = {2007-03-12 18:21:03 -0700},
-	Date-Modified = {2007-07-20 17:56:56 -0700},
-	Group = {Reviewed; Forward; Alphabets; Printed; Backward},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zheng/2001.pdf},
-	Read = {Yes},
-	Title = {Entropic Approach for Reduction of Amino Acid Alphabets},
-	Url = {http://www.citebase.org/abstract?id=oai:arXiv.org:physics/0106074},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVoIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQg/8Ia6WRQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVpoZW5nAAAQAAgAAMFxjUkAAAARAAgAAMIbS9QAAAABABgARm1aAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpaaGVuZzoyMDAxLnBkZgAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1poZW5nLzIwMDEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmhlbmcvMjAwMS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://www.citebase.org/abstract?id=oai:arXiv.org:physics/0106074}}
-
-@article{Munson:1994yq,
-	Abstract = {Rationally redesigned variants of the 4-helix-bundle protein Rop are described. The novel proteins have simplified, repacked, hydrophobic cores and yet reproduce the structure and native-like physical properties of the wild-type protein. The repacked proteins have been characterized thermodynamically and their equilibrium and kinetic thermal and chemical unfolding properties are compared with those of wild-type Rop. The equilibrium stability of the repacked proteins to thermal denaturation is enhanced relative to that of the wild-type protein. The rate of chemically induced folding and unfolding of wild-type Rop is extremely slow when compared with other small proteins. Interestingly, although the repacked proteins are more thermally stable than the wild type, their rates of chemically induced folding and unfolding are greatly increased in comparison to wild type. Perhaps as a consequence of this, their equilibrium stabilities to chemical denaturants are slightly reduced in comparison to the wild type.},
-	Address = {Department of Molecular Biophysics and Biochemistry, Yale University, New Haven, Connecticut 06520-8114.},
-	Au = {Munson, M and O'Brien, R and Sturtevant, JM and Regan, L},
-	Author = {Munson, M and O'Brien, R and Sturtevant, J M and Regan, L},
-	Da = {19950509},
-	Date-Added = {2007-03-12 18:16:23 -0700},
-	Date-Modified = {2008-05-29 12:07:52 -0700},
-	Dcom = {19950509},
-	Edat = {1994/11/01},
-	Gr = {GM 04725/GM/NIGMS; GM 46340-01A1/GM/NIGMS},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society},
-	Keywords = {Amino Acid Sequence; Bacterial Proteins/*chemistry; Calorimetry; Circular Dichroism; Guanidine; Guanidines; Kinetics; Molecular Sequence Data; Point Mutation/genetics; *Protein Conformation; Protein Denaturation; Protein Engineering; Protein Folding; *Protein Structure, Secondary; RNA/metabolism; *RNA-Binding Proteins; Temperature; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Munson/1994.pdf},
-	Lr = {20061115},
-	Mhda = {1994/11/01 00:01},
-	Number = {11},
-	Own = {NLM},
-	Pages = {2015--2022},
-	Pl = {UNITED STATES},
-	Pmid = {7535612},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (Guanidines); 0 (RNA-Binding Proteins); 0 (Rop protein, ColE1 plasmid); 113-00-8 (Guanidine); 63231-63-0 (RNA)},
-	Sb = {IM},
-	So = {Protein Sci. 1994 Nov;3(11):2015-22.},
-	Stat = {MEDLINE},
-	Title = {Redesigning the hydrophobic core of a four-helix-bundle protein},
-	Volume = {3},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMMIMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQkJsIbAyFQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk11bnNvbgAQAAgAAMFxjUkAAAARAAgAAMIbZZEAAAABABgARmzDAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNdW5zb246MTk5NC5wZGYAAA4AEgAIADEAOQA5ADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTXVuc29uLzE5OTQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL011bnNvbi8xOTk0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Plaxco:1998kx,
-	Abstract = {Recent research has suggested that stable, native proteins may be encoded by simple sequences of fewer than the full set of 20 proteogenic amino acids. Studies of the ability of simple amino acid sequences to encode stable, topologically complex, native conformations and to fold to these conformations in a biologically relevant time frame have provided insights into the sequence determinants of protein structure and folding kinetics. They may also have important implications for protein design and for theories of the origins of protein synthesis itself.},
-	Address = {Department of Biochemistry, University of Washington, Seattle 98195, USA. kwp@elina.bchem.washington.edu},
-	Au = {Plaxco, KW and Riddle, DS and Grantcharova, V and Baker, D},
-	Author = {Plaxco, K W and Riddle, D S and Grantcharova, V and Baker, D},
-	Da = {19980501},
-	Date-Added = {2007-03-12 18:15:35 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19980501},
-	Edat = {1998/03/31},
-	Issn = {0959-440X (Print)},
-	Jid = {9107784},
-	Journal = {Curr Opin Struct Biol},
-	Jt = {Current opinion in structural biology},
-	Keywords = {*Amino Acid Sequence; Kinetics; Models, Molecular; Protein Biosynthesis; *Protein Conformation; Protein Engineering; *Protein Folding; Protein Structure, Secondary; Proteins/*chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Plaxco/1998.pdf},
-	Lr = {20051116},
-	Mhda = {1998/03/31 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {80--85},
-	Pii = {S0959-440X(98)80013-4},
-	Pl = {ENGLAND},
-	Pmid = {9519299},
-	Pst = {ppublish},
-	Pt = {Journal Article; Review},
-	Pubm = {Print},
-	Rf = {48},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Curr Opin Struct Biol. 1998 Feb;8(1):80-5.},
-	Stat = {MEDLINE},
-	Title = {Simplified proteins: minimalist solutions to the 'protein folding problem'},
-	Volume = {8},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNsIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQgscIa5+NQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlBsYXhjbwAQAAgAAMFxjUkAAAARAAgAAMIbSlMAAAABABgARmzbAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQbGF4Y286MTk5OC5wZGYAAA4AEgAIADEAOQA5ADgALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUGxheGNvLzE5OTgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1BsYXhjby8xOTk4LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Chan:1999fj,
-	Abstract = {A new computational approach optimizes searches for reduced protein folding alphabets that use fewer than 20 types of amino acids. The predicted optimal five-letter alphabet happens to be in agreement with the suggestive results of a recent experiment, but whether highly reduced alphabets are sufficient for truly protein-like properties remains an open experimental question.},
-	Address = {Department of Biochemistry, and Department of Medical Genetics and Microbiology, Faculty of Medicine, University of Toronto, 1 King's College Circle, Toronto, Ontario M5S 1A8, Canada. chan@arrhenius.med.toronto.edu},
-	Au = {Chan, HS},
-	Author = {Chan, H S},
-	Con = {Nat Struct Biol. 1999 Nov;6(11):1033-8. PMID: 10542095},
-	Da = {20010126},
-	Date-Added = {2007-03-12 18:14:57 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20020618},
-	Doi = {10.1038/14876},
-	Edat = {1999/12/14 09:00},
-	Issn = {1072-8368 (Print)},
-	Jid = {9421566},
-	Journal = {Nat Struct Biol},
-	Jt = {Nature structural biology},
-	Keywords = {Algorithms; Amino Acids/*chemistry/*metabolism; *Computer Simulation; Hydrophobicity; Kinetics; *Models, Chemical; Protein Engineering; *Protein Folding; Proteins/*chemistry/genetics/*metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chan/1999.pdf},
-	Mhda = {2002/06/19 10:01},
-	Number = {11},
-	Own = {NLM},
-	Pages = {994--996},
-	Pl = {United States},
-	Pmid = {10542084},
-	Pst = {ppublish},
-	Pt = {Comment; News},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Nat Struct Biol. 1999 Nov;6(11):994-6.},
-	Stat = {MEDLINE},
-	Title = {Folding alphabets},
-	Volume = {6},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAoIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQgocIa56JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABENoYW4AEAAIAADBcY1JAAAAEQAIAADCG0oSAAAAAQAYAEZsCgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hhbjoxOTk5LnBkZgAADgASAAgAMQA5ADkAOQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaGFuLzE5OTkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0NoYW4vMTk5OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/14876}}
-
-@article{Wang:1999uq,
-	Abstract = {What is the minimal number of residue types required to form a structured protein? This question is important for understanding protein modeling and design. Recently, an experimental finding by Baker and coworkers suggested a five-residue solution to this problem. We were motivated by their results and by the arguments of Wolynes to study reductions of protein representation based on the concept of mismatch between a reduced interaction matrix and the Miyazawa and Jernigan (MJ) matrix. We find several possible simplified schemes from the relationship of minimized mismatch versus the number of residue types (N = approximately 2-20). As a specific case, an optimal reduction with five types of residues has the same form as the simplified palette of Baker and coworkers. Statistical and kinetic features of a number of sequences are tested. Comparison of results from sequences with 20 residue types and their reduced representations indicates that the reduction by mismatch minimization is successful. For example, sequences with five types of residues have good folding ability and kinetic accessibility in model studies.},
-	Address = {National Laboratory of Solid-State Microstructure and Department of Physics, Nanjing University, Nanjing 210093, People's Republic of China.},
-	Annote = {WW},
-	Au = {Wang, J and Wang, W},
-	Author = {Wang, J and Wang, W},
-	Cin = {Nat Struct Biol. 1999 Nov;6(11):994-6. PMID: 10542084},
-	Da = {20010126},
-	Date-Added = {2007-03-12 18:14:02 -0700},
-	Date-Modified = {2008-05-29 12:24:16 -0700},
-	Dcom = {20020618},
-	Edat = {1999/12/14 09:00},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {9421566},
-	Journal = {Nat Struct Biol},
-	Jt = {Nature structural biology},
-	Keywords = {Algorithms; Amino Acid Sequence; Amino Acids/*chemistry/*metabolism; *Computer Simulation; Hydrophobicity; Kinetics; *Models, Chemical; Molecular Sequence Data; Protein Engineering; *Protein Folding; Proteins/*chemistry/*metabolism},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/1999.pdf},
-	Lr = {20061115},
-	Mhda = {2002/06/19 10:01},
-	Number = {11},
-	Own = {NLM},
-	Pages = {1033--1038},
-	Pl = {United States},
-	Pmid = {10542095},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Nat Struct Biol. 1999 Nov;6(11):1033-8.},
-	Stat = {MEDLINE},
-	Title = {A computational approach to simplifying the protein folding alphabet},
-	Volume = {6},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQg68Ia6OZQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADCG0tWAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoxOTk5LnBkZgAADgASAAgAMQA5ADkAOQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYW5nLzE5OTkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dhbmcvMTk5OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/14918}}
-
-@article{Schafmeister:1997qy,
-	Abstract = {A 108 amino acid protein was designed and constructed from a reduced alphabet of seven amino acids. The 2.9 A resolution X-ray crystal structure confirms that the protein is a four helix bundle, as it was designed to be. Hydrogen/deuterium exchange experiments reveal buried amide protons with protection factors in excess of 1 x 10(6) in the range characteristic of well protected protons in functional folded proteins (10(3)-10(8)) rather than protons in rapid exchange (0-10(2)). The protein is monomeric at 1 mM, the concentration at which the exchange experiments were undertaken, indicating that the exchange factors are due to a unique stable tertiary structure fold, and not due to any higher order quaternary structure. Thermodynamic analysis provides an estimate of the free energy of folding of -9.3 kcal mole-1 at 25 degrees C, consistent with the free energy of folding derived from the protection factors of the most protected protons, indicating that global unfolding is required for exchange of the most protected protons.},
-	Address = {Department of Biochemistry and Biophysics, University of California, San Francisco 94143-0448, USA.},
-	Annote = {in this case the authors had already designed a 24 residue peptide to act as a detergent for membrane proteins; when they crystallized this thing it formed 4-helix bundles
-
-SO...
-it was a short hop to the full 4-helix bundle, they just needed 3 linkers which they made from Gly (3, 4 and 3 Gly's long) which adding it up (10 + 4*24) gives 106--the length of their protein was 108 so not sure what the disacrepancy is
-
-in any case this was a do novo design, no function was designed for this bundle},
-	Au = {Schafmeister, CE and LaPorte, SL and Miercke, LJ and Stroud, RM},
-	Author = {Schafmeister, C E and LaPorte, S L and Miercke, L J and Stroud, R M},
-	Da = {19980115},
-	Date-Added = {2007-03-12 18:10:21 -0700},
-	Date-Modified = {2008-05-29 12:09:20 -0700},
-	Dcom = {19980115},
-	Edat = {1997/12/24},
-	Jid = {9421566},
-	Journal = {Nat Struct Biol},
-	Jt = {Nature structural biology},
-	Keywords = {Bacterial Proteins/chemistry/genetics; Crystallography, X-Ray; Drug Design; Drug Stability; Escherichia coli/genetics; Models, Molecular; Protein Engineering; Protein Folding; Protein Structure, Secondary; Proteins/*chemistry/genetics; Recombinant Fusion Proteins/chemistry/genetics; Solutions; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Schafmeister/1997.pdf},
-	Lr = {20061115},
-	Mhda = {1997/12/24 00:01},
-	Number = {12},
-	Own = {NLM},
-	Pages = {1039--1046},
-	Pl = {UNITED STATES},
-	Pmid = {9406555},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Bacterial Proteins); 0 (Proteins); 0 (Recombinant Fusion Proteins); 0 (Solutions)},
-	Sb = {IM},
-	So = {Nat Struct Biol. 1997 Dec;4(12):1039-46.},
-	Stat = {MEDLINE},
-	Title = {A designed four helix bundle protein with native-like structure},
-	Volume = {4},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGgAAAAAAGgAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbQIIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQj9cIbAcxQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAADFNjaGFmbWVpc3RlcgAQAAgAAMFxjUkAAAARAAgAAMIbZDwAAAABABgARm0CAEZrzwBGatUARmobAEZkaABAiUMAAgBHaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTY2hhZm1laXN0ZXI6MTk5Ny5wZGYAAA4AEgAIADEAOQA5ADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAENVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvU2NoYWZtZWlzdGVyLzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QKi4uLy4uLy4uLy4uL0FydGljbGVzL1NjaGFmbWVpc3Rlci8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCXQJiAmsCdgJ6AogCjwKYAsUCygLNAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAto=},
-	Bdsk-Url-1 = {http://www.nature.com/nsmb/journal/v4/n12/abs/nsb1297-1039.html}}
-
-@article{Wei:2003fk,
-	Abstract = {Combinatorial libraries of de novo amino acid sequences can provide a rich source of diversity for the discovery of novel proteins. Randomly generated sequences, however, rarely fold into well ordered protein-like structures. To enhance the quality of a library, diversity must be focused into those regions of sequence space most likely to yield well folded structures. We have constructed focused libraries of de novo sequences by designing the binary pattern of polar and nonpolar amino acids to favor structures that contain abundant secondary structure, while simultaneously burying hydrophobic side chains in the protein interior and exposing hydrophilic side chains to solvent. Because binary patterning specifies only the polar/nonpolar periodicity, but not the identities of the side chains, detailed structural features, including packing interactions, cannot be designed a priori. Can binary patterned libraries nonetheless encode well folded proteins? An unambiguous answer to this question requires determination of a 3D structure. We used NMR spectroscopy to determine the structure of S-824, a novel protein from a recently constructed library of 102-residue sequences. This library is "naive" in that it has not been subjected to high-throughput screens or directed evolution. The experimentally determined structure of S-824 is a four-helix bundle, as specified by the design. As dictated by the binary-code strategy, nonpolar side chains are buried in the protein interior, and polar side chains are exposed to solvent. The polypeptide backbone and buried side chains are well ordered, demonstrating that S-824 is not a molten globule and forms a unique structure. These results show that amino acid sequences that have neither been selected by evolution, nor designed by computer, nor isolated by high-throughput screening, can form native-like structures. These findings validate the binary-code strategy as an effective method for producing vast collections of well folded de novo proteins.},
-	Address = {Department of Chemistry, Princeton University, Princeton, NJ 08544, USA.},
-	Au = {Wei, Y and Kim, S and Fela, D and Baum, J and Hecht, MH},
-	Author = {Wei, Yinan and Kim, Seho and Fela, David and Baum, Jean and Hecht, Michael H},
-	Da = {20031112},
-	Date-Added = {2007-03-12 17:47:29 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20040105},
-	Dep = {20031030},
-	Doi = {10.1073/pnas.1835644100},
-	Edat = {2003/11/01 05:00},
-	Gr = {R01-GM45302/GM/NIGMS; R01-GM62869/GM/NIGMS},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America},
-	Keywords = {Amino Acid Sequence; Escherichia coli/metabolism; Magnetic Resonance Spectroscopy; Models, Molecular; Molecular Sequence Data; *Peptide Library; Protein Conformation; Protein Engineering; Protein Folding; Protein Structure, Secondary; Sequence Homology, Amino Acid; Solvents/chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wei/2003a.pdf},
-	Lr = {20061115},
-	Mhda = {2004/01/06 05:00},
-	Number = {23},
-	Own = {NLM},
-	Pages = {13270--13273},
-	Phst = {2003/10/30 {$[$}aheadofprint{$]$}},
-	Pii = {1835644100},
-	Pl = {United States},
-	Pmid = {14593201},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Peptide Library); 0 (Solvents)},
-	Sb = {IM},
-	Si = {PDB/1P68},
-	So = {Proc Natl Acad Sci U S A. 2003 Nov 11;100(23):13270-3. Epub 2003 Oct 30.},
-	Stat = {MEDLINE},
-	Title = {Solution structure of a de novo protein from a designed combinatorial library},
-	Volume = {100},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUMJMjAwM2EucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQng8IbQp5QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1dlaQAAEAAIAADBcY1JAAAAEQAIAADCG6UOAAAAAQAYAEZtQwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2VpOjIwMDNhLnBkZgAADgAUAAkAMgAwADAAMwBhAC4AcABkAGYADwAIAAMAaABzAHIAEgA7VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1dlaS8yMDAzYS5wZGYAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAiLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2VpLzIwMDNhLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCRwJMAlUCYAJkAnICeQKCAqcCrAKvAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAArw=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.1835644100}}
-
-@article{Riddle:1997lr,
-	Abstract = {Early protein synthesis is thought to have involved a reduced amino acid alphabet. What is the minimum number of amino acids that would have been needed to encode complex protein folds similar to those found in nature today? Here we show that a small beta-sheet protein, the SH3 domain, can be largely encoded by a five letter amino acid alphabet but not by a three letter alphabet. Furthermore, despite the dramatic changes in sequence, the folding rates of the reduced alphabet proteins are very close to that of the naturally occurring SH3 domain. This finding suggests that despite the vast size of the search space, the rapid folding of biological sequences to their native states is not the result of extensive evolutionary optimization. Instead, the results support the idea that the interactions which stabilize the native state induce a funnel shape to the free energy landscape sufficient to guide the folding polypeptide chain to the proper structure.},
-	Address = {Department of Biochemistry, University of Washington, Seattle 98195, USA.},
-	Annote = {the residues that were NOT targeted were ones that contact the proline-rich peptide ligand (9) and those at a restriction site or oligonucleotide annealing region (8)
-
-the other 40 were all targeted, two variants with simplified sequences that succeeded the binding tests (biopanning and colony lift screen) were isolated FP1 and FP2
-
-38 of the 40 sites in both FP1 and FP2 could be mutated to IKEAG},
-	Au = {Riddle, DS and Santiago, JV and Bray-Hall, ST and Doshi, N and Grantcharova, VP and Yi, Q and Baker, D},
-	Author = {Riddle, D S and Santiago, J V and Bray-Hall, S T and Doshi, N and Grantcharova, V P and Yi, Q and Baker, D},
-	Da = {19971113},
-	Date-Added = {2007-03-12 15:44:15 -0700},
-	Date-Modified = {2008-05-29 12:09:45 -0700},
-	Dcom = {19971113},
-	Edat = {1997/10/23},
-	Jid = {9421566},
-	Journal = {Nat Struct Biol},
-	Jt = {Nature structural biology},
-	Keywords = {*Amino Acid Sequence; Calorimetry; Cloning, Molecular; Conserved Sequence; Genes, src; Guanidine; Kinetics; Models, Molecular; Molecular Sequence Data; Peptide Library; *Protein Biosynthesis; Protein Denaturation; *Protein Folding; *Protein Structure, Secondary; Proteins/*chemistry; Sequence Alignment; Thermodynamics; src Homology Domains},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Riddle/1997.pdf},
-	Lr = {20061115},
-	Mhda = {1997/10/23 00:01},
-	Number = {10},
-	Own = {NLM},
-	Pages = {805--809},
-	Pl = {UNITED STATES},
-	Pmid = {9334745},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Peptide Library); 0 (Proteins); 113-00-8 (Guanidine)},
-	Sb = {IM},
-	So = {Nat Struct Biol. 1997 Oct;4(10):805-9.},
-	Stat = {MEDLINE},
-	Title = {Functional rapidly folding proteins from simplified amino acid sequences},
-	Volume = {4},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPAIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQjjsIbAQFQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlJpZGRsZQAQAAgAAMFxjUkAAAARAAgAAMIbY3EAAAABABgARmzwAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSaWRkbGU6MTk5Ny5wZGYAAA4AEgAIADEAOQA5ADcALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUmlkZGxlLzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1JpZGRsZS8xOTk3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Ball:2002qy,
-	Abstract = {We consider the design of proteins to be simultaneously thermodynamically stable in multiple independent and correlated conformations. We first show that a protein can be trained to fold to multiple independent conformations and calculate its capacity. The number of configurations that it can remember is proportional to the logarithm of the number of amino acid species A, independent of chain length. Next we investigate the recognition of correlated conformations, which we apply to funnel design around a single configuration. The maximum basin of attraction, as parametrized in our model, also depends on the number of amino acid species as ln A. We argue that the extent to which the protein energy landscape can be manipulated is fixed, effecting a trade off between well breadth, well depth, and well number. This emerging picture motivates a clearer understanding of the scope and limits of protein and heteropolymer function.},
-	Address = {Department of Physics, University of Warwick, Coventry CV4 7AL, England. r.c.ball@warwick.ac.uk},
-	Annote = {The size of the funnel in the protein folding landscape is proportional to log(A) where A is the number of amino acid species (alphabet size).},
-	Au = {Ball, RC and Fink, TM},
-	Author = {Ball, Robin C and Fink, Thomas M A},
-	Da = {20021007},
-	Date-Added = {2007-03-07 11:06:43 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20021218},
-	Dep = {20020919},
-	Edat = {2002/10/09 04:00},
-	Issn = {1539-3755 (Print)},
-	Jid = {101136452},
-	Journal = {Phys Rev E},
-	Jt = {Physical review. E, Statistical, nonlinear, and soft matter physics .},
-	Keywords = {Amino Acids/*chemistry; Biophysics; Kinetics; Models, Statistical; Protein Conformation; Protein Folding; Proteins/*chemistry; Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Ball/2002.pdf},
-	Lr = {20030103},
-	Mhda = {2002/12/19 04:00},
-	Number = {3 Pt 1},
-	Own = {NLM},
-	Pages = {031902},
-	Phst = {2001/11/26 {$[$}received{$]$}; 2002/09/19 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {12366147},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Amino Acids); 0 (Proteins)},
-	Sb = {IM},
-	So = {Phys Rev E Stat Nonlin Soft Matter Phys. 2002 Sep;66(3 Pt 1):031902. Epub 2002 Sep 19.},
-	Stat = {MEDLINE},
-	Title = {Protein design depends on the size of the amino acid alphabet},
-	Volume = {66},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+gIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlRhsEPT4BQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEJhbGwAEAAIAADBcY1JAAAAEQAIAADBD7HwAAAAAQAYAEZr6ABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6QmFsbDoyMDAyLnBkZgAADgASAAgAMgAwADAAMgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9CYWxsLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0JhbGwvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Liu:2002lr,
-	Abstract = {The primitive data for deducing the Miyazawa-Jernigan contact energy or blocks substitution matrix (BLOSUM) consists of pair frequency counts. Each amino acid corresponds to a conditional probability distribution. Based on the deviation of such a conditional probability from random background, a scheme for the reduction of the amino acid alphabet is proposed. It is observed that an evident discrepancy exists between the reduced alphabets obtained from the raw data of the Miyazawa-Jernigan's and BLOSUM's residue pair counts. Taking a homologous sequence database SCOP40 as a test set, we detect homology with the obtained coarse-grained substitution matrices. It is verified that the reduced alphabets obtained well preserve information contained in the original 20-letter alphabet.},
-	Address = {Institute of Theoretical Physics, China, Beijing 100080, China.},
-	Annote = {PRE_BL50
-PRE_MJ},
-	Au = {Liu, X and Liu, D and Qi, J and Zheng, WM},
-	Author = {Liu, X and Liu, D and Qi, J and Zheng, W-M},
-	Da = {20020920},
-	Date-Added = {2007-03-07 10:38:11 -0800},
-	Date-Modified = {2008-05-29 12:23:51 -0700},
-	Dcom = {20021217},
-	Dep = {20020823},
-	Edat = {2002/09/21 10:00},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {101136452},
-	Journal = {Phys Rev E},
-	Jt = {Physical review. E, Statistical, nonlinear, and soft matter physics},
-	Keywords = {Algorithms; Amino Acid Sequence; Amino Acids/*chemistry; Cluster Analysis; Models, Statistical; Molecular Sequence Data},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Liu/2002.pdf},
-	Lr = {20061115},
-	Mhda = {2002/12/18 04:00},
-	Number = {2 Pt 1},
-	Own = {NLM},
-	Pages = {021906},
-	Phst = {2001/12/30 {$[$}received{$]$}; 2002/08/23 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {12241213},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print-Electronic},
-	Read = {Yes},
-	Rn = {0 (Amino Acids)},
-	Sb = {IM},
-	So = {Phys Rev E Stat Nonlin Soft Matter Phys. 2002 Aug;66(2 Pt 1):021906. Epub 2002 Aug 23.},
-	Stat = {MEDLINE},
-	Title = {Simplified amino acid alphabets based on deviation of conditional probability from random background},
-	Volume = {66},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKYIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlRxsA263EAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0xpdQAAEAAIAADBcY1JAAAAEQAIAADAN1vxAAAAAQAYAEZspgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TGl1OjIwMDIucGRmAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGl1LzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGl1LzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Pandini:2007fk,
-	Abstract = {MOTIVATION: The size of current protein databases is a challenge for many Bioinformatics applications, both in terms of processing speed and information redundancy. It may be therefore desirable to efficiently reduce the database of interest to a maximally representative subset. RESULTS: The MinSet method employs a combination of a Suffix Tree and a Genetic Algorithm for the generation, selection and assessment of database subsets. The approach is generally applicable to any type of string-encoded data, allowing for a drastic reduction of the database size whilst retaining most of the information contained in the original set. We demonstrate the performance of the method on a database of protein domain structures encoded as strings. We used the SCOP40 domain database by translating protein structures into character strings by means of a structural alphabet and by extracting optimised subsets according to an entropy score that is based on a constant-length fragment dictionary. Therefore, optimised subsets are maximally representative for the distribution and range of local structures. Subsets containing only 10% of the SCOP structure classes show a coverage of > 90% for fragments of length 1-4. AVAILABILITY: http://mathbio.nimr.mrc.ac.uk/~jkleinj/MinSet. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
-	Address = {Dipartimento di Scienze dell'Ambiente e del Territorio, Universita degli Studi di Milano-Bicocca, Milano, Italy.},
-	Author = {Pandini, A and Bonati, L and Fraternali, F and Kleinjung, J},
-	Da = {20070105},
-	Date-Added = {2007-02-15 16:53:34 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20070103},
-	Doi = {10.1093/bioinformatics/btl637},
-	Edat = {2007/01/06 09:00},
-	Issn = {1460-2059 (Electronic)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Pandini/2007.pdf},
-	Mhda = {2007/01/06 09:00},
-	Own = {NLM},
-	Pii = {btl637},
-	Pmid = {17204463},
-	Pst = {aheadofprint},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print-Electronic},
-	So = {Bioinformatics. 2007 Jan 3;.},
-	Stat = {Publisher},
-	Title = {MinSet : A general approach to derive maximally representative database subsets by using fragment dictionaries and its application to the SCOP database},
-	Year = {2007},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNUIMjAwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA4IcH4p+JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1BhbmRpbmkAABAACAAAwXGNSQAAABEACAAAwfkYYgAAAAEAGABGbNUARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlBhbmRpbmk6MjAwNy5wZGYADgASAAgAMgAwADAANwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9QYW5kaW5pLzIwMDcucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUGFuZGluaS8yMDA3LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btl637}}
-
-@article{Suhrer:2006lr,
-	Abstract = {SUMMARY: The database SCOP (Structural Classification Of Proteins) has become a major resource in bioinformatics and protein science. A particular strength of SCOP is the flexibility of its rules enabling the preservation of the many details spotted by experts in the classification process. Here we endow classic SCOP Families with quantified structural information and comment on the structural diversity found in the SCOP hierarchy. AVAILABILITY: Quantified SCOP (QSCOP) is available as a public WEB service. http://services.came.sbg.ac.at.},
-	Address = {Center of Applied Molecular Engineering, Department of Bioinformatics, Division of Molecular Biology, University of Salzburg, Hellbrunnerstrasse 34, 5020 Salzburg, Austria.},
-	Author = {Suhrer, SJ and Wiederstein, M and Sippl, MJ},
-	Da = {20061127},
-	Date-Added = {2007-02-15 16:53:04 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20061124},
-	Doi = {10.1093/bioinformatics/btl594},
-	Edat = {2006/11/28 09:00},
-	Issn = {1460-2059 (Electronic)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Suhrer/2006.pdf},
-	Mhda = {2006/11/28 09:00},
-	Own = {NLM},
-	Pii = {btl594},
-	Pmid = {17127679},
-	Pst = {aheadofprint},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print-Electronic},
-	So = {Bioinformatics. 2006 Nov 24;.},
-	Stat = {Publisher},
-	Title = {QSCOP -- SCOP quantified by structural relationships},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbSMIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA4MMH4qBVQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlN1aHJlcgAQAAgAAMFxjUkAAAARAAgAAMH5GJUAAAABABgARm0jAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTdWhyZXI6MjAwNi5wZGYAAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvU3VocmVyLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1N1aHJlci8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btl594}}
-
-@article{Pearson:1991lr,
-	Abstract = {The sensitivity and selectivity of the FASTA and the Smith-Waterman protein sequence comparison algorithms were evaluated using the superfamily classification provided in the National Biomedical Research Foundation/Protein Identification Resource (PIR) protein sequence database. Sequences from each of the 34 superfamilies in the PIR database with 20 or more members were compared against the protein sequence database. The similarity scores of the related and unrelated sequences were determined using either the FASTA program or the Smith-Waterman local similarity algorithm. These two sets of similarity scores were used to evaluate the ability of the two comparison algorithms to identify distantly related protein sequences. The FASTA program using the ktup = 2 sensitivity setting performed as well as the Smith-Waterman algorithm for 19 of the 34 superfamilies. Increasing the sensitivity by setting ktup = 1 allowed FASTA to perform as well as Smith-Waterman on an additional 7 superfamilies. The rigorous Smith-Waterman method performed better than FASTA with ktup = 1 on 8 superfamilies, including the globins, immunoglobulin variable regions, calmodulins, and plastocyanins. Several strategies for improving the sensitivity of FASTA were examined. The greatest improvement in sensitivity was achieved by optimizing a band around the best initial region found for every library sequence. For every superfamily except the globins and immunoglobulin variable regions, this strategy was as sensitive as a full Smith-Waterman. For some sequences, additional sensitivity was achieved by including conserved but nonidentical residues in the lookup table used to identify the initial region.},
-	Address = {Department of Biochemistry, University of Virginia, Charlottesville 22908.},
-	Au = {Pearson, WR},
-	Author = {Pearson, W R},
-	Da = {19920305},
-	Date-Added = {2007-02-07 11:39:30 -0800},
-	Date-Modified = {2008-05-29 12:27:17 -0700},
-	Dcom = {19920305},
-	Edat = {1991/11/01},
-	Gr = {LM04969/LM/NLM},
-	Jid = {8800135},
-	Journal = {Genomics},
-	Jt = {Genomics},
-	Keywords = {*Algorithms; *Amino Acid Sequence; *Databases, Factual; Gene Library; *Information Storage and Retrieval; Molecular Sequence Data; Proteins/*classification; Sensitivity and Specificity; Sequence Alignment; Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Pearson/1991.pdf},
-	Lr = {20061115},
-	Mhda = {1991/11/01 00:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {635--650},
-	Pii = {0888-7543(91)90071-L},
-	Pl = {UNITED STATES},
-	Pmid = {1774068},
-	Pst = {ppublish},
-	Pt = {Comparative Study; Journal Article; Research Support, U.S. Gov't, P.H.S.},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Genomics. 1991 Nov;11(3):635-50.},
-	Stat = {MEDLINE},
-	Title = {Searching protein sequence libraries: comparison of the sensitivity and selectivity of the {Smith-Waterman} and {FASTA} algorithms},
-	Volume = {11},
-	Year = {1991},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNgIMTk5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA+7X8HvpPMAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1BlYXJzb24AABAACAAAwXGNSQAAABEACAAAwfAVcwAAAAEAGABGbNgARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlBlYXJzb246MTk5MS5wZGYADgASAAgAMQA5ADkAMQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9QZWFyc29uLzE5OTEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUGVhcnNvbi8xOTkxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Holm:1996lr,
-	Abstract = {The comparison of the three-dimensional shapes of protein molecules poses a complex algorithmic problem. Its solution provides biologists with computational tools to organize the rapidly growing set of thousands of known protein shapes, to identify new types of protein architecture, and to discover unexpected evolutionary relations, reaching back billions of years, between protein molecules. Protein shape comparison also improves tools for identifying gene functions in genome databases by defining the essential sequence-structure features of a protein family. Finally, an exhaustive all-on-all shape comparison provides a map of physical attractor regions in the abstract shape space of proteins, with implications for the processes of protein folding and evolution.},
-	Address = {European Bioinformatics Institute, European Molecular Biology Laboratory, Hinxton Hall, Cambridge CB10 1SD, UK.},
-	Au = {Holm, L and Sander, C},
-	Author = {Holm, L and Sander, C},
-	Da = {19960903},
-	Date-Added = {2007-02-06 09:14:26 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19960903},
-	Edat = {1996/08/02},
-	Issn = {0036-8075 (Print)},
-	Jid = {0404511},
-	Journal = {Science},
-	Jt = {Science},
-	Keywords = {*Algorithms; Amino Acid Sequence; Computer Communication Networks; *Databases, Factual; *Evolution, Molecular; Models, Molecular; Molecular Sequence Data; *Protein Conformation; Protein Folding; Protein Structure, Secondary; Protein Structure, Tertiary; Proteins/*chemistry; Sequence Alignment},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1996.pdf},
-	Lr = {20061115},
-	Mhda = {1996/08/02 00:01},
-	Number = {5275},
-	Own = {NLM},
-	Pages = {595--603},
-	Pl = {UNITED STATES},
-	Pmid = {8662544},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Review},
-	Pubm = {Print},
-	Rf = {36},
-	Rn = {0 (Proteins)},
-	Sb = {IM; S},
-	Si = {PDB/2DRP},
-	So = {Science. 1996 Aug 2;273(5275):595-603.},
-	Stat = {MEDLINE},
-	Title = {Mapping the protein universe},
-	Volume = {273},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA+NNcHt8rlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADB7mM5AAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTk2LnBkZgAADgASAAgAMQA5ADkANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0hvbG0vMTk5Ni5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Dietmann:2001kx,
-	Abstract = {The Dali Domain Dictionary (http://www.ebi.ac.uk/dali/domain) is a numerical taxonomy of all known structures in the Protein Data Bank (PDB). The taxonomy is derived fully automatically from measurements of structural, functional and sequence similarities. Here, we report the extension of the classification to match the traditional four hierarchical levels corresponding to: (i) supersecondary structural motifs (attractors in fold space), (ii) the topology of globular domains (fold types), (iii) remote homologues (functional families) and (iv) homologues with sequence identity above 25% (sequence families). The computational definitions of attractors and functional families are new. In September 2000, the Dali classification contained 10 531 PDB entries comprising 17 101 chains, which were partitioned into five attractor regions, 1375 fold types, 2582 functional families and 3724 domain sequence families. Sequence families were further associated with 99 582 unique homologous sequences in the HSSP database, which increases the number of effectively known structures several-fold. The resulting database contains the description of protein domain architecture, the definition of structural neighbours around each known structure, the definition of structurally conserved cores and a comprehensive library of explicit multiple alignments of distantly related protein families.},
-	Address = {Structural Genomics Group, EMBL-EBI, Cambridge CB10 1SD, UK.},
-	Au = {Dietmann, S and Park, J and Notredame, C and Heger, A and Lappe, M and Holm, L},
-	Author = {Dietmann, S and Park, J and Notredame, C and Heger, A and Lappe, M and Holm, L},
-	Da = {20010104},
-	Date-Added = {2007-01-26 09:53:05 -0800},
-	Date-Modified = {2008-05-29 12:25:54 -0700},
-	Dcom = {20010208},
-	Edat = {2000/01/11 19:15},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Keywords = {Amino Acid Motifs; Amino Acid Sequence; *Databases, Factual; Evolution, Molecular; Internet; Protein Structure, Tertiary; *Proteins/chemistry/classification/genetics; Sequence Alignment; Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dietmann/2001.pdf},
-	Lr = {20061115},
-	Mhda = {2001/03/03 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {55--57},
-	Pl = {ENGLAND},
-	Pmid = {11125048},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2001 Jan 1;29(1):55-7.},
-	Stat = {MEDLINE},
-	Title = {A fully automatic evolutionary classification of protein folds: {Dali Domain Dictionary} version 3},
-	Volume = {29},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCcIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7resHff+hQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACERpZXRtYW5uABAACAAAwXGNSQAAABEACAAAwd/waAAAAAEAGABGbCcARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkRpZXRtYW5uOjIwMDEucGRmAAAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0RpZXRtYW5uLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0RpZXRtYW5uLzIwMDEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Holm:1998fj,
-	Abstract = {The rapid growth in the number of experimentally determined three-dimensional protein structures has sharpened the need for comprehensive and up-to-date surveys of known structures. Classic work on protein structure classification has made it clear that a structural survey is best carried out at the level of domains, i.e., substructures that recur in evolution as functional units in different protein contexts. We present a method for automated domain identification from protein structure atomic coordinates based on quantitative measures of compactness and, as the new element, recurrence. Compactness criteria are used to recursively divide a protein into a series of successively smaller and smaller substructures. Recurrence criteria are used to select an optimal size level of these substructures, so that many of the chosen substructures are common to different proteins at a high level of statistical significance. The joint application of these criteria automatically yields consistent domain definitions between remote homologs, a result difficult to achieve using compactness criteria alone. The method is applied to a representative set of 1,137 sequence-unique protein families covering 6,500 known structures. Clustering of the resulting set of domains (substructures) yields 594 distinct fold classes (types of substructures). The Dali Domain Dictionary (http://www.embl-ebi.ac.uk/dali/) not only provides a global structural classification, but also a comprehensive description of families of protein sequences grouped around representative proteins of known structure. The classification will be continuously updated and can serve as a basis for improving our understanding of protein evolution and function and for evolving optimal strategies to complete the map of all natural protein structures.},
-	Address = {EMBL-EBI, Wellcome Trust Genome Campus, Cambridge, United Kingdom.},
-	Aid = {10.1002/(SICI)1097-0134(19981001)33:1<88::AID-PROT8>3.0.CO;2-H {$[$}pii{$]$}},
-	Annote = {Defines the Z-score used to cluster domains and uses a cutoff of Z = 2 to cluster folds.},
-	Au = {Holm, L and Sander, C},
-	Author = {Holm, L and Sander, C},
-	Da = {19990119},
-	Date-Added = {2007-01-26 09:32:18 -0800},
-	Date-Modified = {2008-05-29 12:25:12 -0700},
-	Dcom = {19990119},
-	Edat = {1998/09/19 02:17},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins},
-	Keywords = {Amino Acid Sequence; Molecular Sequence Data; *Protein Conformation},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1998},
-	Lr = {20001218},
-	Mhda = {2000/06/20 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {88-96},
-	Pl = {UNITED STATES},
-	Pmid = {9741847},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Proteins. 1998 Oct 1;33(1):88-96.},
-	Stat = {MEDLINE},
-	Title = {Dictionary of recurrent domains in protein structures},
-	Volume = {33},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQF4AAAAAAF4AAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgEMTk5OAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7qhsHfewlQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADB3+uJAAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAO2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTk4AAAOAAoABAAxADkAOQA4AA8ACAADAGgAcwByABIAN1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTgAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAeLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSG9sbS8xOTk40h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI1AjoCQwJOAlICYAJnAnACkQKWApkAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACpg==}}
-
-@article{Holm:1999uq,
-	Abstract = {Dali and HSSP are derived databases organizing protein space in the structurally known regions. We use an automatic structure alignment program (Dali) for the classification of all known 3D structures based on all-against-all comparison of 3D structures in the Protein Data Bank. The HSSP database associates 1D sequences with known 3D structures using a position-weighted dynamic programming method for sequence profile alignment (MaxHom). As a result, the HSSP database not only provides aligned sequence families, but also implies secondary and tertiary structures covering 36% of all sequences in Swiss-Prot. The structure classification by Dali and the sequence families in HSSP can be browsed jointly from a web interface providing a rich network of links between neighbours in fold space, between domains and proteins, and between structures and sequences. In particular, this results in a database of explicit multiple alignments of protein families in the twilight zone of sequence similarity. The organization of protein structures and families provides a map of the currently known regions of the protein universe that is useful for the analysis of folding principles, for the evolutionary unification of protein families and for maximizing the information return from experimental structure determination. The databases are available from http://www.embl-ebi.ac.uk/dali/},
-	Address = {European Bioinformatics Institute, EMBL-EBI, Genome Campus, Cambridge CB10 1SD, UK. holm@embl-ebi.ac.uk},
-	Aid = {gkc097 {$[$}pii{$]$}},
-	Au = {Holm, L and Sander, C},
-	Author = {Holm, L and Sander, C},
-	Da = {19990316},
-	Date-Added = {2007-01-26 09:01:11 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19990316},
-	Edat = {1998/12/10},
-	Issn = {0305-1048 (Print)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research},
-	Keywords = {*Databases, Factual; Evolution, Molecular; Information Storage and Retrieval; Internet; Protein Conformation; *Protein Folding; Proteins/*chemistry/*classification/metabolism; Reproducibility of Results; *Sequence Alignment; Sequence Homology, Amino Acid; Variation (Genetics)},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1999.pdf},
-	Lr = {20011114},
-	Mhda = {1998/12/10 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {244-7},
-	Pl = {ENGLAND},
-	Pmid = {9847191},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM; S},
-	So = {Nucleic Acids Res. 1999 Jan 1;27(1):244-7.},
-	Stat = {MEDLINE},
-	Title = {Protein folds and families: sequence and structure alignments},
-	Volume = {27},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgIMTk5OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7qBcHfcptQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADB3+MbAAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTk5LnBkZgAADgASAAgAMQA5ADkAOQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0hvbG0vMTk5OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Holm:1993qy,
-	Abstract = {With a rapidly growing pool of known tertiary structures, the importance of protein structure comparison parallels that of sequence alignment. We have developed a novel algorithm (DALI) for optimal pairwise alignment of protein structures. The three-dimensional co-ordinates of each protein are used to calculate residue--residue (C{$[$}alpha{$]$}--C{$[$}alpha{$]$};) distance matrices. The distance matrices are first decomposed into elementary contact patterns, e.g. hexapeptide--hexapeptide submatrices. Then, similar contact patterns in the two matrices are paired and combined into larger consistent sets of pairs. A Monte Carlo procedure is used to optimize a similarity score defined in terms of equivalent intramolecular distances. Several alignments are optimized in parallel, leading to simultaneous detection of the best, second-best and so on solutions. The method allows sequence gaps of any length, reversal of chain direction and free topological connectivity of aligned segments. Sequential connectivity can be imposed as an option. The method is fully automatic and identifies structural resemblances and common structural cores accurately and sensitively, even in the presence of geometrical distortions. An all-against-all alignment of over 200 representative protein structures results in an objective classification of known three-dimensional folds in agreement with visual classifications. Unexpected topological similarities of biological interest have been detected, e.g. between the bacterial toxin colicin A and globins, and between the eukaryotic POU-specific DNA-binding domain and the bacterial {$[$}lambda{$]$} repressor.},
-	Author = {Holm, L and Sander, C},
-	Date-Added = {2007-01-26 08:44:40 -0800},
-	Date-Modified = {2008-05-29 14:28:12 -0700},
-	Journal = {J Mol Biol},
-	Keywords = {classification of protein folds; database searching; distance geometry; pattern recognition; protein structure alignment},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1993.pdf},
-	Number = {1},
-	Pages = {123--138},
-	Title = {Protein Structure Comparison by Alignment of Distance Matrices},
-	Ty = {JOUR},
-	Volume = {233},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7pcMHfb5JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADB3+ASAAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTkzLnBkZgAADgASAAgAMQA5ADkAMwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0hvbG0vMTk5My5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-45PKM2N-75/2/b869e064e2fad7571693dd6f0246b1e4}}
-
-@article{Holm:1995fk,
-	Author = {Holm, Liisa and Sander, Chris},
-	Date-Added = {2007-01-26 08:44:03 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Trends in Biochemical Sciences},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holm/1995.pdf},
-	Number = {11},
-	Pages = {478--480},
-	Title = {Dali: a network tool for protein structure comparison},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6TCV-40W0TN7-56/2/d6dd7b03913813204b60b938b8b73346},
-	Volume = {20},
-	Year = {1995},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGgIMTk5NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7pTcHfbqNQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEhvbG0AEAAIAADBcY1JAAAAEQAIAADB398jAAAAAQAYAEZsaABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9sbToxOTk1LnBkZgAADgASAAgAMQA5ADkANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xtLzE5OTUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0hvbG0vMTk5NS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6TCV-40W0TN7-56/2/d6dd7b03913813204b60b938b8b73346}}
-
-@article{Li:2003lr,
-	Abstract = {It is well known that there are some similarities among various naturally occurring amino acids. Thus, the complexity in protein systems could be reduced by sorting these amino acids with similarities into groups and then protein sequences can be simplified by reduced alphabets. This paper discusses how to group similar amino acids and whether there is a minimal amino acid alphabet by which proteins can be folded. Various reduced alphabets are obtained by reserving the maximal information for the simplified protein sequence compared with the parent sequence using global sequence alignment. With these reduced alphabets and simplified similarity matrices, we achieve recognition of the protein fold based on the similarity score of the sequence alignment. The coverage in dataset SCOP40 for various levels of reduction on the amino acid types is obtained, which is the number of homologous pairs detected by program BLAST to the number marked by SCOP40. For the reduced alphabets containing 10 types of amino acids, the ability to detect distantly related folds remains almost at the same level as that by the alphabet of 20 types of amino acids, which implies that 10 types of amino acids may be the degree of freedom for characterizing the complexity in proteins.},
-	Address = {National Laboratory of Solid State Microstructure, Institute of Biophysics and Department of Physics, Nanjing University, China.},
-	Annote = {LWW-I
-LWW-NI},
-	Au = {Li, T and Fan, K and Wang, J and Wang, W},
-	Author = {Li, T and Fan, K and Wang, J and Wang, W},
-	Da = {20030626},
-	Date-Added = {2007-01-26 08:28:10 -0800},
-	Date-Modified = {2008-05-29 12:14:05 -0700},
-	Dcom = {20040304},
-	Edat = {2003/06/27 05:00},
-	Group = {Alphabets; Reviewed; Forward; Printed; Backward; ROC},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering},
-	Keywords = {Amino Acid Sequence; *Data Interpretation, Statistical; Databases, Protein; Molecular Sequence Data; *Sequence Alignment; *Sequence Analysis, Protein},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/2003.pdf},
-	Lr = {20061115},
-	Mhda = {2004/03/05 05:00},
-	Number = {5},
-	Own = {NLM},
-	Pages = {323-30},
-	Pl = {England},
-	Pmid = {12826723},
-	Pst = {ppublish},
-	Pt = {Journal Article; Research Support, Non-U.S. Gov't},
-	Pubm = {Print},
-	Read = {Yes},
-	Sb = {IM},
-	So = {Protein Eng. 2003 May;16(5):323-30.},
-	Stat = {MEDLINE},
-	Title = {Reduction of protein sequence complexity by residue grouping},
-	Volume = {16},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7oBMHfa4JQREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAwd/cAgAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjIwMDMucGRmAAAOABIACAAyADAAMAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzIwMDMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzIwMDMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Solis:2000lr,
-	Abstract = {In an effort to quantify loss of information in the processing of protein bioinformatic data, we examine how representations of amino acid sequence and backbone conformation affect the quantity of accessible structural information from local sequence. We propose a method to extract the maximum amount of peptide backbone structural information available in local sequence fragments, given a finite structural data set. Using methods of information theory, we develop an unbiased measure of local structural information that gauges changes in structural distributions when different representations of secondary structure and local sequence are used. We find that the manner in which backbone structure is represented affects the amount and quality of structural information that may be extracted from local sequence. Representations based on virtual bonds capture more structural information from local sequence than a three-state assignment scheme (helix/strand/loop). Furthermore, we find that amino acids show significant kinship with respect to the backbone structural information they carry, so that a collapse of the amino acid alphabet can be accomplished without severely affecting the amount of extractable information. This strategy is critical in optimizing the utility of a limited database of experimentally solved protein structures. Finally, we discuss the similarities within and differences between groups of amino acids in their roles in the local folding code and recognize specific amino acids critical in the formation of local structure.},
-	Address = {Department of Biomathematical Sciences, Mount Sinai School of Medicine, New York, New York 10029, USA.},
-	Aid = {10.1002/(SICI)1097-0134(20000201)38:2<149::AID-PROT4>3.0.CO;2-{\#} {$[$}pii{$]$}},
-	Annote = {They describe a couple of reduced alphabet trees based on structural data rather than a substitution matrix; these alphabets are designed to preserve the most local information about secondary structure/backbone preferences.
-
-GBMR/DSSP},
-	Au = {Solis AD and Rackovsky S},
-	Author = {Solis, A D and Rackovsky, S},
-	Da = {20000223},
-	Date-Added = {2006-10-03 18:24:49 -0700},
-	Date-Modified = {2008-05-29 12:19:51 -0700},
-	Dcom = {20000223},
-	Edat = {2000/02/03 09:00},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins.},
-	Keywords = {Amino Acids, Models, Chemical, Models, Molecular, *Protein Conformation, Research Support, Non-U.S. Gov't, Structure-Activity Relationship},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Solis/2000.pdf},
-	Lr = {20041117},
-	Mhda = {2000/02/26 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {149--164},
-	Pl = {UNITED STATES},
-	Pmid = {10656262},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Read = {Yes},
-	Rn = {0 (Amino Acids)},
-	Sb = {IM},
-	So = {Proteins. 2000 Feb 1;38(2):149-64.},
-	Stat = {MEDLINE},
-	Title = {Optimized representations and maximal information in proteins},
-	Volume = {38},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRoIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK1cEPmf5QREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNvbGlzAAAQAAgAAMFxjUkAAAARAAgAAMEP/G4AAAABABgARm0aAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTb2xpczoyMDAwLnBkZgAOABIACAAyADAAMAAwAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NvbGlzLzIwMDAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU29saXMvMjAwMC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{cao:031921,
-	Author = {Hai-Bo Cao and Cai-Zhuang Wang and Drena Dobbs and Yungok Ihm and Kai-Ming Ho},
-	Date-Added = {2006-09-29 16:23:31 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Eid = {031921},
-	Journal = {Physical Review E},
-	Keywords = {proteins; random processes; molecular biophysics; macromolecules},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cao/2006.pdf},
-	Number = {3},
-	Numpages = {4},
-	Pages = {031921},
-	Publisher = {APS},
-	Title = {Codability criterion for picking proteinlike structures from random three-dimensional configurations},
-	Url = {http://link.aps.org/abstract/PRE/v74/e031921},
-	Volume = {74},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAgIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKK8FC5fsAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0NhbwAAEAAIAADBcY1JAAAAEQAIAADBQ0hrAAAAAQAYAEZsCABGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2FvOjIwMDYucGRmAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQ2FvLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2FvLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==},
-	Bdsk-Url-1 = {http://link.aps.org/abstract/PRE/v74/e031921}}
-
-@article{Sanchez:2006fk,
-	Author = {Sanchez, I. E. and Tejero, J. and Gomez-Moreno, C. and Medina, M. and Serrano, L.},
-	Date-Added = {2006-09-29 10:33:58 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {J Mol Biol},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Sanchez/2006.pdf},
-	Number = {2},
-	Pages = {422-432},
-	Title = {Point Mutations in Protein Globular Domains: Contributions from Function, Stability and Misfolding},
-	Volume = {363},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbP0IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKx8FCpSVQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1NhbmNoZXoAABAACAAAwXGNSQAAABEACAAAwUMHlQAAAAEAGABGbP0ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlNhbmNoZXo6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9TYW5jaGV6LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU2FuY2hlei8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Jonson:2001lr,
-	Abstract = {By analysing the surface composition of a set of protein 3D structures, complemented with predicted surface compositional information for homologous proteins, we have found significant evidence for a layer composition of protein structures. In the innermost and outermost parts of proteins there is a net negative charge, while the middle has a net positive charge. In addition, our findings indicate that the concept of conservative mutation needs substantial revision, e.g. very different spatial preferences were found for glutamic acid and aspartic acid. The alanine screening often used in protein engineering projects involves the substitution of residues to alanine, based on the assumption that alanine is a "neutral" residue. However, alanine has a high negative correlation with all but the non-polar residues. We therefore propose the use of, for example, serine as a substitute for the residues that are negatively correlated with alanine.},
-	Address = {Biostructure and Protein Engineering Group, Department of Life Sciences, Aalborg University, Sohngaardsholmsvej 49, DK-9000 Aalborg, Denmark.},
-	Au = {Jonson PH and Petersen SB},
-	Author = {Jonson, P H and Petersen, S B},
-	Da = {20010730},
-	Date-Added = {2006-09-26 14:46:30 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20020402},
-	Edat = {2001/07/31 10:00},
-	Issn = {0269-2139 (Print)},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering.},
-	Keywords = {Amino Acid Sequence, Aspartic Acid/chemistry, Electrostatics, Glutamic Acid/chemistry, Ions, *Models, Molecular, *Mutation, Protein Conformation, Research Support, Non-U.S. Gov't, *Sequence Homology, Amino Acid, Solvents/pharmacology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Jonson/2001.pdf},
-	Lr = {20041117},
-	Mhda = {2002/04/03 10:01},
-	Number = {6},
-	Own = {NLM},
-	Pages = {397-402},
-	Pl = {England},
-	Pmid = {11477218},
-	Pst = {ppublish},
-	Pt = {Journal Article, Validation Studies},
-	Pubm = {Print},
-	Rn = {0 (Ions), 0 (Solvents), 56-84-8 (Aspartic Acid), 56-86-0 (Glutamic Acid)},
-	Sb = {IM},
-	So = {Protein Eng. 2001 Jun;14(6):397-402.},
-	Stat = {MEDLINE},
-	Title = {A critical view on conservative mutations},
-	Volume = {14},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHUIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJU8EPlQBQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkpvbnNvbgAQAAgAAMFxjUkAAAARAAgAAMEP93AAAAABABgARmx1AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpKb25zb246MjAwMS5wZGYAAA4AEgAIADIAMAAwADEALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvSm9uc29uLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0pvbnNvbi8yMDAxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Yang:2006uq,
-	Abstract = {As more protein structures become available and structural genomics efforts provide structural models in a genome-wide strategy, there is a growing need for fast and accurate methods for discovering homologous proteins and evolutionary classifications of newly determined structures. We have developed 3D-BLAST, in part, to address these issues. 3D-BLAST is as fast as BLAST and calculates the statistical significance (E-value) of an alignment to indicate the reliability of the prediction. Using this method, we first identified 23 states of the structural alphabet that represent pattern profiles of the backbone fragments and then used them to represent protein structure databases as structural alphabet sequence databases (SADB). Our method enhanced BLAST as a search method, using a new structural alphabet substitution matrix (SASM) to find the longest common substructures with high-scoring structured segment pairs from an SADB database. Using personal computers with Intel Pentium4 (2.8 GHz) processors, our method searched more than 10 000 protein structures in 1.3 s and achieved a good agreement with search results from detailed structure alignment methods. [3D-BLAST is available at http://3d-blast.life.nctu.edu.tw].},
-	Address = {Department of Biological Science and Technology, National Chiao Tung University, Hsinchu, 30050, Taiwan. moon@faculty.nctu.edu.tw},
-	Aid = {34/13/3646 {$[$}pii{$]$}, 10.1093/nar/gkl395 {$[$}doi{$]$}},
-	Au = {Yang JM and Tung CH},
-	Author = {Yang, Jinn-Moon and Tung, Chi-Hua},
-	Da = {20060803},
-	Date-Added = {2006-09-21 11:11:39 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060816},
-	Dep = {20060802},
-	Edat = {2006/08/04 09:00},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Keywords = {Acetyltransferases/chemistry, Data Interpretation, Statistical, *Databases, Protein, *Evolution, Molecular, Protein Conformation, Proteins/*classification/genetics, Research Support, Non-U.S. Gov't, Sequence Alignment, Sequence Analysis, Protein, *Software, *Structural Homology, Protein},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Yang/2006.pdf},
-	Mhda = {2006/08/17 09:00},
-	Number = {13},
-	Own = {NLM},
-	Pages = {3646-59},
-	Phst = {2006 {$[$}ppublish{$]$}},
-	Pl = {England},
-	Pmid = {16885238},
-	Pst = {epublish},
-	Pt = {Evaluation Studies, Journal Article},
-	Pubm = {Electronic-Print},
-	Rn = {0 (Proteins), EC 2.3.1. (Acetyltransferases)},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Aug 2;34(13):3646-59. Print 2006.},
-	Stat = {MEDLINE},
-	Title = {Protein structure database search and evolutionary classification},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVUIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJy8E4I8NQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFlhbmcAEAAIAADBcY1JAAAAEQAIAADBOIYzAAAAAQAYAEZtVQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WWFuZzoyMDA2LnBkZgAADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9ZYW5nLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1lhbmcvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Lingner:2006qy,
-	Abstract = {MOTIVATION: Remote homology detection is among the most intensively researched problems in bioinformatics. Currently discriminative approaches, especially kernel-based methods, provide the most accurate results. However, kernel methods also show several drawbacks: in many cases prediction of new sequences is computationally expensive, often kernels lack an interpretable model for analysis of characteristic sequence features, and finally most approaches make use of so-called hyperparameters which complicate the application of methods across different datasets. RESULTS: We introduce a feature vector representation for protein sequences based on distances between short oligomers. The corresponding feature space arises from distance histograms for any possible pair of K-mers. Our distance-based approach shows important advantages in terms of computational speed while on common test data the prediction performance is highly competitive with state-of-the-art methods for protein remote homology detection. Furthermore the learnt model can easily be analyzed in terms of discriminative features and in contrast to other methods our representation does not require any tuning of kernel hyperparameters. AVAILABILITY: Normalized kernel matrices for the experimental setup can be downloaded at www.gobics.de/thomas. Matlab code for computing the kernel matrices is available upon request. CONTACT: thomas@gobics.de, peter@gobics.de.},
-	Address = {Abteilung Bioinformatik, Institut fur Mikrobiologie und Genetik, Georg-August-Universitat Gottingen Goldschmidtstr. 1, 37077 Gottingen, Germany. thomas@gobics.de},
-	Aid = {btl376 {$[$}pii{$]$}, 10.1093/bioinformatics/btl376 {$[$}doi{$]$}},
-	Au = {Lingner T and Meinicke P},
-	Author = {Lingner, Thomas and Meinicke, Peter},
-	Da = {20060907},
-	Date-Added = {2006-09-21 11:06:27 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20060712},
-	Edat = {2006/07/14 09:00},
-	Issn = {1460-2059 (Electronic)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Lingner/2006.pdf},
-	Mhda = {2006/07/14 09:00},
-	Number = {18},
-	Own = {NLM},
-	Pages = {2224-31},
-	Phst = {2006/07/12 {$[$}aheadofprint{$]$}},
-	Pl = {England},
-	Pmid = {16837522},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Bioinformatics. 2006 Sep 15;22(18):2224-31. Epub 2006 Jul 12.},
-	Stat = {In-Process},
-	Title = {Remote homology detection based on oligomer distances},
-	Volume = {22},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKQIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKnME4Il5QREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0xpbmduZXIAABAACAAAwXGNSQAAABEACAAAwTiEzgAAAAEAGABGbKQARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpbmduZXI6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9MaW5nbmVyLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGluZ25lci8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Choi:2006lr,
-	Abstract = {In protein structure space, protein structures cluster into four elongated regions when mapped based solely on similarity among the 3D structures. These four regions correspond to the four major classes of present-day proteins defined by the contents of secondary structure types and their topological arrangement. Evolution of and restriction to these four classes suggest that, in most cases, the evolution of genes may have been constrained or selected to those genetic changes that results in structurally stable proteins occupying one of the four "allowed" regions of the protein structure space, "structural selection," an important component of natural selection in gene evolution. Our studies on tracing the "common structural ancestor" for each protein sequence family of known structure suggest that: (i) recently emerged proteins belong mostly to three classes; (ii) the proteins that emerged earlier evolved to gain a new class; and (iii) the proteins that emerged earliest evolved to become the present-day proteins in the four major classes, with the fourth-class proteins becoming the most dominant population. Furthermore, our studies also show that not all present-day proteins evolved from one single set of proteins in the last common ancestral organism, but new common ancestral proteins were "born" at different evolutionary times, not traceable to one or two ancestral proteins: "the multiple birth model" for the evolution of protein sequence families.},
-	Address = {*Physical Biosciences Division, Lawrence Berkeley National Laboratory, and Department of Chemistry, University of California, Berkeley, CA 94720.},
-	Aid = {0606239103 {$[$}pii{$]$}, 10.1073/pnas.0606239103 {$[$}doi{$]$}},
-	Au = {Choi IG and Kim SH},
-	Author = {Choi, In-Geol and Kim, Sung-Hou},
-	Da = {20060920},
-	Date-Added = {2006-09-21 09:32:58 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20060907},
-	Edat = {2006/09/09 09:00},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Choi/2006.pdf},
-	Mhda = {2006/09/09 09:00},
-	Number = {38},
-	Own = {NLM},
-	Pages = {14056-61},
-	Phst = {2006/09/07 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {16959887},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 2006 Sep 19;103(38):14056-61. Epub 2006 Sep 7.},
-	Stat = {In-Data-Review},
-	Title = {Evolution of protein structural classes and protein sequence families},
-	Volume = {103},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbBIIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKr8E4DHVQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABENob2kAEAAIAADBcY1JAAAAEQAIAADBOG7lAAAAAQAYAEZsEgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hvaToyMDA2LnBkZgAADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaG9pLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0Nob2kvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Roeben:2006lr,
-	Abstract = {Prokaryotic homologs of the eukaryotic structural protein actin, such as MreB and ParM, have been implicated in determination of bacterial cell shape, and in the segregation of genomic and plasmid DNA. In contrast to these bacterial actin homologs, little is known about the archaeal counterparts. As a first step, we expressed a predicted actin homolog of the thermophilic archaeon Thermoplasma acidophilum, Ta0583, and determined its crystal structure at 2.1A resolution. Ta0583 is expressed as a soluble protein in T.acidophilum and is an active ATPase at physiological temperature. In vitro, Ta0583 forms sheets with spacings resembling the crystal lattice, indicating an inherent propensity to form filamentous structures. The fold of Ta0583 contains the core structure of actin and clearly belongs to the actin/Hsp70 superfamily of ATPases. Ta0583 is approximately equidistant from actin and MreB on the structural level, and combines features from both eubacterial actin homologs, MreB and ParM. The structure of Ta0583 co-crystallized with ADP indicates that the nucleotide binds at the interface between the subdomains of Ta0583 in a manner similar to that of actin. However, the conformation of the nucleotide observed in complex with Ta0583 clearly differs from that in complex with actin, but closely resembles the conformation of ParM-bound nucleotide. On the basis of sequence and structural homology, we suggest that Ta0583 derives from a ParM-like actin homolog that was once encoded by a plasmid and was transferred into a common ancestor of Thermoplasma and Ferroplasma. Intriguingly, both genera are characterized by the lack of a cell wall, and therefore Ta0583 could have a function in cellular organization.},
-	Affiliation = {Department of Cellular Biochemistry, Max-Planck-Institute of Biochemistry, Am Klopferspitz 18, 82152 Martinsried, Germany.},
-	Aid = {S0022-2836(06)00150-1 {$[$}pii{$]$}},
-	Au = {Roeben A and Kofler C and Nagy I and Nickell S and Hartl FU and Bracher A},
-	Author = {Roeben, Annette and Kofler, Christine and Nagy, Istvan and Nickell, Stephan and Hartl, F Ulrich and Bracher, Andreas},
-	Da = {20060327},
-	Date-Added = {2006-08-28 08:51:30 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060503},
-	Dep = {20060209},
-	Edat = {2006/02/28 09:00},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology.},
-	Keywords = {Actins/*chemistry, Adenosine Diphosphate/chemistry, Adenosine Triphosphate/metabolism, Amino Acid Sequence, Archaeal Proteins/*chemistry/metabolism/ultrastructure, Consensus Sequence, Crystallization, Crystallography, X-Ray, Evolution, Molecular, Hydrolysis, Models, Molecular, Molecular Sequence Data, Protein Binding, Protein Interaction Mapping, Protein Structure, Secondary, *Sequence Homology, Amino Acid, Thermoplasma/*chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Roeben/2006.pdf},
-	Mhda = {2006/05/04 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {145-56},
-	Phst = {2005/12/16 {$[$}received{$]$}},
-	Pl = {England},
-	Pmid = {16500678},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Actins)},
-	Sb = {IM},
-	Si = {PDB/2FSJ},
-	So = {J Mol Biol. 2006 Apr 21;358(1):145-56. Epub 2006 Feb 9.},
-	Stat = {MEDLINE},
-	Title = {Crystal structure of an archaeal actin homolog},
-	Volume = {358},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPQIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK58EYXudQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlJvZWJlbgAQAAgAAMFxjUkAAAARAAgAAMEYwVcAAAABABgARmz0AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSb2ViZW46MjAwNi5wZGYAAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUm9lYmVuLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1JvZWJlbi8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Campbell-Valois:2006lr,
-	Author = {Campbell-Valois, F. -X. and Tarassov, Kirill and Michnick, S. W.},
-	Date-Added = {2006-08-24 13:39:50 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {Raf ras binding domain, {$[$}beta{$]$}-grasp ubiquitin-like topology, sequence entropy, core volume, secondary structure propensity},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Campbell-Valois/2006.pdf},
-	Number = {1},
-	Pages = {151--171},
-	Title = {Massive Sequence Perturbation of the Raf ras Binding Domain Reveals Relationships between Sequence Conservation, Secondary Structure Propensity, Hydrophobic Core Organization and Stability},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-4KNKH41-1/2/af4bce65251ca6eb4635078016bb4d8a},
-	Volume = {362},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGoAAAAAAGoAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAUIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKd8ETXFpQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAD0NhbXBiZWxsLVZhbG9pcwAAEAAIAADBcY1JAAAAEQAIAADBE77KAAAAAQAYAEZsBQBGa88ARmrVAEZqGwBGZGgAQIlDAAIASmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2FtcGJlbGwtVmFsb2lzOjIwMDYucGRmAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAEZVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQ2FtcGJlbGwtVmFsb2lzLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAtLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2FtcGJlbGwtVmFsb2lzLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJlAmoCcwJ+AoICkAKXAqAC0ALVAtgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAAC5Q==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-4KNKH41-1/2/af4bce65251ca6eb4635078016bb4d8a}}
-
-@article{Poole:2006uq,
-	Author = {Poole, Alan M and Ranganathan, Rama},
-	Date-Added = {2006-08-15 11:12:27 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Poole/2006.pdf},
-	Number = {4},
-	Pages = {508--513},
-	T2 = {Membranes / Engineering and design},
-	Title = {Knowledge-based potentials in protein design},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4KDBM0R-1/2/4a194d507f5011cbbe923e291296e887},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNwIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJocEHXGxQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVBvb2xlAAAQAAgAAMFxjUkAAAARAAgAAMEHvtwAAAABABgARmzcAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQb29sZToyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1Bvb2xlLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUG9vbGUvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4KDBM0R-1/2/4a194d507f5011cbbe923e291296e887}}
-
-@article{Holland:2006qy,
-	Author = {Holland, Timothy A. and Veretnik, Stella and Shindyalov, Ilya N. and Bourne, Philip E.},
-	Date-Added = {2006-08-15 10:47:51 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {structural domains, benchmark dataset, performance evaluation, topological assessment, integrity of secondary structures},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Holland/2006.pdf},
-	Number = {3},
-	Pages = {562--590},
-	Title = {Partitioning Protein Structures into Domains: Why Is it so Difficult?},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-4K7NG6X-1/2/6e542ec97b6923a9164fd2ee221eb26f},
-	Volume = {361},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGcIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK3cEHVrhQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0hvbGxhbmQAABAACAAAwXGNSQAAABEACAAAwQe5KAAAAAEAGABGbGcARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkhvbGxhbmQ6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Ib2xsYW5kLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSG9sbGFuZC8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-4K7NG6X-1/2/6e542ec97b6923a9164fd2ee221eb26f}}
-
-@article{Armougom:2006ys,
-	Abstract = {Expresso is a multiple sequence alignment server that aligns sequences using structural information. The user only needs to provide sequences. The server runs BLAST to identify close homologues of the sequences within the PDB database. These PDB structures are used as templates to guide the alignment of the original sequences using structure-based sequence alignment methods like SAP or Fugue. The final result is a multiple sequence alignment of the original sequences based on the structural information of the templates. An advanced mode makes it possible to either upload private structures or specify which PDB templates should be used to model each sequence. Providing the suitable structural information is available, Expresso delivers sequence alignments with accuracy comparable with structure-based alignments. The server is available on http://www.tcoffee.org/.},
-	Affiliation = {Laboratoire Information Genomique et Structurale, CNRS UPR2589, Institute for Structural Biology and Microbiology (IBSM), Parc Scientifique de Luminy, 163 Avenue de Luminy, FR- 13288, Marseille cedex 09, France.},
-	Aid = {10.1093/nar/gkl092 {$[$}doi{$]$}},
-	Au = {Armougom F and Moretti S and Poirot O and Audic S and Dumas P and Schaeli B and Keduas V and Notredame C},
-	Author = {Armougom, Fabrice and Moretti, Sebastien and Poirot, Olivier and Audic, Stephane and Dumas, Pierre and Schaeli, Basile and Keduas, Vladimir and Notredame, Cedric},
-	Da = {20060717},
-	Date-Added = {2006-08-02 11:41:32 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Edat = {2006/07/18 09:00},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Armougom/2006.pdf},
-	Mhda = {2006/07/18 09:00},
-	Number = {Web Server issue},
-	Own = {NLM},
-	Pages = {W604-8},
-	Pl = {England},
-	Pmid = {16845081},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Jul 1;34(Web Server issue):W604-8.},
-	Stat = {In-Process},
-	Title = {Expresso: automatic incorporation of structural information in multiple sequence alignments using 3D-Coffee},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+UIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKksDvliEAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFybW91Z29tABAACAAAwXGNSQAAABEACAAAwO/4kQAAAAEAGABGa+UARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFybW91Z29tOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FybW91Z29tLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FybW91Z29tLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Bhadra:2006rt,
-	Abstract = {Owing to high evolutionary divergence, it is not always possible to identify distantly related protein domains by sequence search techniques. Intermediate sequences possess sequence features of more than one protein and facilitate detection of remotely related proteins. We have demonstrated recently the employment of Cascade PSI-BLAST where we perform PSI-BLAST for many 'generations', initiating searches from new homologues as well. Such a rigorous propagation through generations of PSI-BLAST employs effectively the role of intermediates in detecting distant similarities between proteins. This approach has been tested on a large number of folds and its performance in detecting superfamily level relationships is approximately 35% better than simple PSI-BLAST searches. We present a web server for this search method that permits users to perform Cascade PSI-BLAST searches against the Pfam, SCOP and SwissProt databases. The URL for this server is http://crick.mbu.iisc.ernet.in/~CASCADE/CascadeBlast.html.},
-	Affiliation = {Molecular Biophysics Unit, Indian Institute of Science, 560 012, Bangalore, India.},
-	Aid = {34/suppl{\_}2/W143 {$[$}pii{$]$}},
-	Au = {Bhadra R and Sandhya S and Abhinandan KR and Chakrabarti S and Sowdhamini R and Srinivasan N},
-	Author = {Bhadra, R and Sandhya, S and Abhinandan, K R and Chakrabarti, S and Sowdhamini, R and Srinivasan, N},
-	Da = {20060717},
-	Date-Added = {2006-08-02 11:40:58 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Edat = {2006/07/18 09:00},
-	Gr = {Wellcome Trust},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bhadra/2006.pdf},
-	Mhda = {2006/07/18 09:00},
-	Number = {Web Server issue},
-	Own = {NLM},
-	Pages = {W143-6},
-	Pl = {England},
-	Pmid = {16844978},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Jul 1;34(Web Server issue):W143-6.},
-	Stat = {In-Process},
-	Title = {Cascade PSI-BLAST web server: a remote homology search tool for relating protein domains},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/AIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ28Dvn2oAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkJoYWRyYQAQAAgAAMFxjUkAAAARAAgAAMDwAdoAAAABABgARmvwAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCaGFkcmE6MjAwNi5wZGYAAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQmhhZHJhLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0JoYWRyYS8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Tyagi:2006vn,
-	Abstract = {Encoding protein 3D structures into 1D string using short structural prototypes or structural alphabets opens a new front for structure comparison and analysis. Using the well-documented 16 motifs of Protein Blocks (PBs) as structural alphabet, we have developed a methodology to compare protein structures that are encoded as sequences of PBs by aligning them using dynamic programming which uses a substitution matrix for PBs. This methodology is implemented in the applications available in Protein Block Expert (PBE) server. PBE addresses common issues in the field of protein structure analysis such as comparison of proteins structures and identification of protein structures in structural databanks that resemble a given structure. PBE-T provides facility to transform any PDB file into sequences of PBs. PBE-ALIGNc performs comparison of two protein structures based on the alignment of their corresponding PB sequences. PBE-ALIGNm is a facility for mining SCOP database for similar structures based on the alignment of PBs. Besides, PBE provides an interface to a database (PBE-SAdb) of preprocessed PB sequences from SCOP culled at 95% and of all-against-all pairwise PB alignments at family and superfamily levels. PBE server is freely available at http://bioinformatics.univ-reunion.fr/PBE/.},
-	Affiliation = {Laboratoire de Biochimie et Genetique Moleculaire, Bioinformatics Team, Universite de La Reunion, BP 7151, 15 avenue Rene Cassin, 97715 Saint Denis Messag Cedex 09, La Reunion, France.},
-	Aid = {34/suppl{\_}2/W119 {$[$}pii{$]$}},
-	Au = {Tyagi M and Sharma P and Swamy CS and Cadet F and Srinivasan N and de Brevern AG and Offmann B},
-	Author = {Tyagi, M and Sharma, P and Swamy, C S and Cadet, F and Srinivasan, N and de Brevern, A G and Offmann, B},
-	Da = {20060717},
-	Date-Added = {2006-08-02 11:40:20 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Edat = {2006/07/18 09:00},
-	Gr = {Wellcome Trust},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Tyagi/2006.pdf},
-	Mhda = {2006/07/18 09:00},
-	Number = {Web Server issue},
-	Own = {NLM},
-	Pages = {W119-23},
-	Pl = {England},
-	Pmid = {16844973},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Jul 1;34(Web Server issue):W119-23.},
-	Stat = {In-Process},
-	Title = {Protein Block Expert (PBE): a web-based protein structure analysis server using a structural alphabet},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbTYIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKscDvoYUAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVR5YWdpAAAQAAgAAMFxjUkAAAARAAgAAMDwA/UAAAABABgARm02AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpUeWFnaToyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1R5YWdpLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVHlhZ2kvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Tangrot:2006yq,
-	Abstract = {The FISH server is highly accurate in identifying the family membership of domains in a query protein sequence, even in the case of very low sequence identities to known homologues. A performance test using SCOP sequences and an E-value cut-off of 0.1 showed that 99.3% of the top hits are to the correct family saHMM. Matches to a query sequence provide the user not only with an annotation of the identified domains and hence a hint to their function, but also with probable 2D and 3D structures, as well as with pairwise and multiple sequence alignments to homologues with low sequence identity. In addition, the FISH server allows users to upload and search their own protein sequence collection or to quarry public protein sequence data bases with individual saHMMs. The FISH server can be accessed at http://babel.ucmp.umu.se/fish/.},
-	Affiliation = {Umea Center for Molecular Pathogenesis UCMP, Umea University Umea, Sweden.},
-	Aid = {10.1093/nar/gkl330 {$[$}doi{$]$}},
-	Annote = {The main result for me in this paper is the saHMM (structure-anchored HMMs) that the authors built from the SCOP database.  They might be interesting to study, if they're available for download.},
-	Au = {Tangrot J and Wang L and Kagstrom B and Sauer UH},
-	Author = {Tangrot, Jeanette and Wang, Lixiao and Kagstrom, Bo and Sauer, Uwe H},
-	Da = {20060717},
-	Date-Added = {2006-08-02 11:38:15 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Edat = {2006/07/18 09:00},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Tangrot/2006.pdf},
-	Mhda = {2006/07/18 09:00},
-	Number = {Web Server issue},
-	Own = {NLM},
-	Pages = {W10-4},
-	Pl = {England},
-	Pmid = {16844969},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Jul 1;34(Web Server issue):W10-4.},
-	Stat = {In-Process},
-	Title = {FISH--family identification of sequence homologues using structure anchored hidden Markov models},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbSkIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJYcDvpGUAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1Rhbmdyb3QAABAACAAAwXGNSQAAABEACAAAwPAG1QAAAAEAGABGbSkARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlRhbmdyb3Q6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9UYW5ncm90LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVGFuZ3JvdC8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Reeves:2006fk,
-	Annote = {Talks about variability of secondary structures within a superfamily.},
-	Author = {Reeves, Gabrielle A. and Dallman, Timothy J. and Redfern, Oliver C. and Akpor, Adrian and Orengo, Christine A.},
-	Date-Added = {2006-08-02 11:32:43 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {domain families, superfamilies, structural variation, secondary structure embellishments},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Reeves/2006.pdf},
-	Number = {3},
-	Pages = {725--741},
-	Title = {Structural Diversity of Domain Superfamilies in the CATH Database},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-4K3CX9S-1/2/c2fa28313a9d4d00acbe6864f0261a0a},
-	Volume = {360},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOsIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLA8Dzcb5QREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlJlZXZlcwAQAAgAAMFxjUkAAAARAAgAAMDz1C4AAAABABgARmzrAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSZWV2ZXM6MjAwNi5wZGYAAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUmVldmVzLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1JlZXZlcy8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-4K3CX9S-1/2/c2fa28313a9d4d00acbe6864f0261a0a}}
-
-@article{Prlic:2000fk,
-	Abstract = {Sequence alignment is a standard method to infer evolutionary, structural, and functional relationships among sequences. The quality of alignments depends on the substitution matrix used. Here we derive matrices based on superimpositions from protein pairs of similar structure, but of low or no sequence similarity. In a performance test the matrices are compared with 12 other previously published matrices. It is found that the structure-derived matrices are applicable for comparisons of distantly related sequences. We investigate the influence of evolutionary relationships of protein pairs on the alignment accuracy.},
-	Affiliation = {Center of Applied Molecular Engineering, Institute for Chemistry and Biochemistry, University of Salzburg, Jakob-Haringerstrasse 3, A-5020 Salzburg, Austria.},
-	Annote = {SDM/HSDM},
-	Au = {Prlic A and Domingues FS and Sippl MJ},
-	Author = {Prli{\'c}, A and Domingues, F S and Sippl, M J},
-	Da = {20001010},
-	Date-Added = {2006-07-18 12:28:11 -0700},
-	Date-Modified = {2008-05-28 22:27:37 -0700},
-	Dcom = {20001207},
-	Edat = {2000/08/31 11:00},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering.},
-	Keywords = {Comparative Study, Models, Molecular, Protein Conformation, Research Support, Non-U.S. Gov't, Sequence Alignment/*methods},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Prli%C4%87/2000.pdf},
-	Lr = {20041117},
-	Mhda = {2001/02/28 10:01},
-	Number = {8},
-	Own = {NLM},
-	Pages = {545--550},
-	Pl = {ENGLAND},
-	Pmid = {10964983},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Read = {Yes},
-	Sb = {IM},
-	So = {Protein Eng. 2000 Aug;13(8):545-50.},
-	Stat = {MEDLINE},
-	Title = {Structure-derived substitution matrices for alignment of distantly related sequences},
-	Volume = {13},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGYAAAAAAGYAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOEIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJscFKhTsAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC1BybGkjNDY2Q0UxAAAQAAgAAMFxjUkAAAARAAgAAMFK56sAAAABABgARmzhAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpQcmxpIzQ2NkNFMToyMDAwLnBkZgAOABIACAAyADAAMAAwAC4AcABkAGYADwAIAAMAaABzAHIAEgA+VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1BybGljzIEvMjAwMC5wZGYAEwABLwAAFQACABP//wAA0h4fICFYJGNsYXNzZXNaJGNsYXNzbmFtZaMhIiNdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RvECQALgAuAC8ALgAuAC8ALgAuAC8ALgAuAC8AQQByAHQAaQBjAGwAZQBzAC8AUAByAGwAaQBjAwEALwAyADAAMAAwAC4AcABkAGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlUCWgJjAm4CcgKAAocCkALbAuAC4wAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALw}}
-
-@article{Huang:2006lr,
-	Abstract = {MOTIVATION: In recent years, advances have been made in the ability of computational methods to discriminate between homologous and non-homologous proteins in the 'twilight zone' of sequence similarity, where the percent sequence identity is a poor indicator of homology. To make these predictions more valuable to the protein modeler, they must be accompanied by accurate alignments. Pairwise sequence alignments are inferences of orthologous relationships between sequence positions. Evolutionary distance is traditionally modeled using global amino acid substitution matrices. But real differences in the likelihood of substitutions may exist for different structural contexts within proteins, since structural context contributes to the selective pressure. RESULTS: HMMSUM (HMMSTR-based substitution matrices) is a new model for structural context-based amino acid substitution probabilities consisting of a set of 281 matrices, each for a different sequence-structure context. HMMSUM does not require the structure of the protein to be known. Instead, predictions of local structure are made using HMMSTR, a hidden Markov model for local structure. Alignments using the HMMSUM matrices compare favorably to alignments carried out using the BLOSUM matrices or structure-based substitution matrices SDM and HSDM when validated against remote homolog alignments from BAliBASE. HMMSUM has been implemented using local Dynamic Programming and with the Bayesian Adaptive alignment method.},
-	Affiliation = {Center for Bioinformatics, Department of Biology, Rensselaer Polytechnic Institute, Troy, NY 12180, USA.},
-	Aid = {bti828 {$[$}pii{$]$}},
-	Au = {Huang YM and Bystroff C},
-	Author = {Huang, Yao-Ming and Bystroff, Christopher},
-	Da = {20060208},
-	Date-Added = {2006-07-18 12:27:31 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060331},
-	Dep = {20051213},
-	Edat = {2005/12/15 09:00},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Jt = {Bioinformatics (Oxford, England)},
-	Keywords = {*Algorithms, Amino Acid Sequence, Amino Acid Substitution, Comparative Study, Conserved Sequence, *Evolution, Molecular, Molecular Sequence Data, Proteins/analysis/*chemistry/classification/*genetics, Reproducibility of Results, Research Support, U.S. Gov't, Non-P.H.S., Sensitivity and Specificity, Sequence Alignment/*methods, Sequence Analysis, Protein/*methods, Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Huang/2006.pdf},
-	Mhda = {2006/04/01 09:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {413-22},
-	Phst = {2005/12/13 {$[$}aheadofprint{$]$}},
-	Pl = {England},
-	Pmid = {16352653},
-	Pst = {ppublish},
-	Pt = {Evaluation Studies},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Bioinformatics. 2006 Feb 15;22(4):413-22. Epub 2005 Dec 13.},
-	Stat = {MEDLINE},
-	Title = {Improved pairwise alignments of proteins in the Twilight Zone using local structure predictions},
-	Volume = {22},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGsIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK88DhSAwAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUh1YW5nAAAQAAgAAMFxjUkAAAARAAgAAMDhqnwAAAABABgARmxrAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIdWFuZzoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0h1YW5nLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSHVhbmcvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Forrest:2006lr,
-	Abstract = {In this study, we investigate the extent to which techniques for homology modeling that were developed for water-soluble proteins are appropriate for membrane proteins as well. To this end we present an assessment of current strategies for homology modeling of membrane proteins and introduce a benchmark data set of homologous membrane protein structures, called HOMEP. First, we use HOMEP to reveal the relationship between sequence identity and structural similarity in membrane proteins. This analysis indicates that homology modeling is at least as applicable to membrane proteins as it is to water-soluble proteins and that acceptable models (with Calpha-RMSD values to the native of 2 A or less in the transmembrane regions) may be obtained for template sequence identities of 30% or higher if an accurate alignment of the sequences is used. Second, we show that secondary-structure prediction algorithms that were developed for water-soluble proteins perform approximately as well for membrane proteins. Third, we provide a comparison of a set of commonly used sequence alignment algorithms as applied to membrane proteins. We find that high-accuracy alignments of membrane protein sequences can be obtained using state-of-the-art profile-to-profile methods that were developed for water-soluble proteins. Improvements are observed when weights derived from the secondary structure of the query and the template are used in the scoring of the alignment, a result which relies on the accuracy of the secondary-structure prediction of the query sequence. The most accurate alignments were obtained using template profiles constructed with the aid of structural alignments. In contrast, a simple sequence-to-sequence alignment algorithm, using a membrane protein-specific substitution matrix, shows no improvement in alignment accuracy. We suggest that profile-to-profile alignment methods should be adopted to maximize the accuracy of homology models of membrane proteins.},
-	Affiliation = {Howard Hughes Medical Institute, Department of Biochemistry and Molecular Biophysics, Center for Computational Biology and Bioinformatics, Columbia University, New York, New York 10032.},
-	Aid = {10.1529/biophysj.106.082313 {$[$}doi{$]$}},
-	Annote = {This paper is mostly useful because it utilizes a bunch of different sequence alignment programs which may come in handy.},
-	Au = {Forrest LR and Tang CL and Honig B},
-	Author = {Forrest, Lucy R and Tang, Christopher L and Honig, Barry},
-	Da = {20060629},
-	Date-Added = {2006-07-05 13:01:21 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20060428},
-	Edat = {2006/05/02 09:00},
-	Issn = {0006-3495 (Print)},
-	Jid = {0370626},
-	Journal = {Biophys J},
-	Jt = {Biophysical journal.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Forrest/2006.pdf},
-	Mhda = {2006/05/02 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {508-17},
-	Phst = {2006/04/28 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {16648166},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	So = {Biophys J. 2006 Jul;91(2):508-17. Epub 2006 Apr 28.},
-	Stat = {In-Data-Review},
-	Title = {On the accuracy of homology modeling and sequence alignment methods applied to membrane proteins},
-	Volume = {91},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbD0IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLLsDP7NAAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0ZvcnJlc3QAABAACAAAwXGNSQAAABEACAAAwNBPQAAAAAEAGABGbD0ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkZvcnJlc3Q6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Gb3JyZXN0LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRm9ycmVzdC8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Sander:1991lr,
-	Abstract = {The database of known protein three-dimensional structures can be significantly increased by the use of sequence homology, based on the following observations. (1) The database of known sequences, currently at more than 12,000 proteins, is two orders of magnitude larger than the database of known structures. (2) The currently most powerful method of predicting protein structures is model building by homology. (3) Structural homology can be inferred from the level of sequence similarity. (4) The threshold of sequence similarity sufficient for structural homology depends strongly on the length of the alignment. Here, we first quantify the relation between sequence similarity, structure similarity, and alignment length by an exhaustive survey of alignments between proteins of known structure and report a homology threshold curve as a function of alignment length. We then produce a database of homology-derived secondary structure of proteins (HSSP) by aligning to each protein of known structure all sequences deemed homologous on the basis of the threshold curve. For each known protein structure, the derived database contains the aligned sequences, secondary structure, sequence variability, and sequence profile. Tertiary structures of the aligned sequences are implied, but not modeled explicitly. The database effectively increases the number of known protein structures by a factor of five to more than 1800. The results may be useful in assessing the structural significance of matches in sequence database searches, in deriving preferences and patterns for structure prediction, in elucidating the structural role of conserved residues, and in modeling three-dimensional detail by homology.},
-	Affiliation = {European Molecular Biology Laboratory, Heidelberg, Federal Republic of Germany.},
-	Aid = {10.1002/prot.340090107 {$[$}doi{$]$}},
-	Annote = {This paper describes the muliple alignment strategy used in the 1992 PNAS paper by Bork, Sander and Valencia that identified MreB/FtsA and others as actin homogs.},
-	Au = {Sander C and Schneider R},
-	Author = {Sander, C and Schneider, R},
-	Da = {19910522},
-	Date-Added = {2006-06-20 14:30:21 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19910522},
-	Edat = {1991/01/01},
-	Issn = {0887-3585 (Print)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins.},
-	Keywords = {Amino Acid Sequence, Animals, Calibration, *Databases, Factual, Molecular Sequence Data, Protein Conformation, Proteins/*chemistry/genetics, Reproducibility of Results, *Sequence Homology, Nucleic Acid, Stereoisomerism, Thermodynamics, Variation (Genetics)},
-	Language = {eng},
-	Lr = {20031114},
-	Mhda = {1991/01/01 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {56-68},
-	Pl = {UNITED STATES},
-	Pmid = {2017436},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 1991;9(1):56-68.},
-	Stat = {MEDLINE},
-	Title = {Database of homology-derived protein structures and the structural meaning of sequence alignment},
-	Volume = {9},
-	Year = {1991}}
-
-@article{Bork:1992fk,
-	Abstract = {The functionally diverse actin, hexokinase, and hsp70 protein families have in common an ATPase domain of known three-dimensional structure. Optimal superposition of the three structures and alignment of many sequences in each of the three families has revealed a set of common conserved residues, distributed in five sequence motifs, which are involved in ATP binding and in a putative interdomain hinge. From the multiple sequence alignment in these motifs a pattern of amino acid properties required at each position is defined. The discriminatory power of the pattern is in part due to the use of several known three-dimensional structures and many sequences and in part to the "property" method of generalizing from observed amino acid frequencies to amino acid fitness at each sequence position. A sequence data base search with the pattern significantly matches sugar kinases, such as fuco-, glucono-, xylulo-, ribulo-, and glycerokinase, as well as the prokaryotic cell cycle proteins MreB, FtsA, and StbA. These are predicted to have subdomains with the same tertiary structure as the ATPase subdomains Ia and IIa of hexokinase, actin, and Hsc70, a very similar ATP binding pocket, and the capacity for interdomain hinge motion accompanying functional state changes. A common evolutionary origin for all of the proteins in this class is proposed.},
-	Affiliation = {European Molecular Biology Laboratory, Heidelberg, Federal Republic of Germany.},
-	Au = {Bork P and Sander C and Valencia A},
-	Author = {Bork, P and Sander, C and Valencia, A},
-	Da = {19920915},
-	Date-Added = {2006-06-19 15:21:36 -0700},
-	Date-Modified = {2008-05-30 02:48:15 -0700},
-	Dcom = {19920915},
-	Edat = {1992/08/15},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America.},
-	Keywords = {Actins/chemistry/*genetics, Adenosinetriphosphatase/chemistry/*genetics, Amino Acid Sequence, Animals, Cell Cycle, Comparative Study, Escherichia coli/genetics, *Evolution, Heat-Shock Proteins/chemistry/*genetics, Hexokinase/chemistry/genetics, Humans, Models, Molecular, Molecular Sequence Data, Phosphotransferases/chemistry/*genetics, *Phosphotransferases (Alcohol Group Acceptor), Protein Conformation, Research Support, Non-U.S. Gov't, Sequence Homology, Nucleic Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bork/1992.pdf},
-	Lr = {20041117},
-	Mhda = {1992/08/15 00:01},
-	Number = {16},
-	Own = {NLM},
-	Pages = {7290--7294},
-	Pl = {UNITED STATES},
-	Pmid = {1323828},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Actins)},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 1992 Aug 15;89(16):7290-4.},
-	Stat = {MEDLINE},
-	Title = {An {ATPase} domain common to prokaryotic cell cycle proteins, sugar kinases, actin, and hsp70 heat shock proteins},
-	Volume = {89},
-	Year = {1992},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/gIMTk5Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKR8C8cSRQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEJvcmsAEAAIAADBcY1JAAAAEQAIAADAvNOUAAAAAQAYAEZr+ABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Qm9yazoxOTkyLnBkZgAADgASAAgAMQA5ADkAMgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Cb3JrLzE5OTIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0JvcmsvMTk5Mi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Leipe:2002lr,
-	Abstract = {Sequences and available structures were compared for all the widely distributed representatives of the P-loop GTPases and GTPase-related proteins with the aim of constructing an evolutionary classification for this superclass of proteins and reconstructing the principal events in their evolution. The GTPase superclass can be divided into two large classes, each of which has a unique set of sequence and structural signatures (synapomorphies). The first class, designated TRAFAC (after translation factors) includes enzymes involved in translation (initiation, elongation, and release factors), signal transduction (in particular, the extended Ras-like family), cell motility, and intracellular transport. The second class, designated SIMIBI (after signal recognition particle, MinD, and BioD), consists of signal recognition particle (SRP) GTPases, the assemblage of MinD-like ATPases, which are involved in protein localization, chromosome partitioning, and membrane transport, and a group of metabolic enzymes with kinase or related phosphate transferase activity. These two classes together contain over 20 distinct families that are further subdivided into 57 subfamilies (ancient lineages) on the basis of conserved sequence motifs, shared structural features, and domain architectures. Ten subfamilies show a universal phyletic distribution compatible with presence in the last universal common ancestor of the extant life forms (LUCA). These include four translation factors, two OBG-like GTPases, the YawG/YlqF-like GTPases (these two subfamilies also consist of predicted translation factors), the two signal-recognition-associated GTPases, and the MRP subfamily of MinD-like ATPases. The distribution of nucleotide specificity among the proteins of the GTPase superclass indicates that the common ancestor of the entire superclass was a GTPase and that a secondary switch to ATPase activity has occurred on several independent occasions during evolution. The functions of most GTPases that are traceable to LUCA are associated with translation. However, in contrast to other superclasses of P-loop NTPases (RecA-F1/F0, AAA+, helicases, ABC), GTPases do not participate in NTP-dependent nucleic acid unwinding and reorganizing activities. Hence, we hypothesize that the ancestral GTPase was an enzyme with a generic regulatory role in translation, with subsequent diversification resulting in acquisition of diverse functions in transport, protein trafficking, and signaling. In addition to the classification of previously known families of GTPases and related ATPases, we introduce several previously undetected families and describe new functional predictions.},
-	Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.},
-	Aid = {S0022283601953781 {$[$}pii{$]$}},
-	Au = {Leipe DD and Wolf YI and Koonin EV and Aravind L},
-	Author = {Leipe, Detlef D and Wolf, Yuri I and Koonin, Eugene V and Aravind, L},
-	Da = {20020327},
-	Date-Added = {2006-06-19 15:20:54 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20020416},
-	Edat = {2002/03/28 10:00},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology.},
-	Keywords = {Adenosinetriphosphatase/*chemistry/*classification, Amino Acid Sequence, Animals, Computational Biology, Conserved Sequence, *Evolution, Molecular, GTP Phosphohydrolase-Linked Elongation Factors/chemistry/classification, GTP Phosphohydrolases/*chemistry/*classification, Heterotrimeric GTP-Binding Proteins/chemistry/classification, Humans, Kinesin/chemistry/classification, Models, Molecular, Molecular Sequence Data, Monomeric GTP-Binding Proteins/chemistry/classification, Multigene Family/genetics, Myosins/chemistry/classification, Phylogeny, Protein Conformation, Research Support, U.S. Gov't, P.H.S., Sequence Alignment, Signal Recognition Particle/chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Leipe/2002.pdf},
-	Lr = {20051117},
-	Mhda = {2002/04/17 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {41-72},
-	Pl = {England},
-	Pmid = {11916378},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Signal Recognition Particle)},
-	Sb = {IM},
-	So = {J Mol Biol. 2002 Mar 15;317(1):41-72.},
-	Stat = {MEDLINE},
-	Title = {Classification and evolution of P-loop GTPases and related ATPases},
-	Volume = {317},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJoIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJX8C8cPRQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUxlaXBlAAAQAAgAAMFxjUkAAAARAAgAAMC802QAAAABABgARmyaAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpMZWlwZToyMDAyLnBkZgAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xlaXBlLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGVpcGUvMjAwMi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Cheng:2006yq,
-	Abstract = {MOTIVATION: Recognizing proteins that have similar tertiary structure is the key step of template-based protein structure prediction methods. Traditionally, a variety of alignment methods are used to identify similar folds, based on sequence similarity and sequence-structure compatibility. Although these methods are complementary, their integration has not been thoroughly exploited. Statistical machine learning methods provide tools for integrating multiple features, but so far these methods have been used primarily for protein and fold classification, rather than addressing the retrieval problem of fold recognition-finding a proper template for a given query protein. RESULTS: Here we present a two-stage machine learning, information retrieval, approach to fold recognition. First, we use alignment methods to derive pairwise similarity features for query-template protein pairs. We also use global profile-profile alignments in combination with predicted secondary structure, relative solvent accessibility, contact map, and beta-strand pairing to extract pairwise structural compatibility features. Second, we apply support vector machines to these features to predict the structural relevance (i.e. in the same fold or not) of the query-template pairs. For each query, the continuous relevance scores are used to rank the templates. The FOLDpro approach is modular, scalable, and effective. Compared to 11 other fold recognition methods, FOLDpro yields the best results in almost all standard categories on a comprehensive benchmark dataset. Using predictions of the top-ranked template, the sensitivity is about 85%, 56%, and 27% at the family, superfamily, and fold levels respectively. Using the 5 top-ranked templates, the sensitivity increases to 90%, 70%, and 48%. AVAILABILITY: The FOLDpro server is available with the SCRATCH suite through http://www.igb.uci.edu/servers/psss.html.},
-	Affiliation = {Institute for Genomics and Bioinformatics, School of Information and Computer Sciences, University of California, Irvine.},
-	Aid = {10.1093/bioinformatics/btl102 {$[$}doi{$]$}},
-	Annote = {This has a really nice review of sequence comparison techniques and other methods of comparing sequences of proteins.},
-	Author = {Cheng, J and Baldi, P},
-	Da = {20060320},
-	Date-Added = {2006-06-12 14:28:54 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20060317},
-	Edat = {2006/03/21 09:00},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Cheng/2006.pdf},
-	Mhda = {2006/03/21 09:00},
-	Own = {NLM},
-	Pmid = {16547073},
-	Pst = {aheadofprint},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print-Electronic},
-	So = {Bioinformatics. 2006 Mar 17;.},
-	Stat = {Publisher},
-	Title = {A machine learning information retrieval approach to protein fold recognition.},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbA4IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKC8CzKn0AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUNoZW5nAAAQAAgAAMFxjUkAAAARAAgAAMCzjO0AAAABABgARmwOAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDaGVuZzoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0NoZW5nLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hlbmcvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Edgar:2006uq,
-	Author = {Edgar, Robert C and Batzoglou, Serafim},
-	Date-Added = {2006-06-12 14:23:49 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Issn = {3},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Edgar/2006.pdf},
-	Pages = {368--373},
-	T2 = {Nucleic acids/Sequences and topology - Anna Marie Pyle and Jonathan Widom/Nick V Grishin and Sarah A Teichmann},
-	Title = {Multiple sequence alignment},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JWFGTG-2/2/02ab69ca5aac71752b4e2abebd686333},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbC8IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKocCzKV4AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUVkZ2FyAAAQAAgAAMFxjUkAAAARAAgAAMCzi84AAAABABgARmwvAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpFZGdhcjoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0VkZ2FyLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRWRnYXIvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JWFGTG-2/2/02ab69ca5aac71752b4e2abebd686333}}
-
-@article{Dunbrack:2006qy,
-	Author = {Dunbrack, Jr, Roland L},
-	Date-Added = {2006-06-12 14:22:08 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Issn = {3},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Roland%20L%20Dunbrack/2006.pdf},
-	Pages = {374--384},
-	T2 = {Nucleic acids/Sequences and topology - Anna Marie Pyle and Jonathan Widom/Nick V Grishin and Sarah A Teichmann},
-	Title = {Sequence comparison and protein structure prediction},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4K0FFT1-2/2/47de9c65b99da8510fbe08ae93211295},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGuAAAAAAGuAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPUIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKWcCzKQEAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAEVJvbGFuZCBMIER1bmJyYWNrAAAQAAgAAMFxjUkAAAARAAgAAMCzi3EAAAABABgARmz1AEZrzwBGatUARmobAEZkaABAiUMAAgBMaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSb2xhbmQgTCBEdW5icmFjazoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgBIVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1JvbGFuZCBMIER1bmJyYWNrLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAvLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUm9sYW5kIEwgRHVuYnJhY2svMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AmsCcAJ5AoQCiAKWAp0CpgLYAt0C4AAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALt},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4K0FFT1-2/2/47de9c65b99da8510fbe08ae93211295}}
-
-@article{Kolodny:2006fk,
-	Author = {Kolodny, Rachel and Petrey, Donald and Honig, Barry},
-	Date-Added = {2006-06-12 14:19:25 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Issn = {3},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kolodny/2006.pdf},
-	Pages = {393--398},
-	T2 = {Nucleic acids/Sequences and topology - Anna Marie Pyle and Jonathan Widom/Nick V Grishin and Sarah A Teichmann},
-	Title = {Protein structure comparison: implications for the nature of 'fold space', and structure and function prediction},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JW7WPY-1/2/24d6d0592d2d6d30df67c39b68cbee21},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbIYIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKxcCzKFUAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0tvbG9kbnkAABAACAAAwXGNSQAAABEACAAAwLOKxQAAAAEAGABGbIYARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOktvbG9kbnk6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9Lb2xvZG55LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS29sb2RueS8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JW7WPY-1/2/24d6d0592d2d6d30df67c39b68cbee21}}
-
-@article{Andreeva:2006lr,
-	Author = {Andreeva, Antonina and Murzin, Alexey G},
-	Date-Added = {2006-06-12 14:13:05 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Issn = {3},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Andreeva/2006.pdf},
-	Pages = {399--408},
-	T2 = {Nucleic acids/Sequences and topology - Anna Marie Pyle and Jonathan Widom/Nick V Grishin and Sarah A Teichmann},
-	Title = {Evolution of protein fold in the presence of functional constraints},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JVT1KX-3/2/adb0e93134175f0beb735242246a7858},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+EIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJvcCzJrhQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFuZHJlZXZhABAACAAAwXGNSQAAABEACAAAwLOJKAAAAAEAGABGa+EARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFuZHJlZXZhOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FuZHJlZXZhLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FuZHJlZXZhLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JVT1KX-3/2/adb0e93134175f0beb735242246a7858}}
-
-@article{Heger:2003fk,
-	Abstract = {Domains are considered as the basic units of protein folding, evolution, and function. Decomposing each protein into modular domains is thus a basic prerequisite for accurate functional classification of biological molecules. Here, we present ADDA, an automatic algorithm for domain decomposition and clustering of all protein domain families. We use alignments derived from an all-on-all sequence comparison to define domains within protein sequences based on a global maximum likelihood model. In all, 90% of domain boundaries are predicted within 10% of domain size when compared with the manual domain definitions given in the SCOP database. A representative database of 249,264 protein sequences were decomposed into 450,462 domains. These domains were clustered on the basis of sequence similarities into 33,879 domain families containing at least two members with less than 40% sequence identity. Validation against family definitions in the manually curated databases SCOP and PFAM indicates almost perfect unification of various large domain families while contamination by unrelated sequences remains at a low level. The global survey of protein-domain space by ADDA confirms that most large and universal domain families are already described in PFAM and/or SMART. However, a survey of the complete set of mobile modules leads to the identification of 1479 new interesting domain families which shuffle around in multi-domain proteins. The data are publicly available at ftp://ftp.ebi.ac.uk/pub/contrib/heger/adda.},
-	Affiliation = {EMBL-EBI, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. andreas.heger@helsinki.fi},
-	Aid = {S0022283603002699 {$[$}pii{$]$}},
-	Au = {Heger A and Holm L},
-	Author = {Heger, Andreas and Holm, Liisa},
-	Da = {20030422},
-	Date-Added = {2006-06-10 15:47:23 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20030509},
-	Edat = {2003/04/23 05:00},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology.},
-	Keywords = {Algorithms, Amino Acid Sequence, Animals, Cluster Analysis, Databases, Protein, Homeodomain Proteins/chemistry, Humans, Models, Molecular, Molecular Sequence Data, *Protein Structure, Tertiary, Proteins/chemistry/classification, *Sequence Alignment, Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Heger/2003.pdf},
-	Lr = {20041117},
-	Mhda = {2003/05/13 05:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {749-67},
-	Pl = {England},
-	Pmid = {12706730},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Homeodomain Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 2003 May 2;328(3):749-67.},
-	Stat = {MEDLINE},
-	Title = {Exhaustive enumeration of protein domain families},
-	Volume = {328},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbF8IMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJd72JGCUAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUhlZ2VyAAAQAAgAAMFxjUkAAAARAAgAAL2JepUAAAABABgARmxfAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIZWdlcjoyMDAzLnBkZgAOABIACAAyADAAMAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0hlZ2VyLzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSGVnZXIvMjAwMy5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Theobald:2005uq,
-	Abstract = {Many dissimilar protein sequences fold into similar structures. A central and persistent challenge facing protein structural analysis is the discrimination between homology and convergence for structurally similar domains that lack significant sequence similarity. Classic examples are the OB-fold and SH3 domains, both small, modular beta-barrel protein superfolds. The similarities among these domains have variously been attributed to common descent or to convergent evolution. Using a sequence profile-based phylogenetic technique, we analyzed all structurally characterized OB-fold, SH3, and PDZ domains with less than 40% mutual sequence identity. An all-against-all, profile-versus-profile analysis of these domains revealed many previously undetectable significant interrelationships. The matrices of scores were used to infer phylogenies based on our derivation of the relationships between sequence similarity E-values and evolutionary distances. The resulting clades of domains correlate remarkably well with biological function, as opposed to structural similarity, indicating that the functionally distinct sub-families within these superfolds are homologous. This method extends phylogenetics into the challenging "twilight zone" of sequence similarity, providing the first objective resolution of deep evolutionary relationships among distant protein families.},
-	Affiliation = {Department of Chemistry and Biochemistry, UCB 215, University of Colorado, Boulder, CO 80309-0215, USA.},
-	Aid = {S0022-2836(05)01051-X {$[$}pii{$]$}},
-	Au = {Theobald DL and Wuttke DS},
-	Author = {Theobald, Douglas L and Wuttke, Deborah S},
-	Da = {20051121},
-	Date-Added = {2006-06-10 15:46:43 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20051228},
-	Dep = {20050920},
-	Edat = {2005/11/04 09:00},
-	Gr = {GM59414/GM/NIGMS},
-	Issn = {0022-2836 (Print)},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Jt = {Journal of molecular biology.},
-	Keywords = {*Evolution, Molecular, Neural Networks (Computer), Nucleic Acids/metabolism, *Phylogeny, *Protein Folding, Protein Structure, Tertiary, Proteins/*chemistry/*metabolism, Research Support, N.I.H., Extramural, Research Support, Non-U.S. Gov't},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Theobald/2005.pdf},
-	Mhda = {2005/12/29 09:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {722-37},
-	Phst = {2005/09/20 {$[$}aheadofprint{$]$}},
-	Pl = {England},
-	Pmid = {16266719},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {J Mol Biol. 2005 Dec 2;354(3):722-37. Epub 2005 Sep 20.},
-	Stat = {MEDLINE},
-	Title = {Divergent evolution within protein superfolds inferred from profile-based phylogenetics},
-	Volume = {354},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbS0IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKdcB42rYAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFRoZW9iYWxkABAACAAAwXGNSQAAABEACAAAwHk9JgAAAAEAGABGbS0ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlRoZW9iYWxkOjIwMDUucGRmAAAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1RoZW9iYWxkLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1RoZW9iYWxkLzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Miller:2002qy,
-	Abstract = {Despite the variety of protein sizes, shapes, and backbone configurations found in nature, the design of novel protein folds remains an open problem. Within simple lattice models it has been shown that all structures are not equally suitable for design. Rather, certain structures are distinguished by unusually high designability: the number of amino acid sequences for which they represent the unique lowest energy state; sequences associated with such structures possess both robustness to mutation and thermodynamic stability. Here we report that highly designable backbone conformations also emerge in a realistic off-lattice model. The highly designable conformations of a chain of 23 amino acids are identified and found to be remarkably insensitive to model parameters. Although some of these conformations correspond closely to known natural protein folds, such as the zinc finger and the helix-turn-helix motifs, others do not resemble known folds and may be candidates for novel fold design.},
-	Affiliation = {NEC Research Institute, Princeton, New Jersey, USA.},
-	Aid = {10.1002/prot.10107 {$[$}doi{$]$}},
-	Au = {Miller J and Zeng C and Wingreen NS and Tang C},
-	Author = {Miller, Jonathan and Zeng, Chen and Wingreen, Ned S and Tang, Chao},
-	Ci = {Copyright 2002 Wiley-Liss, Inc.},
-	Da = {20020523},
-	Date-Added = {2006-06-10 15:46:09 -0700},
-	Date-Modified = {2008-03-28 11:17:08 -0700},
-	Dcom = {20020625},
-	Edat = {2002/05/10 10:00},
-	Group = {Designability},
-	Issn = {1097-0134 (Electronic)},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins.},
-	Keywords = {Amino Acid Motifs, Amino Acids/chemistry, Animals, *Models, Molecular, *Protein Conformation, Protein Folding, Proteins/*chemistry},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Miller/2002.pdf},
-	Lr = {20031114},
-	Mhda = {2002/06/26 10:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {506-12},
-	Pl = {United States},
-	Pmid = {12001229},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proteins. 2002 Jun 1;47(4):506-12.},
-	Stat = {MEDLINE},
-	Title = {Emergence of highly designable protein-backbone conformations in an off-lattice model},
-	Volume = {47},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbLsIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKw8CvPAQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk1pbGxlcgAQAAgAAMFxjUkAAAARAAgAAMCvnnQAAAABABgARmy7AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNaWxsZXI6MjAwMi5wZGYAAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTWlsbGVyLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL01pbGxlci8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Huttelmaier:2005fj,
-	Abstract = {Localization of beta-actin messenger RNA to sites of active actin polymerization modulates cell migration during embryogenesis, differentiation and possibly carcinogenesis. This localization requires the oncofetal protein ZBP1 (Zipcode binding protein 1), which binds to a conserved 54-nucleotide element in the 3'-untranslated region of the beta-actin mRNA known as the 'zipcode'. ZBP1 promotes translocation of the beta-actin transcript to actin-rich protrusions in primary fibroblasts and neurons. It is not known how the ZBP1-RNA complex achieves asymmetric protein sorting by localizing beta-actin mRNA. Here we show that chicken ZBP1 modulates the translation of beta-actin mRNA. ZBP1 associates with the beta-actin transcript in the nucleus and prevents premature translation in the cytoplasm by blocking translation initiation. Translation only occurs when the ZBP1-RNA complex reaches its destination at the periphery of the cell. At the endpoint of mRNA transport, the protein kinase Src promotes translation by phosphorylating a key tyrosine residue in ZBP1 that is required for binding to RNA. These sequential events provide both temporal and spatial control over beta-actin mRNA translation, which is important for cell migration and neurite outgrowth.},
-	Affiliation = {Department of Anatomy and Structural Biology, Albert Einstein College of Medicine, 1300 Morris Park Avenue, Bronx, New York 10461, USA. stefan.huettelmaier@medizin.uni-halle.de},
-	Aid = {10.1038/nature04115 {$[$}doi{$]$}},
-	Au = {Huttelmaier S and Zenklusen D and Lederer M and Dictenberg J and Lorenz M and Meng X and Bassell GJ and Condeelis J and Singer RH},
-	Author = {Huttelmaier, Stefan and Zenklusen, Daniel and Lederer, Marcell and Dictenberg, Jason and Lorenz, Mike and Meng, Xiuhua and Bassell, Gary J and Condeelis, John and Singer, Robert H},
-	Cin = {Nature. 2005 Nov 24;438(7067):432-5. PMID: 16306974},
-	Da = {20051124},
-	Date-Added = {2006-06-10 15:45:23 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20051215},
-	Edat = {2005/11/25 09:00},
-	Issn = {1476-4687 (Electronic)},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature.},
-	Keywords = {Actins/*analysis/*biosynthesis/genetics, Animals, Avian Proteins/genetics/*metabolism, Cell Line, *Cell Polarity, Chickens, Glycoproteins/genetics/metabolism, Humans, Molecular Sequence Data, Phosphorylation, *Protein Biosynthesis, Proto-Oncogene Proteins pp60(c-src)/*metabolism, RNA, Messenger/genetics/metabolism, RNA, Small Interfering/genetics/metabolism, RNA-Binding Proteins/genetics/*metabolism, Research Support, N.I.H., Extramural, Research Support, Non-U.S. Gov't, Research Support, U.S. Gov't, Non-P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Huttelmaier/2005.pdf},
-	Mhda = {2005/12/16 09:00},
-	Number = {7067},
-	Own = {NLM},
-	Pages = {512-5},
-	Phst = {2005/08/08 {$[$}accepted{$]$}},
-	Pl = {England},
-	Pmid = {16306994},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Actins)},
-	Sb = {IM},
-	Si = {GENBANK/AF117106},
-	So = {Nature. 2005 Nov 24;438(7067):512-5.},
-	Stat = {MEDLINE},
-	Title = {Spatial regulation of beta-actin translation by Src-dependent phosphorylation of ZBP1},
-	Volume = {438},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbG0IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJecBX8yQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC0h1dHRlbG1haWVyAAAQAAgAAMFxjUkAAAARAAgAAMBYVZQAAAABABgARmxtAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIdXR0ZWxtYWllcjoyMDA1LnBkZgAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0h1dHRlbG1haWVyLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSHV0dGVsbWFpZXIvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV}}
-
-@article{Van-Houten:1993lr,
-	Abstract = {During the process of E. coli nucleotide excision repair, DNA damage recognition and processing are achieved by the action of the uvrA, uvrB, and uvrC gene products. The availability of highly purified proteins has lead to a detailed molecular description of E. coli nucleotide excision repair that serves as a model for similar processes in eukaryotes. An interesting aspect of this repair system is the protein complex's ability to work on a vast array of DNA lesions that differ widely in their chemical composition and molecular architecture. Here we propose a model for damage recognition in which the UvrB protein serves as the component that confers enhanced specificity to a preincision complex. We hypothesize that one major determinant for the formation of a stable preincision complex appears to be the disruption of base stacking interactions by DNA lesions.},
-	Affiliation = {Department of Pathology, University of Vermont, Burlington 05405-0068.},
-	Aid = {10.1002/bies.950150108 {$[$}doi{$]$}},
-	Au = {Van Houten B and Snowden A},
-	Author = {Van Houten, B and Snowden, A},
-	Da = {19930504},
-	Date-Added = {2006-06-10 15:23:39 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19930504},
-	Edat = {1993/01/01},
-	Gs = {uvrA},
-	Issn = {0265-9247 (Print)},
-	Jid = {8510851},
-	Journal = {Bioessays},
-	Jt = {BioEssays : news and reviews in molecular, cellular and developmental biology.},
-	Keywords = {DNA Damage, DNA Repair, Deoxyribonucleases/*genetics, Escherichia coli/*genetics},
-	Language = {eng},
-	Lr = {20051116},
-	Mhda = {1993/01/01 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {51-9},
-	Pl = {ENGLAND},
-	Pmid = {8466476},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rf = {49},
-	Rn = {EC 3.1.- (Deoxyribonucleases)},
-	Sb = {IM},
-	So = {Bioessays. 1993 Jan;15(1):51-9.},
-	Stat = {MEDLINE},
-	Title = {Mechanism of action of the Escherichia coli UvrABC nuclease: clues to the damage recognition problem},
-	Volume = {15},
-	Year = {1993}}
-
-@article{Riley:2006lr,
-	Abstract = {The goal of this group project has been to coordinate and bring up-to-date information on all genes of Escherichia coli K-12. Annotation of the genome of an organism entails identification of genes, the boundaries of genes in terms of precise start and end sites, and description of the gene products. Known and predicted functions were assigned to each gene product on the basis of experimental evidence or sequence analysis. Since both kinds of evidence are constantly expanding, no annotation is complete at any moment in time. This is a snapshot analysis based on the most recent genome sequences of two E.coli K-12 bacteria. An accurate and up-to-date description of E.coli K-12 genes is of particular importance to the scientific community because experimentally determined properties of its gene products provide fundamental information for annotation of innumerable genes of other organisms. Availability of the complete genome sequence of two K-12 strains allows comparison of their genotypes and mutant status of alleles.},
-	Affiliation = {Josephine Bay Paul Center, Marine Biological Laboratory, Woods Hole, MA 02543, USA. mriley@mbl.edu},
-	Aid = {34/1/1 {$[$}pii{$]$}},
-	Annote = {Information on the annotation of the E. coli K-12 genome which I have been using for "fishing".},
-	Au = {Riley M and Abe T and Arnaud MB and Berlyn MK and Blattner FR and Chaudhuri RR and Glasner JD and Horiuchi T and Keseler IM and Kosuge T and Mori H and Perna NT and Plunkett G 3rd and Rudd KE and Serres MH and Thomas GH and Thomson NR and Wishart D and Wanner BL},
-	Author = {Riley, Monica and Abe, Takashi and Arnaud, Martha B and Berlyn, Mary K B and Blattner, Frederick R and Chaudhuri, Roy R and Glasner, Jeremy D and Horiuchi, Takashi and Keseler, Ingrid M and Kosuge, Takehide and Mori, Hirotada and Perna, Nicole T and Plunkett, Guy 3rd and Rudd, Kenneth E and Serres, Margrethe H and Thomas, Gavin H and Thomson, Nicholas R and Wishart, David and Wanner, Barry L},
-	Da = {20060106},
-	Date-Added = {2006-06-06 23:52:51 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060120},
-	Dep = {20060105},
-	Edat = {2006/01/07 09:00},
-	Gr = {1 R13 GM74562-01/GM/NIGMS},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Keywords = {Congresses, Cooperative Behavior, Escherichia coli K12/*genetics, Escherichia coli Proteins/*genetics, *Genome, Bacterial, Genomics, Research Support, N.I.H., Extramural, Research Support, Non-U.S. Gov't, Research Support, U.S. Gov't, Non-P.H.S., Terminology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Riley/2006.pdf},
-	Mhda = {2006/01/21 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {1-9},
-	Phst = {2006 {$[$}ppublish{$]$}},
-	Pl = {England},
-	Pmid = {16397293},
-	Pst = {epublish},
-	Pt = {Journal Article},
-	Pubm = {Electronic-Print},
-	Rn = {0 (Escherichia coli Proteins)},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Jan 5;34(1):1-9. Print 2006.},
-	Stat = {MEDLINE},
-	Title = {Escherichia coli K-12: a cooperatively developed annotation snapshot--2005},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPEIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLUcCrslQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVJpbGV5AAAQAAgAAMFxjUkAAAARAAgAAMCsFMQAAAABABgARmzxAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpSaWxleToyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1JpbGV5LzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUmlsZXkvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Kull:1996kx,
-	Abstract = {Kinesin is the founding member of a superfamily of microtubule based motor proteins that perform force-generating tasks such as organelle transport and chromosome segregation. It has two identical approximately 960-amino-acid chains containing an amino-terminal globular motor domain, a central alpha-helical region that enables dimer formation through a coiled-coil, and a carboxy-terminal tail domain that binds light chains and possibly an organelle receptor. The kinesin motor domain of approximately 340 amino acids, which can produce movement in vitro, is much smaller than that of myosin (approximately 850 amino acids) and dynein (1,000 amino acids), and is the smallest known molecular motor. Here, we report the crystal structure of the human kinesin motor domain with bound ADP determined to 1.8-A resolution by X-ray crystallography. The motor consists primarily of a single alpha/beta arrowhead-shaped domain with dimensions of 70 x 45 x 45 A. Unexpectedly, it has a striking structural similarity to the core of the catalytic domain of the actin-based motor myosin. Although kinesin and myosin have virtually no amino-acid sequence++ identity, and exhibit distinct enzymatic and motile properties, our results suggest that these two classes of mechanochemical enzymes evolved from a common ancestor and share a similar force-generating strategy.},
-	Affiliation = {Department of Biochemistry/Biophysics, University of California, San Francisco, California 94143, USA.},
-	Annote = {This paper showed that myosin and kinesin share a similar ATPase subdomain and force-generating mechanism.
-
-The PDB codes for the proteins studied are 1BG2 (kinesin) and 2MYS chain A (myosin).},
-	Au = {Kull FJ and Sablin EP and Lau R and Fletterick RJ and Vale RD},
-	Author = {Kull, F J and Sablin, E P and Lau, R and Fletterick, R J and Vale, R D},
-	Cin = {Nature. 1996 Apr 11;380(6574):483-4. PMID: 8606761},
-	Da = {19960520},
-	Date-Added = {2006-06-06 15:04:12 -0700},
-	Date-Modified = {2008-05-29 12:33:06 -0700},
-	Dcom = {19960520},
-	Edat = {1996/04/11},
-	Jid = {0410462},
-	Journal = {Nature},
-	Jt = {Nature.},
-	Keywords = {Adenosine Diphosphate/chemistry, Amino Acid Sequence, Crystallography, X-Ray, Humans, Kinesin/*chemistry, Models, Molecular, Molecular Sequence Data, Myosins/*chemistry, Protein Conformation, Protein Structure, Secondary, Research Support, U.S. Gov't, P.H.S., Sequence Alignment},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kull/1996.pdf},
-	Lr = {20041117},
-	Mhda = {1996/04/11 00:01},
-	Number = {6574},
-	Own = {NLM},
-	Pages = {550--555},
-	Pl = {ENGLAND},
-	Pmid = {8606779},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {58-64-0 (Adenosine Diphosphate)},
-	Sb = {IM},
-	Si = {PDB/UNKNOWN},
-	So = {Nature. 1996 Apr 11;380(6574):550-5.},
-	Stat = {MEDLINE},
-	Title = {Crystal structure of the kinesin motor domain reveals a structural similarity to myosin},
-	Volume = {380},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbI8IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJqb7pHjEAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEt1bGwAEAAIAADBcY1JAAAAEQAIAAC+6YChAAAAAQAYAEZsjwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6S3VsbDoxOTk2LnBkZgAADgASAAgAMQA5ADkANgAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LdWxsLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0t1bGwvMTk5Ni5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Rayment:1993fj,
-	Abstract = {Directed movement is a characteristic of many living organisms and occurs as a result of the transformation of chemical energy into mechanical energy. Myosin is one of three families of molecular motors that are responsible for cellular motility. The three-dimensional structure of the head portion of myosin, or subfragment-1, which contains both the actin and nucleotide binding sites, is described. This structure of a molecular motor was determined by single crystal x-ray diffraction. The data provide a structural framework for understanding the molecular basis of motility.},
-	Affiliation = {Department of Biochemistry, University of Wisconsin, Madison 53705.},
-	Annote = {Going "fishing" with the ATPase subdomain and head of myosin.},
-	Au = {Rayment I and Rypniewski WR and Schmidt-Base K and Smith R and Tomchick DR and Benning MM and Winkelmann DA and Wesenberg G and Holden HM},
-	Author = {Rayment, I and Rypniewski, W R and Schmidt-Base, K and Smith, R and Tomchick, D R and Benning, M M and Winkelmann, D A and Wesenberg, G and Holden, H M},
-	Cin = {Science. 1993 Jul 2;261(5117):35-6. PMID: 8316856},
-	Da = {19930727},
-	Date-Added = {2006-06-06 14:50:43 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19930727},
-	Edat = {1993/07/02},
-	Issn = {0036-8075 (Print)},
-	Jid = {0404511},
-	Journal = {Science},
-	Jt = {Science.},
-	Keywords = {Actins/metabolism, Adenosine Triphosphate/metabolism, Amino Acid Sequence, Binding Sites, Crystallization, Image Processing, Computer-Assisted, Methylation, *Models, Molecular, Molecular Sequence Data, Muscle Contraction, Myosin Subfragments/*chemistry/metabolism, *Protein Conformation, Protein Structure, Secondary, Research Support, U.S. Gov't, P.H.S., X-Ray Diffraction},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Rayment/1993.pdf},
-	Lr = {20041117},
-	Mhda = {1993/07/02 00:01},
-	Number = {5117},
-	Own = {NLM},
-	Pages = {50-8},
-	Pl = {UNITED STATES},
-	Pmid = {8316857},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Actins)},
-	Sb = {IM},
-	So = {Science. 1993 Jul 2;261(5117):50-8.},
-	Stat = {MEDLINE},
-	Title = {Three-dimensional structure of myosin subfragment-1: a molecular motor},
-	Volume = {261},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbOkIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLT8CrZptQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1JheW1lbnQAABAACAAAwXGNSQAAABEACAAAwKvJCwAAAAEAGABGbOkARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJheW1lbnQ6MTk5My5wZGYADgASAAgAMQA5ADkAMwAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SYXltZW50LzE5OTMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUmF5bWVudC8xOTkzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Bueno:2006uq,
-	Annote = {This article mentions the fact that topology is more important than sequence in determining protein folding and note that widely divergent sequences can give rise to the same structure.  Following the references in this article in regards to this might yield some interesting leads.},
-	Author = {Bueno, Marta and Ayuso-Tejedor, Sara and Sancho, Javier},
-	Date-Added = {2006-06-06 13:59:52 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Issn = {3},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {protein folding, apoflavodoxin, transition state, folding nucleus, protein stability},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bueno/2006.pdf},
-	Pages = {813--824},
-	Title = {Do Proteins with Similar Folds Have Similar Transition State Structures? A Diffuse Transition State of the 169 Residue Apoflavodoxin},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-4JS1M3K-2/2/629da8d066d225439260180d7c590663},
-	Volume = {359},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAIIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLKsCrOrZQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUJ1ZW5vAAAQAAgAAMFxjUkAAAARAAgAAMCrnSYAAAABABgARmwCAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCdWVubzoyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0J1ZW5vLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQnVlbm8vMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-4JS1M3K-2/2/629da8d066d225439260180d7c590663}}
-
-@article{Chakrabarti:2006qy,
-	Abstract = {Accurate multiple sequence alignments of proteins are very important to several areas of computational biology and provide an understanding of phylogenetic history of domain families, their identification and classification. This article presents a new algorithm, REFINER, that refines a multiple sequence alignment by iterative realignment of its individual sequences with the predetermined conserved core (block) model of a protein family. Realignment of each sequence can correct misalignments between a given sequence and the rest of the profile and at the same time preserves the family's overall block model. Large-scale benchmarking studies showed a noticeable improvement of alignment after refinement. This can be inferred from the increased alignment score and enhanced sensitivity for database searching using the sequence profiles derived from refined alignments compared with the original alignments. A standalone version of the program is available by ftp distribution (ftp://ftp.ncbi.nih.gov/pub/REFINER) and will be incorporated into the next release of the Cn3D structure/alignment viewer.},
-	Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA.},
-	Aid = {34/9/2598 {$[$}pii{$]$}},
-	Annote = {They come up with a tool to refine multiple alignments and demonstrate the improvement with a metric similar to Hwa with an ROC curve but using the VAST to classify structurally similar proteins (a tool available from NCBI).  VAST and DALI seem to have similar missions in life.  Note that the increases in sensitivity are about the same level as the increase of HP+diagonal over BL62.},
-	Au = {Chakrabarti S and Lanczycki CJ and Panchenko AR and Przytycka TM and Thiessen PA and Bryant SH},
-	Author = {Chakrabarti, Saikat and Lanczycki, Christopher J and Panchenko, Anna R and Przytycka, Teresa M and Thiessen, Paul A and Bryant, Stephen H},
-	Da = {20060518},
-	Date-Added = {2006-06-06 13:54:17 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060530},
-	Dep = {20060517},
-	Edat = {2006/05/19 09:00},
-	Issn = {1362-4962 (Electronic)},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Keywords = {*Algorithms, Amino Acid Sequence, Comparative Study, Conserved Sequence, Internet, Molecular Sequence Data, Quality Control, Reproducibility of Results, Research Support, N.I.H., Intramural, Sequence Alignment/*methods/standards, Sequence Analysis, Protein/*methods/standards},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chakrabarti/2006.pdf},
-	Mhda = {2006/05/31 09:00},
-	Number = {9},
-	Own = {NLM},
-	Pages = {2598-606},
-	Phst = {2006 {$[$}ppublish{$]$}},
-	Pl = {England},
-	Pmid = {16707662},
-	Pst = {epublish},
-	Pt = {Journal Article},
-	Pubm = {Electronic-Print},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 May 17;34(9):2598-606. Print 2006.},
-	Stat = {MEDLINE},
-	Title = {Refining multiple sequence alignments with conserved core regions},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGcAAAAAAGcAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAkIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJycCrNwdQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAC0NoYWtyYWJhcnRpAAAQAAgAAMFxjUkAAAARAAgAAMCrmXcAAAABABgARmwJAEZrzwBGatUARmobAEZkaABAiUMAAgBGaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpDaGFrcmFiYXJ0aToyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgBCVXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0NoYWtyYWJhcnRpLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxApLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hha3JhYmFydGkvMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AlkCXgJnAnICdgKEAosClALAAsUCyAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALV}}
-
-@article{Go:1983lr,
-	Aid = {10.1146/annurev.bb.12.060183.001151 {$[$}doi{$]$}},
-	Au = {Go N},
-	Author = {Go, N},
-	Da = {19830817},
-	Date-Added = {2006-06-06 09:53:31 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19830817},
-	Edat = {1983/01/01},
-	Issn = {0084-6589 (Print)},
-	Jid = {0332636},
-	Journal = {Annu Rev Biophys Bioeng},
-	Jt = {Annual review of biophysics and bioengineering.},
-	Keywords = {Amino Acid Sequence, Computers, Disulfides, Models, Structural, *Protein Conformation, Research Support, Non-U.S. Gov't, Stochastic Processes},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Go/1983.pdf},
-	Lr = {20041117},
-	Mhda = {1983/01/01 00:01},
-	Own = {NLM},
-	Pages = {183-210},
-	Pl = {UNITED STATES},
-	Pmid = {6347038},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rf = {93},
-	Rn = {0 (Disulfides)},
-	Sb = {IM},
-	So = {Annu Rev Biophys Bioeng. 1983;12:183-210.},
-	Stat = {MEDLINE},
-	Title = {Theoretical studies of protein folding},
-	Volume = {12},
-	Year = {1983},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEkIMTk4My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLOMCrAB5QREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkdvABAACAAAwXGNSQAAABEACAAAwKtijgAAAAEAGABGbEkARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkdvOjE5ODMucGRmAAAOABIACAAxADkAOAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dvLzE5ODMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0dvLzE5ODMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Honeycutt:1990fk,
-	Abstract = {The possibility that several metastable minima exist in which the folded forms of a polypeptide chain have similar structural characteristics but different energies is suggested. The validity of this hypothesis is illustrated with the aid of simulation methods on a model protein that folds into a beta-barrel structure. Some implications of this hypothesis such as the existence of multiple pathways with intermediates for protein folding are discussed.},
-	Affiliation = {Department of Chemistry and Biochemistry, University of Maryland, College Park 20742.},
-	Au = {Honeycutt JD and Thirumalai D},
-	Author = {Honeycutt, J D and Thirumalai, D},
-	Da = {19900606},
-	Date-Added = {2006-06-06 09:52:57 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19900606},
-	Edat = {1990/05/01},
-	Issn = {0027-8424 (Print)},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Jt = {Proceedings of the National Academy of Sciences of the United States of America.},
-	Keywords = {Drug Stability, Kinetics, Models, Molecular, *Protein Conformation, *Proteins, Research Support, Non-U.S. Gov't, Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Honeycutt/1990.pdf},
-	Lr = {20041117},
-	Mhda = {1990/05/01 00:01},
-	Number = {9},
-	Own = {NLM},
-	Pages = {3526-9},
-	Pl = {UNITED STATES},
-	Pmid = {2333297},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Proc Natl Acad Sci U S A. 1990 May;87(9):3526-9.},
-	Stat = {MEDLINE},
-	Title = {Metastability of the folded states of globular proteins},
-	Volume = {87},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGkIMTk5MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLMMCrAGhQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUhvbmV5Y3V0dAAAEAAIAADBcY1JAAAAEQAIAADAq2LYAAAAAQAYAEZsaQBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6SG9uZXljdXR0OjE5OTAucGRmAA4AEgAIADEAOQA5ADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvSG9uZXljdXR0LzE5OTAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSG9uZXljdXR0LzE5OTAucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Leonardi:2006qy,
-	Abstract = {MOTIVATION: A central problem in genomics is to determine the function of a protein using the information contained in its amino acid sequence. Variable Length Markov Chains (VLMC) are a promising class of models that can effectively classify proteins into families and they can be estimated in linear time and space. RESULTS: We introduce a new algorithm, called Sparse Probabilistic Suffix Trees (SPST), that identifies equivalences between the contexts of a VLMC. We show that, in many cases, the identification of these equivalences can improve the classification rate of the classical Probabilistic Suffix Trees (PST) algorithm. We also show that better classification can be achieved identifying representative fingerprints in the amino acid chains, and this variation in the SPST algorithm is called F-SPST. AVAILABILITY: The SPST algorithm can be freely downloaded from the site http://www.ime.usp.br/~leonardi/spst/.},
-	Affiliation = {Instituto de Matematica e Estatistica, Universidade de Sao Paulo, Rua do Matao 1010 CEP 05508-090, Sao Paulo, Brazil.},
-	Aid = {10.1093/bioinformatics/btl088 {$[$}doi{$]$}},
-	Author = {Leonardi, FG},
-	Da = {20060310},
-	Date-Added = {2006-05-23 11:04:35 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20060309},
-	Edat = {2006/03/11 09:00},
-	Issn = {1367-4803 (Print)},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Leonardi/2006.pdf},
-	Mhda = {2006/03/11 09:00},
-	Own = {NLM},
-	Pmid = {16527830},
-	Pst = {aheadofprint},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print-Electronic},
-	So = {Bioinformatics. 2006 Mar 9;.},
-	Stat = {Publisher},
-	Title = {A generalization of the PST algorithm: modeling the sparse nature of protein sequences},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJsIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKN8CYnI4AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACExlb25hcmRpABAACAAAwXGNSQAAABEACAAAwJj+/gAAAAEAGABGbJsARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxlb25hcmRpOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xlb25hcmRpLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0xlb25hcmRpLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Bhardwaj:2006lr,
-	Author = {Bhardwaj, Nitin and Stahelin, Robert V. and Langlois, Robert E. and Cho, Wonhwa and Lu, Hui},
-	Date-Added = {2006-05-08 11:01:48 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Issn = {2},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {protein-membrane interactions, function annotation, support vector machines, peripheral proteins, protein function prediction},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bhardwaj/2006.pdf},
-	Pages = {486--495},
-	Title = {Structural Bioinformatics Prediction of Membrane-binding Proteins},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-4JKRVYH-3/2/1ab947cb2bc89c726a518cfe5c620e4b},
-	Volume = {359},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/EIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKrcCE1UtQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEJoYXJkd2FqABAACAAAwXGNSQAAABEACAAAwIU3uwAAAAEAGABGa/EARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkJoYXJkd2FqOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0JoYXJkd2FqLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0JoYXJkd2FqLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-4JKRVYH-3/2/1ab947cb2bc89c726a518cfe5c620e4b}}
-
-@article{Wallace:2006tq,
-	Abstract = {We introduce M-Coffee, a meta-method for assembling multiple sequence alignments (MSA) by combining the output of several individual methods into one single MSA. M-Coffee is an extension of T-Coffee and uses consistency to estimate a consensus alignment. We show that the procedure is robust to variations in the choice of constituent methods and reasonably tolerant to duplicate MSAs. We also show that performances can be improved by carefully selecting the constituent methods. M-Coffee outperforms all the individual methods on three major reference datasets: HOMSTRAD, Prefab and Balibase. We also show that on a case-by-case basis, M-Coffee is twice as likely to deliver the best alignment than any individual method. Given a collection of pre-computed MSAs, M-Coffee has similar CPU requirements to the original T-Coffee. M-Coffee is a freeware open-source package available from http://www.tcoffee.org/.},
-	Affiliation = {The Conway Institute of Biomolecular and Biomedical Research, University College Dublin, Ireland.},
-	Aid = {10.1093/nar/gkl091 {$[$}doi{$]$}},
-	Au = {Wallace IM and O'Sullivan O and Higgins DG and Notredame C},
-	Author = {Wallace, Iain M and O'Sullivan, Orla and Higgins, Desmond G and Notredame, Cedric},
-	Da = {20060324},
-	Date-Added = {2006-04-17 10:01:03 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060406},
-	Dep = {20060323},
-	Edat = {2006/03/25 09:00},
-	Ip = {6},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Keywords = {*Algorithms, Reproducibility of Results, Research Support, Non-U.S. Gov't, Sequence Alignment/*methods, Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wallace/2006.pdf},
-	Mhda = {2006/04/07 09:00},
-	Number = {1362-4962 (Electronic)},
-	Own = {NLM},
-	Pages = {1692-9},
-	Phst = {2006 {$[$}ppublish{$]$}},
-	Pl = {England},
-	Pmid = {16556910},
-	Pst = {epublish},
-	Pt = {Evaluation Studies},
-	Pubm = {Electronic-Print},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Mar 23;34(6):1692-9. Print 2006.},
-	Stat = {MEDLINE},
-	Title = {M-Coffee: combining multiple sequence alignment methods with T-Coffee},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbT4IMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKycBpE5kAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1dhbGxhY2UAABAACAAAwXGNSQAAABEACAAAwGl2CQAAAAEAGABGbT4ARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOldhbGxhY2U6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYWxsYWNlLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2FsbGFjZS8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Bloom:2006io,
-	Abstract = {The biophysical properties that enable proteins to so readily evolve to perform diverse biochemical tasks are largely unknown. Here, we show that a protein's capacity to evolve is enhanced by the mutational robustness conferred by extra stability. We use simulations with model lattice proteins to demonstrate how extra stability increases evolvability by allowing a protein to accept a wider range of beneficial mutations while still folding to its native structure. We confirm this view experimentally by mutating marginally stable and thermostable variants of cytochrome P450 BM3. Mutants of the stabilized parent were more likely to exhibit new or improved functions. Only the stabilized P450 parent could tolerate the highly destabilizing mutations needed to confer novel activities such as hydroxylating the antiinflammatory drug naproxen. Our work establishes a crucial link between protein stability and evolution. We show that we can exploit this link to discover protein functions, and we suggest how natural evolution might do the same.},
-	Affiliation = {Division of Chemistry and Chemical Engineering, Biochemistry and Molecular Biophysics Option, Mail Code 210-41, California Institute of Technology, Pasadena, CA 91125.},
-	Aid = {0510098103 {$[$}pii{$]$}},
-	Author = {Bloom, JD and Labthavikul, ST and Otey, CR and Arnold, FH},
-	Da = {20060403},
-	Date-Added = {2006-04-12 09:16:02 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dep = {20060331},
-	Edat = {2006/04/04 09:00},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci U S A},
-	Language = {ENG},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bloom/2006.pdf},
-	Mhda = {2006/04/04 09:00},
-	Number = {0027-8424 (Print)},
-	Own = {NLM},
-	Pmid = {16581913},
-	Pst = {aheadofprint},
-	Pt = {JOURNAL ARTICLE},
-	Pubm = {Print-Electronic},
-	So = {Proc Natl Acad Sci U S A. 2006 Mar 31;.},
-	Stat = {Publisher},
-	Title = {Protein stability promotes evolvability},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa/QIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKGMBidVYAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUJsb29tAAAQAAgAAMFxjUkAAAARAAgAAMBi18YAAAABABgARmv0AEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCbG9vbToyMDA2LnBkZgAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0Jsb29tLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQmxvb20vMjAwNi5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Gebhard:2006rd,
-	Author = {Gebhard, Leopoldo G. and Risso, Valeria A. and Santos, Javier and Ferreyra, Raul G. and Noguera, Martin E. and Ermacora, Mario R.},
-	Date-Added = {2006-04-11 09:40:53 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Ep = {288},
-	Journal = {Journal of Molecular Biology},
-	Keywords = {protein folding, sequence patterns, conformational information, folding code, folding units},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Gebhard/2006.pdf},
-	Number = {1},
-	Pages = {280--288},
-	Title = {Mapping the Distribution of Conformational Information Throughout a Protein Sequence},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-4J78X07-7/2/a8ae56a66ea05cea84d3912b1205169f},
-	Volume = {358},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEUIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnL08BhKgIAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0dlYmhhcmQAABAACAAAwXGNSQAAABEACAAAwGGMcgAAAAEAGABGbEUARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkdlYmhhcmQ6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9HZWJoYXJkLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR2ViaGFyZC8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-4J78X07-7/2/a8ae56a66ea05cea84d3912b1205169f}}
-
-@article{Paccanaro:2006ny,
-	Abstract = {An important problem in genomics is automatically clustering homologous proteins when only sequence information is available. Most methods for clustering proteins are local, and are based on simply thresholding a measure related to sequence distance. We first show how locality limits the performance of such methods by analysing the distribution of distances between protein sequences. We then present a global method based on spectral clustering and provide theoretical justification of why it will have a remarkable improvement over local methods. We extensively tested our method and compared its performance with other local methods on several subsets of the SCOP (Structural Classification of Proteins) database, a gold standard for protein structure classification. We consistently observed that, the number of clusters that we obtain for a given set of proteins is close to the number of superfamilies in that set; there are fewer singletons; and the method correctly groups most remote homologs. In our experiments, the quality of the clusters as quantified by a measure that combines sensitivity and specificity was consistently better [on average, improvements were 84% over hierarchical clustering, 34% over Connected Component Analysis (CCA) (similar to GeneRAGE) and 72% over another global method, TribeMCL].},
-	Affiliation = {Bioinformatics Group, The Genome Centre, Barts and The London School of Medicine, Queen Mary, University of London, Charterhouse Square, London EC1M 6BQ, UK. albertopaccanaro@yale.edu},
-	Aid = {34/5/1571 {$[$}pii{$]$}},
-	Au = {Paccanaro A and Casbon JA and Saqi MA},
-	Author = {Paccanaro, Alberto and Casbon, James A and Saqi, Mansoor A S},
-	Da = {20060320},
-	Date-Added = {2006-04-11 09:16:31 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20060410},
-	Dep = {20060317},
-	Edat = {2006/03/21 09:00},
-	Ip = {5},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Jt = {Nucleic acids research.},
-	Keywords = {Algorithms, Cluster Analysis, Comparative Study, Proteins/classification, Research Support, Non-U.S. Gov't, Sequence Analysis, Protein/*methods, *Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Paccanaro/2006.pdf},
-	Mhda = {2006/04/11 09:00},
-	Number = {1362-4962 (Electronic)},
-	Own = {NLM},
-	Pages = {1571-80},
-	Phst = {2006 {$[$}ppublish{$]$}},
-	Pl = {England},
-	Pmid = {16547200},
-	Pst = {epublish},
-	Pt = {Evaluation Studies},
-	Pubm = {Electronic-Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	So = {Nucleic Acids Res. 2006 Mar 17;34(5):1571-80. Print 2006.},
-	Stat = {MEDLINE},
-	Title = {Spectral clustering of protein sequences},
-	Volume = {34},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNMIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKNcBhI+AAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVBhY2NhbmFybwAAEAAIAADBcY1JAAAAEQAIAADAYYZQAAAAAQAYAEZs0wBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UGFjY2FuYXJvOjIwMDYucGRmAA4AEgAIADIAMAAwADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvUGFjY2FuYXJvLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUGFjY2FuYXJvLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Skolnick:2006mt,
-	Author = {Skolnick, Jeffrey},
-	Date-Added = {2006-04-11 08:20:24 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Ep = {171},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Skolnick/2006.pdf},
-	Number = {2},
-	Pages = {166--171},
-	T2 = {Theory and simulation/Macromolecular assemblages - Joel Janin and Michael Levitt/Edward H Egelman and Andrew GW Leslie},
-	Title = {In quest of an empirical potential for protein structure prediction},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JF8H7V-1/2/dffe26442754e9dd86fa5121f9db7420},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRYIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJXcBhFzkAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFNrb2xuaWNrABAACAAAwXGNSQAAABEACAAAwGF5qQAAAAEAGABGbRYARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlNrb2xuaWNrOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1Nrb2xuaWNrLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1Nrb2xuaWNrLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JF8H7V-1/2/dffe26442754e9dd86fa5121f9db7420}}
-
-@article{Ginalski:2006ok,
-	Author = {Ginalski, Krzysztof},
-	Date-Added = {2006-04-11 08:17:58 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Ep = {177},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Ginalski/2006.pdf},
-	Number = {2},
-	Pages = {172--177},
-	T2 = {Theory and simulation/Macromolecular assemblages - Joel Janin and Michael Levitt/Edward H Egelman and Andrew GW Leslie},
-	Title = {Comparative modeling for protein structure prediction},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JCCJMC-1/2/2f8a64397c708c732c0faf05fe02f01d},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbEcIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKTMBhFqoAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEdpbmFsc2tpABAACAAAwXGNSQAAABEACAAAwGF5GgAAAAEAGABGbEcARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkdpbmFsc2tpOjIwMDYucGRmAAAOABIACAAyADAAMAA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0dpbmFsc2tpLzIwMDYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0dpbmFsc2tpLzIwMDYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JCCJMC-1/2/2f8a64397c708c732c0faf05fe02f01d}}
-
-@article{Fischer:2006eu,
-	Author = {Fischer, Daniel},
-	Date-Added = {2006-04-11 08:16:33 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Ep = {182},
-	Journal = {Current Opinion in Structural Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Fischer/2006.pdf},
-	Number = {2},
-	Pages = {178--182},
-	T2 = {Theory and simulation/Macromolecular assemblages - Joel Janin and Michael Levitt/Edward H Egelman and Andrew GW Leslie},
-	Title = {Servers for protein structure prediction},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JHMHPW-3/2/7a3d9d59821dbcc72d1c357e27b68328},
-	Volume = {16},
-	Year = {2006},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDwIMjAwNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJgcBhFi0AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0Zpc2NoZXIAABAACAAAwXGNSQAAABEACAAAwGF4nQAAAAEAGABGbDwARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkZpc2NoZXI6MjAwNi5wZGYADgASAAgAMgAwADAANgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9GaXNjaGVyLzIwMDYucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRmlzY2hlci8yMDA2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JHMHPW-3/2/7a3d9d59821dbcc72d1c357e27b68328}}
-
-@article{Wei:2003ia,
-	Abstract = {Binary patterning of polar and nonpolar amino acids has been used as the key design feature for constructing large combinatorial libraries of de novo proteins. Each position in a binary patterned sequence is designed explicitly to be either polar or nonpolar; however, the precise identities of these amino acids are varied extensively. The combinatorial underpinnings of the "binary code" strategy preclude explicit design of particular side chains at specified positions. Therefore, packing interactions cannot be specified a priori. To assess whether the binary code strategy can nonetheless produce well-folded de novo proteins, we constructed a second-generation library based upon a new structural scaffold designed to fold into 102-residue four-helix bundles. Characterization of five proteins chosen arbitrarily from this new library revealed that (1) all are alpha-helical and quite stable; (2) four of the five contain an abundance of tertiary interactions indicative of well-ordered structures; and (3) one protein forms a well-folded structure with native-like features. The proteins from this new 102-residue library are substantially more stable and dramatically more native-like than those from an earlier binary patterned library of 74-residue sequences. These findings demonstrate that chain length is a crucial determinant of structural order in libraries of de novo four-helix bundles. Moreover, these results show that the binary code strategy--if applied to an appropriately designed structural scaffold--can generate large collections of stably folded and/or native-like proteins.},
-	Affiliation = {Department of Chemistry, Princeton University, Princeton, NJ 08544-1009, USA.},
-	Au = {Hecht MH},
-	Author = {Wei, Yinan and Liu, Tun and Sazinsky, Stephen L and Moffet, David A and Pelczer, Istvan and Hecht, Michael H},
-	Da = {20021220},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20030721},
-	Edat = {2002/12/21 04:00},
-	Gr = {R01 GM 62869/GM/NIGMS},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Jt = {Protein science : a publication of the Protein Society.},
-	Keywords = {Amino Acid Sequence and Calorimetry, Differential Scanning and Circular Dichroism and Combinatorial Chemistry Techniques/*methods and Models, Molecular and Molecular Sequence Data and Nuclear Magnetic Resonance, Biomolecular and *Peptide Library and Protein Engineering/*methods and Protein Structure, Secondary and Proteins/*chemistry/genetics/isolation \& purification and Research Support, U.S. Gov't, P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wei/2003.pdf},
-	Lr = {20041117},
-	Mhda = {2003/07/23 05:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {92-102},
-	Pl = {United States},
-	Pmid = {12493832},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Stably folded de novo proteins from a designed combinatorial library},
-	Volume = {12},
-	Year = {2003},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUMIMjAwMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ/r/+M8ZQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA1dlaQAAEAAIAADBcY1JAAAAEQAIAAC//qRGAAAAAQAYAEZtQwBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2VpOjIwMDMucGRmAA4AEgAIADIAMAAwADMALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvV2VpLzIwMDMucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvV2VpLzIwMDMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Govindarajan:1999py,
-	Abstract = {Many seemingly unrelated protein families share common folds. Theoretical models based on structure designability have suggested that a few folds should be very common while many others have low probability. In agreement with the predictions of these models, we show that the distribution of observed protein families over different folds can be modeled with a highly-stretched exponential. Our results suggest that there are approximately 4,000 possible folds, some so unlikely that only approximately 2,000 folds existing among naturally-occurring proteins. Due to the large number of extremely rare folds, constructing a comprehensive database of all existent folds would be difficult. Constructing a database of the most-likely folds representing the vast majority of protein families would be considerably easier.},
-	Affiliation = {Department of Chemistry, University of Michigan, Ann Arbor 48109-1055, USA.},
-	Aid = {10.1002/(SICI)1097-0134(19990601)35:4<408::AID-PROT4>3.0.CO;2-A {$[$}pii{$]$}},
-	Annote = {N = 4000, with 2000 common
-
-VERY nice, pedagogical explanation of how the model is derived, binomial distribution of fams among folds
-best fit was a stretched exponential
-use SCOP 1.37},
-	Au = {Goldstein RA},
-	Author = {Govindarajan, S and Recabarren, R and Goldstein, R A},
-	Da = {19990812},
-	Date-Modified = {2008-03-28 15:38:58 -0700},
-	Dcom = {19990812},
-	Edat = {1999/06/26 10:00},
-	Gr = {LM0577/LM/NLM},
-	Group = {Cited; Fold Space; Reviewed},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins.},
-	Keywords = {Likelihood Functions and Models, Chemical and *Protein Folding and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Govindarajan/1999},
-	Lr = {20041117},
-	Mhda = {2000/06/20 09:00},
-	Number = {4},
-	Own = {NLM},
-	Pages = {408-14},
-	Pl = {UNITED STATES},
-	Pmid = {10382668},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Estimating the total number of protein folds},
-	Volume = {35},
-	Year = {1999},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbE4EMTk5OQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7rT8Hffm1QREYgcHJ2dwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAADEdvdmluZGFyYWphbgAQAAgAAMFxjUkAAAARAAgAAMHf7u0AAAABABgARmxOAEZrzwBGatUARmobAEZkaABAiUMAAgBDaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpHb3ZpbmRhcmFqYW46MTk5OQAADgAKAAQAMQA5ADkAOQAPAAgAAwBoAHMAcgASAD9Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvR292aW5kYXJhamFuLzE5OTkAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAmLi4vLi4vLi4vLi4vQXJ0aWNsZXMvR292aW5kYXJhamFuLzE5OTnSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5Ak0CUgJbAmYCagJ4An8CiAKxArYCuQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALG}}
-
-@article{Liu:2004qm,
-	Abstract = {Currently, of the 10(6) known protein sequences, only about 10(4) structures have been solved. Based on homologies and similarities, proteins are grouped into different families in which each has a structural prototype, namely, the fold, and some share the same folds. However, the total number of folds and families, and furthermore, the distribution of folds over families in nature, are still an enigma. Here, we report a study on the distribution of folds over families and the total number of folds in nature, using a maximum probability principle and the moment method of estimation. A quadratic relation between the numbers of families and folds is found for the number of families in an interval from 6000 to 30,000. For example, about 2700 folds for 23,100 families are obtained, among them about 33 superfolds, including more than 100 families each, and the largest superfold comprises about 800 families. Our results suggest that although the majority of folds have only a single family per fold, a considerably larger number of folds include many more families each than in the database, and the distribution of folds over families in nature differs markedly from the sampled distribution. The long tail of fold distribution is first estimated in this article. The results fit the data for different versions of the structural classification of proteins (SCOP) excellently, and the goodness-of-fit tests strongly support the results. In addition, the method of directly "enlarging" the sample to the population may be useful in inferring distributions of species in different fields.},
-	Affiliation = {National Lab of Solid State Microstructure, Department of Physics and Institute of Biophysics, Nanjing University, Nanjing, China.},
-	Aid = {10.1002/prot.10514 {$[$}doi{$]$}},
-	Annote = {Fold Space
-N = 2714 to 8000
-estimate # of folds based on the number of families, they find that N depends quadratically on M number of families
-use SCOP database
-assume random sampling from folds in nature; sampling is actually quite biased...
-
-has a nice review of the literature},
-	Au = {Wang W},
-	Author = {Liu, Xinsheng and Fan, Ke and Wang, Wei},
-	Ci = {Copyright 2003 Wiley-Liss, Inc.},
-	Da = {20040128},
-	Date-Modified = {2007-04-25 09:46:50 -0700},
-	Dcom = {20040226},
-	Edat = {2004/01/30 05:00},
-	Group = {Cited; Fold Space; Reviewed},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Jt = {Proteins.},
-	Keywords = {Databases, Protein and Nature and Probability and Protein Conformation and *Protein Folding and Proteins/*chemistry/*classification and Research Support, U.S. Gov't, Non-P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Liu/2004a.pdf},
-	Lr = {20041117},
-	Mhda = {2004/02/27 05:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {491-9},
-	Pl = {United States},
-	Pmid = {14747997},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {The number of protein folds and their distribution over families in nature},
-	Volume = {54},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKYJMjAwNGEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKtcA0YOxQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0xpdQAAEAAIAADBcY1JAAAAEQAIAADANNFsAAAAAQAYAEZspgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TGl1OjIwMDRhLnBkZgAADgAUAAkAMgAwADAANABhAC4AcABkAGYADwAIAAMAaABzAHIAEgA7VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpdS8yMDA0YS5wZGYAABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAiLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGl1LzIwMDRhLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCRwJMAlUCYAJkAnICeQKCAqcCrAKvAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAArw=}}
-
-@article{Wang:1998zp,
-	Abstract = {The issue of the number of protein folds is steeped in controversy despite its significance for understanding evolution and predicting protein structure from amino acid sequence. Using various assumptions, several research groups have tackled this problem with very different results. In the present study, a more rigorous statistical approach is used to address this question. From three different data sets, the total number of protein folds is estimated to be about 650. A detailed theoretical analysis suggests that (i) a random sample of non-transmembrane protein families has been selected for crystallization and structural determination, (ii) except for about 40 folds, most protein folds occurring in nature contain about the same number of different protein families. With the estimation of the total number of protein folds, the number of naturally occurring superfamilies can then be estimated as 1150.},
-	Affiliation = {National Laboratory of Biomacromolecules, Institute of Biophysics, Academia Sinica, Beijing, Peoples Republic of China.},
-	Annote = {N = 650
-
-we already "know" more folds than this...
-usual thing; fitting function, uses SCOP},
-	Au = {Wang ZX},
-	Author = {Wang, Z X},
-	Da = {19981130},
-	Date-Modified = {2007-04-27 11:55:10 -0700},
-	Dcom = {19981130},
-	Edat = {1998/09/28},
-	Group = {Cited; Fold Space; Reviewed},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Jt = {Protein engineering.},
-	Keywords = {Likelihood Functions and *Models, Molecular and Models, Statistical and *Models, Theoretical and Poisson Distribution and *Protein Folding and Proteins/*chemistry/classification and Research Support, Non-U.S. Gov't},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Wang/1998.pdf},
-	Lr = {20041117},
-	Mhda = {1998/09/28 00:01},
-	Number = {8},
-	Own = {NLM},
-	Pages = {621-6},
-	Pl = {ENGLAND},
-	Pmid = {9749914},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {A re-estimation for the total numbers of protein folds and superfamilies},
-	Volume = {11},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbUAIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKSsA0X/NQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFdhbmcAEAAIAADBcY1JAAAAEQAIAADANNBzAAAAAQAYAEZtQABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6V2FuZzoxOTk4LnBkZgAADgASAAgAMQA5ADkAOAAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9XYW5nLzE5OTgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1dhbmcvMTk5OC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Fauchere:1983ys,
-	Author = {Fauchere, J-L and Pliska, VE},
-	Date-Added = {2005-11-16 11:46:15 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {European Journal of Medicinal Chemistry},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Fauchere/1983.pdf},
-	Number = {4},
-	Pages = {369-375},
-	Title = {Hydrophobic parameters-pi of amino-acid side-chains from the partitioning of n-acetyl-amino-acid amides},
-	Volume = {18},
-	Year = {1983},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDoIMTk4My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLLL+iaXpQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEZhdWNoZXJlABAACAAAwXGNSQAAABEACAAAv6LZ+gAAAAEAGABGbDoARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkZhdWNoZXJlOjE5ODMucGRmAAAOABIACAAxADkAOAAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0ZhdWNoZXJlLzE5ODMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0ZhdWNoZXJlLzE5ODMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Nozaki:1971mh,
-	Abstract = {The solubilities of amino acids, diglycine, and triglycine have been measured in water and aqueous ethanol as well as dioxane solutions. Free energies of transfer of amino acid side chains and backbone peptide units from water to ethanol and dioxane solutions have been calculated from these data. The results show the similarity between the effects of ethanol and dioxane on the stability of those side chains and peptide units. In particular, the free energies of transfer of hydrophobic side chains to 100% ethanol and dioxane are essentially identical, and have been used to establish a hydrophobicity scale for hydrophobic side chains.
-},
-	Author = {Nozaki, Yasuhiko and Tanford, Charles},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Eprint = {http://www.jbc.org/cgi/reprint/246/7/2211.pdf},
-	Journal = {J. Biol. Chem.},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Nozaki/1971.pdf},
-	Number = {7},
-	Pages = {2211-2217},
-	Title = {The Solubility of Amino Acids and Two Glycine Peptides in Aqueous Ethanol and Dioxane Solutions: Establishment of a Hydrophobicity Scale},
-	Url = {http://www.jbc.org/cgi/content/abstract/246/7/2211},
-	Volume = {246},
-	Year = {1971},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMwIMTk3MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLGL+guXoAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk5vemFraQAQAAgAAMFxjUkAAAARAAgAAL+hKfoAAAABABgARmzMAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpOb3pha2k6MTk3MS5wZGYAAA4AEgAIADEAOQA3ADEALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTm96YWtpLzE5NzEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL05vemFraS8xOTcxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=},
-	Bdsk-Url-1 = {http://www.jbc.org/cgi/content/abstract/246/7/2211}}
-
-@article{Thomas:1996ey,
-	Author = {Thomas, Paul D. and Dill, Ken A.},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Ep = {469},
-	Journal = {Journal of Molecular Biology},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Thomas/1996a.pdf},
-	Number = {2},
-	Pages = {457--469},
-	Sp = {457},
-	Title = {Statistical Potentials Extracted From Protein Structures: How Accurate Are They?},
-	Ty = {JOUR},
-	Url = {http://www.sciencedirect.com/science/article/B6WK7-45PV59P-23/2/7e0034112cdd814dca9db14eb2b3b6d8},
-	Volume = {257},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbS4JMTk5NmEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLCL+fZxEAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlRob21hcwAQAAgAAMFxjUkAAAARAAgAAL+f15EAAAABABgARm0uAEZrzwBGatUARmobAEZkaABAiUMAAgBCaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpUaG9tYXM6MTk5NmEucGRmAA4AFAAJADEAOQA5ADYAYQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9UaG9tYXMvMTk5NmEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvVGhvbWFzLzE5OTZhLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=},
-	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WK7-45PV59P-23/2/7e0034112cdd814dca9db14eb2b3b6d8}}
-
-@article{Karlin:1990vf,
-	Author = {Karlin, Samuel and Dembo, Amir and Kawabata, Tsutomu},
-	Date-Added = {2005-11-07 08:56:44 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {The Annals of Statistics},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Karlin/1990a.pdf},
-	Month = {June},
-	Number = {2},
-	Pages = {571-581},
-	Title = {Statistical Composition of High-Scoring Segments from Molecular Sequences},
-	Volume = {18},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHwJMTk5MGEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKCL+SiHQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkthcmxpbgAQAAgAAMFxjUkAAAARAAgAAL+S+PQAAAABABgARmx8AEZrzwBGatUARmobAEZkaABAiUMAAgBCaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLYXJsaW46MTk5MGEucGRmAA4AFAAJADEAOQA5ADAAYQAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9LYXJsaW4vMTk5MGEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS2FybGluLzE5OTBhLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Thomas:1996xz,
-	Abstract = {We present a method (ENERGI) for extracting energy-like quantities from a data base of protein structures. In this paper, we use the method to generate pairwise additive amino acid "energy" scores. These scores are obtained by iteration until they correctly discriminate a set of known protein folds from decoy conformations. The method succeeds in lattice model tests and in the gapless threading problem as defined by Maiorov and Crippen [Maiorov, V. N. & Crippen, G. M. (1992) J. Mol. Biol. 227, 876-888]. A more challenging test of threading a larger set of test proteins derived from the representative set of Hobohm and Sander [Hobohm, U. & Sander, C. (1994) Protein Sci. 3, 522-524] is used as a "workbench" for exploring how the ENERGI scores depend on their parameter sets.},
-	Affiliation = {Graduate Group in Biophysics, University of California, San Francisco 94143-0448, USA.},
-	Annote = {Dill},
-	Au = {Dill KA},
-	Author = {Thomas, P D and Dill, K A},
-	Da = {19961204},
-	Date-Modified = {2008-05-29 12:17:34 -0700},
-	Dcom = {19961204},
-	Edat = {1996/10/15},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {Amino Acid Sequence and Amino Acids and Binding Sites and Biophysics/methods and Models, Chemical and *Protein Conformation and *Protein Folding and Proteins/*chemistry and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Thomas/1996.pdf},
-	Lr = {20041117},
-	Mhda = {1996/10/15 00:01},
-	Number = {21},
-	Own = {NLM},
-	Pages = {11628--11633},
-	Pl = {UNITED STATES},
-	Pmid = {8876187},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Read = {Yes},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {An iterative method for extracting energy-like quantities from protein structures},
-	Volume = {93},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbS4IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLB8EYaDFQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABlRob21hcwAQAAgAAMFxjUkAAAARAAgAAMEYyqEAAAABABgARm0uAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpUaG9tYXM6MTk5Ni5wZGYAAA4AEgAIADEAOQA5ADYALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvVGhvbWFzLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL1Rob21hcy8xOTk2LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Anfinsen:1973gu,
-	Au = {Anfinsen CB},
-	Author = {Anfinsen, C B},
-	Da = {19730921},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19730921},
-	Edat = {1973/07/20},
-	Jid = {0404511},
-	Journal = {Science},
-	Keywords = {Amino Acid Sequence and Animals and Antigen-Antibody Reactions and Cattle and Epitopes and Exonucleases and Goats/immunology and Models, Structural and Pancreas/enzymology and Peptides and Proinsulin and *Protein Conformation and Protein Denaturation and *Ribonucleases and Spectrometry, Fluorescence and Staphylococcus/enzymology and Swine and Viscosity},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Anfinsen/1973.pdf},
-	Lr = {20031114},
-	Mhda = {1973/07/20 00:01},
-	Number = {96},
-	Own = {NLM},
-	Pages = {223-30},
-	Pl = {UNITED STATES},
-	Pmid = {4124164},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {EC 3.1.- (Ribonucleases)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Principles that govern the folding of protein chains},
-	Volume = {181},
-	Year = {1973},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+IIMTk3My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLx7+eUisAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFuZmluc2VuABAACAAAwXGNSQAAABEACAAAv57CqwAAAAEAGABGa+IARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFuZmluc2VuOjE5NzMucGRmAAAOABIACAAxADkANwAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FuZmluc2VuLzE5NzMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FuZmluc2VuLzE5NzMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{li:765,
-	Author = {Hao Li and Chao Tang and Ned S. Wingreen},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Physical Review Letters},
-	Keywords = {proteins; matrix algebra; molecular orientation; structure functions; eigenvalues and eigenfunctions; water},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/1997.pdf},
-	Number = {4},
-	Pages = {765-768},
-	Publisher = {APS},
-	Title = {Nature of Driving Force for Protein Folding: A Result From Analyzing the Statistical Potential},
-	Url = {http://link.aps.org/abstract/PRL/v79/p765},
-	Volume = {79},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ0L+eUM9QREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAv57BTwAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjE5OTcucGRmAAAOABIACAAxADkAOQA3AC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzE5OTcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==},
-	Bdsk-Url-1 = {http://link.aps.org/abstract/PRL/v79/p765}}
-
-@article{Socolich:2005sb,
-	Annote = {10.1038/nature03991},
-	Author = {Socolich, Michael and Lockless, Steve W. and Russ, William P. and Lee, Heather and Gardner, Kevin H. and Ranganathan, Rama},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Ep = {518},
-	Journal = {Nature},
-	L3 = {http://www.nature.com/nature/journal/v437/n7058/suppinfo/nature03991{\_}S1.html},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Socolich/2005.pdf},
-	M3 = {10.1038/nature03991},
-	Number = {7058},
-	Pages = {512--518},
-	Sn = {0028-0836},
-	Sp = {512},
-	Title = {Evolutionary information for specifying a protein fold},
-	Ty = {JOUR},
-	Url = {http://dx.doi.org/10.1038/nature03991},
-	Volume = {437},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRkIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKwb9d9PgAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFNvY29saWNoABAACAAAwXGNSQAAABEACAAAv15XaAAAAAEAGABGbRkARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlNvY29saWNoOjIwMDUucGRmAAAOABIACAAyADAAMAA1AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NvY29saWNoLzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1NvY29saWNoLzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1038/nature03991}}
-
-@article{Dembo:1991az,
-	Author = {Dembo, Amir and Karlin, Samuel},
-	Date-Added = {2005-11-07 09:00:00 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {The Annals of Probability},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dembo/1991.pdf},
-	Month = {October},
-	Number = {4},
-	Pages = {1756-1767},
-	Title = {Strong Limit Theorems of Empirical Distributions for Large Segmental Exceedances of Partial Sums of Markov Variables},
-	Volume = {19},
-	Year = {1991},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCQIMTk5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKzb+SiVwAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABURlbWJvAAAQAAgAAMFxjUkAAAARAAgAAL+S+dwAAAABABgARmwkAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpEZW1ibzoxOTkxLnBkZgAOABIACAAxADkAOQAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0RlbWJvLzE5OTEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGVtYm8vMTk5MS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Kamtekar:1993qm,
-	Abstract = {A general strategy is described for the de novo design of proteins. In this strategy the sequence locations of hydrophobic and hydrophilic residues were specified explicitly, but the precise identities of the side chains were not constrained and varied extensively. This strategy was tested by constructing a large collection of synthetic genes whose protein products were designed to fold into four-helix bundle proteins. Each gene encoded a different amino acid sequence, but all sequences shared the same pattern of polar and nonpolar residues. Characterization of the expressed proteins indicated that most of the designed sequences folded into compact alpha-helical structures. Thus, a simple binary code of polar and nonpolar residues arranged in the appropriate order can drive polypeptide chains to collapse into globular alpha-helical folds.},
-	Affiliation = {Department of Chemistry, Princeton University, NJ 08544.},
-	Au = {Hecht MH},
-	Author = {Kamtekar, S and Schiffer, J M and Xiong, H and Babik, J M and Hecht, M H},
-	Cin = {Science. 1994 Sep 9;265(5178):1511. PMID: 8079161},
-	Da = {19940119},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19940119},
-	Edat = {1993/12/10},
-	Jid = {0404511},
-	Journal = {Science},
-	Keywords = {Amino Acid Sequence and Base Sequence and Codon and Gene Library and Genes, Synthetic and Molecular Sequence Data and Molecular Weight and Oligodeoxyribonucleotides and *Protein Conformation and *Protein Engineering and Protein Folding and Protein Structure, Secondary and Proteins/*chemistry/genetics/isolation \& purification and Research Support, Non-U.S. Gov't},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kamtekar/1993.pdf},
-	Lr = {20041117},
-	Mhda = {1993/12/10 00:01},
-	Number = {5140},
-	Own = {NLM},
-	Pages = {1680-5},
-	Pl = {UNITED STATES},
-	Pmid = {8259512},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Protein design by binary patterning of polar and nonpolar amino acids},
-	Volume = {262},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHkIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKW7+L1tkAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEthbXRla2FyABAACAAAwXGNSQAAABEACAAAv4xHWQAAAAEAGABGbHkARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkthbXRla2FyOjE5OTMucGRmAAAOABIACAAxADkAOQAzAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0thbXRla2FyLzE5OTMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0thbXRla2FyLzE5OTMucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Li:1998mu,
-	Abstract = {Protein structures are a very special class among all possible structures. It has been suggested that a "designability principle" plays a crucial role in nature's selection of protein sequences and structures. Here, we provide a theoretical base for such a selection principle, using a simple model of protein folding based on hydrophobic interactions. A structure is reduced to a string of 0s and 1s, which represent the surface and core sites, respectively, as the backbone is traced. Each structure is therefore associated with one point in a high dimensional space. Sequences are represented by strings of their hydrophobicities and thus can be mapped into the same space. A sequence that lies closer to a particular structure in this space than to any other structures will have that structure as its ground state. Atypical structures, namely those far away from other structures in the high dimensional space, have more sequences that fold into them and are thermodynamically more stable. We argue that the most common folds of proteins are the most atypical in the space of possible structures.},
-	Affiliation = {NEC Research Institute, 4 Independence Way, Princeton, NJ 08540, USA.},
-	Au = {Wingreen NS},
-	Author = {Li, H and Tang, C and Wingreen, N S},
-	Da = {19980604},
-	Date-Modified = {2008-03-28 11:17:16 -0700},
-	Dcom = {19980604},
-	Edat = {1998/06/06},
-	Group = {Designability},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {Models, Biological and *Protein Folding and Proteins/*chemistry and Solvents and Structure-Activity Relationship and Surface Properties},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/1998.pdf},
-	Lr = {20001218},
-	Mhda = {1998/06/06 00:01},
-	Number = {9},
-	Own = {NLM},
-	Pages = {4987-90},
-	Pl = {UNITED STATES},
-	Pmid = {9560215},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Solvents)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Are protein folds atypical?},
-	Volume = {95},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ0b+L4+wAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAv4xUbAAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjE5OTgucGRmAAAOABIACAAxADkAOQA4AC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzE5OTgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzE5OTgucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Hecht:2004xl,
-	Abstract = {Combinatorial libraries of de novo amino acid sequences can provide a rich source of diversity for the discovery of novel proteins with interesting and important activities. Randomly generated sequences, however, rarely fold into well-ordered proteinlike structures. To enhance the quality of a library, features of rational design must be used to focus sequence diversity into those regions of sequence space that are most likely to yield folded structures. This review describes how focused libraries can be constructed by designing the binary pattern of polar and nonpolar amino acids to favor proteins that contain abundant secondary structure, while simultaneously burying hydrophobic side chains and exposing hydrophilic side chains to solvent. The "binary code" for protein design was used to construct several libraries of de novo proteins, including both alpha-helical and beta-sheet structures. The recently determined solution structure of a binary patterned four-helix bundle is well ordered, thereby demonstrating that sequences that have neither been selected by evolution (in vivo or in vitro) nor designed by computer can form nativelike proteins. Examples are presented demonstrating how binary patterned libraries have successfully produced well-ordered structures, cofactor binding, catalytic activity, self-assembled monolayers, amyloid-like nanofibrils, and protein-based biomaterials.},
-	Affiliation = {Department of Chemistry, Princeton University, Princeton, NJ 08544, USA. hecht@princeton.edu},
-	Aid = {13/7/1711 {$[$}pii{$]$}},
-	Au = {Wei Y},
-	Author = {Hecht, M H and Das, A and Go, A and Bradley, L H and Wei, Y},
-	Da = {20040624},
-	Date-Modified = {2008-05-29 12:00:22 -0700},
-	Dcom = {20050125},
-	Edat = {2004/06/25 05:00},
-	Gr = {R01 GM062869/GM/NIGMS},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Keywords = {*Models, Chemical and *Protein Engineering and *Protein Folding and *Protein Structure, Tertiary and Proteins/*chemistry and Research Support, U.S. Gov't, P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Hecht/2004.pdf},
-	Mhda = {2005/01/26 09:00},
-	Number = {7},
-	Own = {NLM},
-	Pages = {1711--1723},
-	Pl = {United States},
-	Pmid = {15215517},
-	Pst = {ppublish},
-	Pt = {Review, Tutorial},
-	Pubm = {Print},
-	Rf = {73},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {De novo proteins from designed combinatorial libraries},
-	Volume = {13},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbF4IMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLMr+L6TdQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABUhlY2h0AAAQAAgAAMFxjUkAAAARAAgAAL+MWbcAAAABABgARmxeAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpIZWNodDoyMDA0LnBkZgAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0hlY2h0LzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvSGVjaHQvMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Dembo:1994ms,
-	Author = {Dembo, Amir and Karlin, Samuel and Zeitouni, Ofer},
-	Date-Added = {2005-11-07 09:02:49 -0800},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {The Annals of Probability},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dembo/1994.pdf},
-	Month = {October},
-	Number = {4},
-	Pages = {2022-2039},
-	Title = {Limit Distribution of Maximal Non-Aligned Two-Sequence Segmental Score},
-	Volume = {22},
-	Year = {1994},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCQIMTk5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKz7+SizQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABURlbWJvAAAQAAgAAMFxjUkAAAARAAgAAL+S+7QAAAABABgARmwkAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpEZW1ibzoxOTk0LnBkZgAOABIACAAxADkAOQA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0RlbWJvLzE5OTQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRGVtYm8vMTk5NC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{zhou:148103,
-	Author = {Fei Zhou and Gevorg Grigoryan and Steve R. Lustig and Amy E. Keating and Gerbrand Ceder and Dane Morgan},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Eid = {148103},
-	Journal = {Physical Review Letters},
-	Keywords = {proteins; molecular biophysics},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zhou/2005.pdf},
-	Number = {14},
-	Numpages = {4},
-	Pages = {148103},
-	Publisher = {APS},
-	Title = {Coarse-Graining Protein Energetics in Sequence Variables},
-	Url = {http://link.aps.org/abstract/PRL/v95/e148103},
-	Volume = {95},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVwIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKfb9jWSNQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFpob3UAEAAIAADBcY1JAAAAEQAIAAC/Y7uTAAAAAQAYAEZtXABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WmhvdToyMDA1LnBkZgAADgASAAgAMgAwADAANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9aaG91LzIwMDUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1pob3UvMjAwNS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6},
-	Bdsk-Url-1 = {http://link.aps.org/abstract/PRL/v95/e148103}}
-
-@url{Eddy:nw,
-	Author = {Eddy, S R},
-	Date-Added = {2005-10-28 13:28:15 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Title = {HMMER: sequence analysis using profile hidden Markov models},
-	Url = {http://hmmer.wustl.edu/},
-	Bdsk-Url-1 = {http://hmmer.wustl.edu/}}
-
-@article{Altschul:1997kn,
-	Abstract = {The BLAST programs are widely used tools for searching protein and DNA databases for sequence similarities. For protein comparisons, a variety of definitional, algorithmic and statistical refinements described here permits the execution time of the BLAST programs to be decreased substantially while enhancing their sensitivity to weak similarities. A new criterion for triggering the extension of word hits, combined with a new heuristic for generating gapped alignments, yields a gapped BLAST program that runs at approximately three times the speed of the original. In addition, a method is introduced for automatically combining statistically significant alignments produced by BLAST into a position-specific score matrix, and searching the database using this matrix. The resulting Position-Specific Iterated BLAST (PSI-BLAST) program runs at approximately the same speed per iteration as gapped BLAST, but in many cases is much more sensitive to weak but biologically relevant sequence similarities. PSI-BLAST is used to uncover several new and interesting members of the BRCT superfamily.},
-	Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA. altschul@ncbi.nlm.nih.gov},
-	Aid = {gka562 {$[$}pii{$]$}},
-	Au = {Lipman DJ},
-	Author = {Altschul, S F and Madden, T L and Schaffer, A A and Zhang, J and Zhang, Z and Miller, W and Lipman, D J},
-	Da = {19971002},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19971002},
-	Edat = {1997/09/01},
-	Gr = {LM05110/LM/NLM},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Algorithms and Amino Acid Sequence and Animals and DNA/*chemistry and *Databases, Factual and Humans and Molecular Sequence Data and Proteins/*chemistry and Research Support, U.S. Gov't, P.H.S. and *Sequence Alignment and *Software},
-	Language = {eng},
-	Lr = {20041117},
-	Mhda = {1997/09/01 00:01},
-	Number = {17},
-	Own = {NLM},
-	Pages = {3389-402},
-	Pl = {ENGLAND},
-	Pmid = {9254694},
-	Pst = {ppublish},
-	Pt = {Review, Tutorial},
-	Pubm = {Print},
-	Rf = {90},
-	Rn = {9007-49-2 (DNA)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Gapped BLAST and PSI-BLAST: a new generation of protein database search programs},
-	Volume = {25},
-	Year = {1997}}
-
-@article{Pearson:1990as,
-	Abstract = {The FASTA program can search the NBRF protein sequence library (2.5 million residues) in less than 20 min on an IBM-PC microcomputer and unambiguously detect proteins that shared a common ancestor billions of years in the past. FASTA is both fast and selective because it initially considers only amino acid identities. Its sensitivity is increased not only by using the PAM250 matrix to score and rescore regions with large numbers of identities but also by joining initial regions. The results of searches with FASTA compare favorably with results using NWS-based programs that are 100 times slower. FASTA is slightly less sensitive but considerably more selective. It is not clear that NWS-based programs would be more successful in finding distantly related members of the G-protein-coupled receptor family. The joining step by FASTA to calculate the initn score is especially useful for sequences that share regions of sequence similarity that are separated by variable-length loops. FASTP and FASTA were designed to identify protein sequences that have descended from a common ancestor, and they have proved very useful for this task. In many cases, a FASTA sequence search will result in a list of high scoring library sequences that are homologous to the query sequence, or the search will result in a list of sequences with similarity scores that cannot be distinguished from the bulk of the library. In either case, the question of whether there are sequences in the library that are clearly related to the query sequence has been answered unambiguously. Unfortunately, the results often will not be so clear-cut, and careful analysis of similarity scores, statistical significance, the actual aligned residues, and the biological context are required. In the course of analyzing the G-protein-coupled receptor family, several proteins were found that, because of a high initn score and a low init1 score that increased almost 2-fold with optimization, appeared to be members of this family which were not previously recognized. RDF2 analysis showed borderline z values, and only a careful examination of the sequence alignments that focused on the conserved residues provided convincing evidence that the high scores were fortuitous. As sequence comparison methods become more powerful by becoming more sensitive, they become more likely to mislead, and even greater care is required.},
-	Au = {Pearson WR},
-	Author = {Pearson, W R},
-	Da = {19900426},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19900426},
-	Edat = {1990/01/01},
-	Jid = {0212271},
-	Journal = {Methods Enzymol},
-	Keywords = {Algorithms and *Amino Acid Sequence and Animals and *Base Sequence and Comparative Study and DNA/*genetics and Eye Proteins/genetics and *Gene Library and *Information Systems and Molecular Sequence Data and Opsin and Proteins/*genetics and Receptors, Adrenergic, beta/genetics and *Sequence Homology, Nucleic Acid and Software},
-	Language = {eng},
-	Lr = {20031114},
-	Mhda = {1990/01/01 00:01},
-	Number = {0076-6879},
-	Own = {NLM},
-	Pages = {63-98},
-	Pl = {UNITED STATES},
-	Pmid = {2156132},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {9007-49-2 (DNA)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Rapid and sensitive sequence comparison with FASTP and FASTA},
-	Volume = {183},
-	Year = {1990}}
-
-@article{Humphrey:1996ve,
-	Abstract = {VMD is a molecular graphics program designed for the display and analysis of molecular assemblies, in particular biopolymers such as proteins and nucleic acids. VMD can simultaneously display any number of structures using a wide variety of rendering styles and coloring methods. Molecules are displayed as one or more "representations," in which each representation embodies a particular rendering method and coloring scheme for a selected subset of atoms. The atoms displayed in each representation are chosen using an extensive atom selection syntax, which includes Boolean operators and regular expressions. VMD provides a complete graphical user interface for program control, as well as a text interface using the Tcl embeddable parser to allow for complex scripts with variable substitution, control loops, and function calls. Full session logging is supported, which produces a VMD command script for later playback. High-resolution raster images of displayed molecules may be produced by generating input scripts for use by a number of photorealistic image-rendering applications. VMD has also been expressly designed with the ability to animate molecular dynamics (MD) simulation trajectories, imported either from files or from a direct connection to a running MD simulation. VMD is the visualization component of MDScope, a set of tools for interactive problem solving in structural biology, which also includes the parallel MD program NAMD, and the MDCOMM software used to connect the visualization and simulation programs. VMD is written in C++, using an object-oriented design; the program, including source code and extensive documentation, is freely available via anonymous ftp and through the World Wide Web.},
-	Affiliation = {Theoretical Biophysics Group, University of Illinois, Urbana 61801, USA.},
-	Aid = {0263785596000185 {$[$}pii{$]$}},
-	Au = {Schulten K},
-	Author = {Humphrey, W and Dalke, A and Schulten, K},
-	Da = {19961204},
-	Date-Modified = {2008-05-29 12:37:40 -0700},
-	Dcom = {19961204},
-	Edat = {1996/02/01},
-	Gr = {5 P41 RR05969-04/RR/NCRR},
-	Jid = {9014762},
-	Journal = {J Mol Graph},
-	Keywords = {*Computer Graphics and *Computer Simulation and Computers and *Models, Molecular and Nucleic Acids/chemistry and Proteins/chemistry and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and User-Computer Interface},
-	Language = {eng},
-	Lr = {20041117},
-	Mhda = {1996/02/01 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {27--28, 33--8},
-	Pl = {UNITED STATES},
-	Pmid = {8744570},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {{VMD: Visual Molecular Dynamics}},
-	Volume = {14},
-	Year = {1996}}
-
-@article{Brenner:2000km,
-	Abstract = {The ASTRAL compendium provides several databases and tools to aid in the analysis of protein structures, particularly through the use of their sequences. The SPACI scores included in the system summarize the overall characteristics of a protein structure. A structural alignments database indicates residue equivalencies in superimposed protein domain structures. The PDB sequence-map files provide a linkage between the amino acid sequence of the molecule studied (SEQRES records in a database entry) and the sequence of the atoms experimentally observed in the structure (ATOM records). These maps are combined with information in the SCOPdatabase to provide sequences of protein domains. Selected subsets of the domain database, with varying degrees of similarity measured in several different ways, are also available. ASTRALmay be accessed at http://astral.stanford.edu/},
-	Affiliation = {Department of Structural Biology, Stanford University, Fairchild Building D-109, Stanford, CA 94305-5126, USA. brenner@compbio.berkeley.edu},
-	Aid = {gkd052 {$[$}pii{$]$}},
-	Au = {Levitt M},
-	Author = {Brenner, S E and Koehl, P and Levitt, M},
-	Da = {20000225},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20000225},
-	Edat = {1999/12/11 09:00},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Amino Acid Sequence and *Database Management Systems and *Databases, Factual and *Protein Conformation and Research Support, U.S. Gov't, Non-P.H.S. and Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Brenner/2000.pdf},
-	Lr = {20041117},
-	Mhda = {2000/03/04 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {254-6},
-	Pl = {ENGLAND},
-	Pmid = {10592239},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {The ASTRAL compendium for protein structure and sequence analysis},
-	Volume = {28},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAAIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK+L+HsrRQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0JyZW5uZXIAABAACAAAwXGNSQAAABEACAAAv4gVJAAAAAEAGABGbAAARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkJyZW5uZXI6MjAwMC5wZGYADgASAAgAMgAwADAAMAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9CcmVubmVyLzIwMDAucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQnJlbm5lci8yMDAwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Chandonia:2002gu,
-	Abstract = {The ASTRAL compendium provides several databases and tools to aid in the analysis of protein structures, particularly through the use of their sequences. It is partially derived from the SCOP database of protein domains, and it includes sequences for each domain as well as other resources useful for studying these sequences and domain structures. Several major improvements have been made to the ASTRAL compendium since its initial release 2 years ago. The number of protein domain sequences included has doubled from 15 190 to 30 867, and additional databases have been added. The Rapid Access Format (RAF) database contains manually curated mappings linking the biological amino acid sequences described in the SEQRES records of PDB entries to the amino acid sequences structurally observed (provided in the ATOM records) in a format designed for rapid access by automated tools. This information is used to derive sequences for protein domains in the SCOP database. In cases where a SCOP domain spans several protein chains, all of which can be traced back to a single genetic source, a 'genetic domain' sequence is created by concatenating the sequences of each chain in the order found in the original gene sequence. Both the original-style library of SCOP sequences and a new library including genetic domain sequences are available. Selected representative subsets of each of these libraries, based on multiple criteria and degrees of similarity, are also included. ASTRAL may be accessed at http://astral.stanford.edu/.},
-	Affiliation = {Berkeley Structural Genomics Center, Ernest Orlando Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA.},
-	Au = {Brenner SE},
-	Author = {Chandonia, John-Marc and Walker, Nigel S and Lo Conte, Loredana and Koehl, Patrice and Levitt, Michael and Brenner, Steven E},
-	Da = {20011225},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20020121},
-	Edat = {2001/12/26 10:00},
-	Gr = {GM1455/GM/NIGMS},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Amino Acid Sequence and Animals and Computer Graphics and Database Management Systems and *Databases, Protein and Information Storage and Retrieval and Internet and Models, Molecular and Molecular Sequence Data and Protein Folding and Protein Structure, Tertiary and Proteins/*chemistry/genetics and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and Sequence Alignment and Sequence Homology, Nucleic Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chandonia/2002.pdf},
-	Lr = {20041117},
-	Mhda = {2002/01/22 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {260-3},
-	Pl = {England},
-	Pmid = {11752310},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {ASTRAL compendium enhancements},
-	Volume = {30},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAsIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ+7hyT3AAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUNoYW5kb25pYQAAEAAIAADBcY1JAAAAEQAIAAC4cr/wAAAAAQAYAEZsCwBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hhbmRvbmlhOjIwMDIucGRmAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQ2hhbmRvbmlhLzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hhbmRvbmlhLzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@book{Gumbel:1958xo,
-	Address = {New York City, NY},
-	Author = {E J Gumbel},
-	Date-Added = {2005-10-25 11:58:31 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Publisher = {Columbia University Press},
-	Title = {Statistics of Extremes},
-	Year = {1958}}
-
-@article{Henikoff:1992xd,
-	Abstract = {Methods for alignment of protein sequences typically measure similarity by using a substitution matrix with scores for all possible exchanges of one amino acid with another. The most widely used matrices are based on the Dayhoff model of evolutionary rates. Using a different approach, we have derived substitution matrices from about 2000 blocks of aligned sequence segments characterizing more than 500 groups of related proteins. This led to marked improvements in alignments and in searches using queries from each of the groups.},
-	Affiliation = {Howard Hughes Medical Institute, Fred Hutchinson Cancer Research Center, Seattle, WA 98104.},
-	Au = {Henikoff JG},
-	Author = {Henikoff, S and Henikoff, J G},
-	Da = {19921223},
-	Date-Modified = {2008-05-28 22:28:26 -0700},
-	Dcom = {19921223},
-	Edat = {1992/11/15},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {Algorithms and *Amino Acid Sequence and Animals and Caenorhabditis elegans/genetics and Comparative Study and Drosophila/genetics and Lod Score and Mathematics and Molecular Sequence Data and Probability and Proteins/chemistry/*genetics and Research Support, U.S. Gov't, P.H.S. and *Sequence Homology, Amino Acid and *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Henikoff/1992.pdf},
-	Lr = {20041117},
-	Mhda = {1992/11/15 00:01},
-	Number = {22},
-	Own = {NLM},
-	Pages = {10915--10919},
-	Pl = {UNITED STATES},
-	Pmid = {1438297},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Amino acid substitution matrices from protein blocks},
-	Volume = {89},
-	Year = {1992},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbGQIMTk5Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKMb9ODRVQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEhlbmlrb2ZmABAACAAAwXGNSQAAABEACAAAv05vhQAAAAEAGABGbGQARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkhlbmlrb2ZmOjE5OTIucGRmAAAOABIACAAxADkAOQAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0hlbmlrb2ZmLzE5OTIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0hlbmlrb2ZmLzE5OTIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Smith:1981vl,
-	Au = {Waterman MS},
-	Author = {Smith, T F and Waterman, M S},
-	Da = {19811029},
-	Date-Modified = {2008-05-29 11:56:57 -0700},
-	Dcom = {19811029},
-	Edat = {1981/03/25},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Keywords = {*Base Sequence and *Models, Chemical},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Smith/1981.pdf},
-	Lr = {20001218},
-	Mhda = {1981/03/25 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {195--197},
-	Pl = {ENGLAND},
-	Pmid = {7265238},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Identification of common molecular subsequences},
-	Volume = {147},
-	Year = {1981},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGKAAAAAAGKAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbRcIMTk4MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA+7asHvpVwAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABVNtaXRoAAAQAAgAAMFxjUkAAAARAAgAAMHwFdwAAAABABgARm0XAEZrzwBGatUARmobAEZkaABAiUMAAgBAaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpTbWl0aDoxOTgxLnBkZgAOABIACAAxADkAOAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA8VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NtaXRoLzE5ODEucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAjLi4vLi4vLi4vLi4vQXJ0aWNsZXMvU21pdGgvMTk4MS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkcCTAJVAmACZAJyAnkCggKoAq0CsAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK9}}
-
-@article{Pearson:2000bu,
-	Abstract = {The FASTA3 and FASTA2 packages provide a flexible set of sequence-comparison programs that are particularly valuable because of their accurate statistical estimates and high-quality alignments. Traditionally, sequence similarity searches have sought to ask one question: "Is my query sequence homologous to anything in the database?" Both FASTA and BLAST can provide reliable answers to this question with their statistical estimates; if the expectation value E is < 0.001-0.01 and you are not doing hundreds of searches a day, the answer is probably yes. In general, the most effective search strategies follow these rules: 1. Whenever possible, compare at the amino acid level, rather than the nucleotide level. Search first with protein sequences (blastp, fasta3, and ssearch3), then with translated DNA sequences (fastx, blastx), and only at the DNA level as a last resort (Table 5). 2. Search the smallest database that is likely to contain the sequence of interest (but it must contain many unrelated sequences for accurate statistical estimates). 3. Use sequence statistics, rather than percent identity or percent similarity, as your primary criterion for sequence homology. 4. Check that the statistics are likely to be accurate by looking for the highest-scoring unrelated sequence, using prss3 to confirm the expectation, and searching with shuffled copies of the query sequence [randseq, searches with shuffled sequences should have E approx 1.0]. 5. Consider searches with different gap penalties and other scoring matrices. Searches with long query sequences against full-length sequence libraries will not change dramatically when BLOSUM62 is used instead of BLOSUM50 (20), or a gap penalty of -14/-2 is used in place of -12/-2. However, shallower or more stringent scoring matrices are more effective at uncovering relationships in partial sequences (3,18), and they can be used to sharpen dramatically the scope of the similarity search. However, as illustrated in the last section, the E value is only the first step in characterizing a sequence relationship. Once one has confidence that the sequences are homologous, one should look at the sequence alignments and percent identities, particularly when searching with lower quality sequences. When sequence alignments are very short, the alignment should become more significant when a shallower scoring matrix is used, e.g., BLOSUM62 rather than BLOSUM50 (remember to change the gap penalties). Homology can be reliably inferred from statistically significant similarity. Whereas homology implies common three-dimensional structure, homology need not imply common function. Orthologous sequences usually have similar functions, but paralogous sequences often acquire very different functional roles. Motif databases, such as PROSITE (21), can provide evidence for the conservation of critical functional residues. However, motif identity in the absence of overall sequence similarity is not a reliable indicator of homology.},
-	Affiliation = {University of Virginia, Charlottesville, USA.},
-	Au = {Pearson WR},
-	Author = {Pearson, W R},
-	Da = {19991116},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19991116},
-	Edat = {1999/11/05},
-	Gr = {LM04961/LM/NLM},
-	Jid = {9214969},
-	Journal = {Methods Mol Biol},
-	Keywords = {Amino Acid Sequence and *Database Management Systems and Evolution, Molecular and *Information Storage and Retrieval and Molecular Sequence Data and Research Support, U.S. Gov't, P.H.S. and Sequence Alignment/*methods and Sequence Homology, Amino Acid},
-	Language = {eng},
-	Lr = {20041117},
-	Mhda = {1999/11/05 00:01},
-	Number = {1064-3745},
-	Own = {NLM},
-	Pages = {185-219},
-	Pl = {UNITED STATES},
-	Pmid = {10547837},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Flexible sequence similarity searching with the FASTA3 program package},
-	Volume = {132},
-	Year = {2000}}
-
-@url{blocks_url,
-	Date-Added = {2005-09-29 09:43:33 -0400},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Title = {Blocks WWW Server},
-	Url = {http://blocks.fhcrc.org/},
-	Bdsk-Url-1 = {http://blocks.fhcrc.org/}}
-
-@article{Helling:2001ag,
-	Abstract = {It has been noted that natural proteins adapt only a limited number of folds. Several researchers have investigated why and how nature has selected this small number of folds. Using simple models of protein folding, we demonstrate systematically that there is a "designability principle" behind nature's selection of protein folds. The designability of a structure (fold) is measured by the number of sequences that can design the structure--that is, sequences that possess the structure as their unique ground state. Structures differ drastically in terms of their designability. A small number of highly designable structures emerge with a number of associated sequences much larger than the average. These highly designable structures possess proteinlike secondary structures, motifs, and even tertiary symmetries. In addition, they are thermodynamically more stable and fold faster than other structures. These results suggest that protein structures are selected in nature because they are readily designed and stable against mutations, and that such a selection simultaneously leads to thermodynamic stability.},
-	Affiliation = {NEC Research Institute, 4 Independence Way, Princeton, NJ 08540, USA.},
-	Aid = {S1093326300001376 {$[$}pii{$]$}},
-	Au = {Tang C},
-	Author = {Helling, R and Li, H and Melin, R and Miller, J and Wingreen, N and Zeng, C and Tang, C},
-	Da = {20010530},
-	Date-Modified = {2008-05-29 12:12:27 -0700},
-	Dcom = {20011205},
-	Edat = {2001/05/31 10:00},
-	Group = {Designability},
-	Jid = {9716237},
-	Journal = {J Mol Graph Model},
-	Keywords = {Amino Acid Motifs and Computer Simulation and Humans and Mathematics and Models, Molecular and *Protein Conformation and *Protein Folding and Protein Structure, Secondary and Protein Structure, Tertiary and Proteins/*chemistry and Thermodynamics},
-	Language = {eng},
-	Lr = {20041117},
-	Mhda = {2002/01/05 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {157--167},
-	Pl = {United States},
-	Pmid = {11381527},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {The designability of protein structures},
-	Volume = {19},
-	Year = {2001}}
-
-@article{Barton:1987uj,
-	Abstract = {An algorithm is presented for the multiple alignment of protein sequences that is both accurate and rapid computationally. The approach is based on the conventional dynamic-programming method of pairwise alignment. Initially, two sequences are aligned, then the third sequence is aligned against the alignment of both sequences one and two. Similarly, the fourth sequence is aligned against one, two and three. This is repeated until all sequences have been aligned. Iteration is then performed to yield a final alignment. The accuracy of sequence alignment is evaluated from alignment of the secondary structures in a family of proteins. For the globins, the multiple alignment was on average 99% accurate compared to 90% for pairwise comparison of sequences. For the alignment of immunoglobulin constant and variable domains, the use of many sequences yielded an alignment of 63% average accuracy compared to 41% average for individual variable/constant alignments. The multiple alignment algorithm yields an assignment of disulphide connectivity in mammalian serotransferrin that is consistent with crystallographic data, whereas pairwise alignments give an alternative assignment.},
-	Affiliation = {Department of Crystallography, Birkbeck College, London, U.K.},
-	Au = {Sternberg MJ},
-	Author = {Barton, G J and Sternberg, M J},
-	Da = {19880302},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19880302},
-	Edat = {1987/11/20},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Keywords = {*Algorithms and Amino Acid Sequence and *Globins and Immunoglobulins and Molecular Sequence Data and Protein Conformation and Research Support, Non-U.S. Gov't and Transferrin},
-	Language = {eng},
-	Lr = {20041117},
-	Mhda = {1987/11/20 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {327-37},
-	Pl = {ENGLAND},
-	Pmid = {3430611},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {9004-22-2 (Globins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {A strategy for the rapid multiple alignment of protein sequences. Confidence levels from tertiary structure comparisons},
-	Volume = {198},
-	Year = {1987}}
-
-@article{Li:1996wo,
-	Abstract = {Protein structures in nature often exhibit a high degree of regularity (for example, secondary structure and tertiary symmetries) that is absent from random compact conformations. With the use of a simple lattice model of protein folding, it was demonstrated that structural regularities are related to high "designability" and evolutionary stability. The designability of each compact structure is measured by the number of sequences that can design the structure-that is, sequences that possess the structure as their nondegenerate ground state. Compact structures differ markedly in terms of their designability; highly designable structures emerge with a number of associated sequences much larger than the average. These highly designable structures possess "proteinlike" secondary structure and even tertiary symmetries. In addition, they are thermodynamically more stable than other structures. These results suggest that protein structures are selected in nature because they are readily designed and stable against mutations, and that such a selection simultaneously leads to thermodynamic stability.},
-	Affiliation = {NEC Research Institute, 4 Independence Way, Princeton, NJ 08540, USA.},
-	Au = {Wingreen N},
-	Author = {Li, H and Helling, R and Tang, C and Wingreen, N},
-	Cin = {Science. 1996 Aug 2;273(5275):610. PMID: 8701315},
-	Da = {19960903},
-	Date-Modified = {2008-05-29 11:58:45 -0700},
-	Dcom = {19960903},
-	Edat = {1996/08/02},
-	Group = {Designability},
-	Jid = {0404511},
-	Journal = {Science},
-	Keywords = {*Amino Acid Sequence and Evolution, Molecular and *Models, Molecular and Mutation and *Protein Conformation and *Protein Folding and Protein Structure, Secondary and Protein Structure, Tertiary and Proteins/*chemistry/genetics and Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Li/1996.pdf},
-	Lr = {20011126},
-	Mhda = {1996/08/02 00:01},
-	Number = {5275},
-	Own = {NLM},
-	Pages = {666--669},
-	Pl = {UNITED STATES},
-	Pmid = {8662562},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Emergence of preferred structures in a simple model of protein folding},
-	Volume = {273},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKAIMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJz8DkAulQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAkxpABAACAAAwXGNSQAAABEACAAAwORlWQAAAAEAGABGbKAARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxpOjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xpLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL0xpLzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@unpublished{Eddy:1997mx,
-	Author = {Eddy, S R},
-	Date-Added = {2005-08-29 10:09:31 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Eddy/1997.pdf},
-	Note = {Available at http://selab.wustl.edu/cgi-bin/selab.pl?mode=publications},
-	Title = {Maximum likelihood fitting of extreme value distributions},
-	Year = {1997},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbC4IMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKAr84itVQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABEVkZHkAEAAIAADBcY1JAAAAEQAIAAC/OO1FAAAAAQAYAEZsLgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RWRkeToxOTk3LnBkZgAADgASAAgAMQA5ADkANwAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9FZGR5LzE5OTcucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0VkZHkvMTk5Ny5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Park:1998iy,
-	Abstract = {The sequences of related proteins can diverge beyond the point where their relationship can be recognised by pairwise sequence comparisons. In attempts to overcome this limitation, methods have been developed that use as a query, not a single sequence, but sets of related sequences or a representation of the characteristics shared by related sequences. Here we describe an assessment of three of these methods: the SAM-T98 implementation of a hidden Markov model procedure; PSI-BLAST; and the intermediate sequence search (ISS) procedure. We determined the extent to which these procedures can detect evolutionary relationships between the members of the sequence database PDBD40-J. This database, derived from the structural classification of proteins (SCOP), contains the sequences of proteins of known structure whose sequence identities with each other are 40% or less. The evolutionary relationships that exist between those that have low sequence identities were found by the examination of their structural details and, in many cases, their functional features. For nine false positive predictions out of a possible 432,680, i.e. at a false positive rate of about 1/50,000, SAM-T98 found 35% of the true homologous relationships in PDBD40-J, whilst PSI-BLAST found 30% and ISS found 25%. Overall, this is about twice the number of PDBD40-J relations that can be detected by the pairwise comparison procedures FASTA (17%) and GAP-BLAST (15%). For distantly related sequences in PDBD40-J, those pairs whose sequence identity is less than 30%, SAM-T98 and PSI-BLAST detect three times the number of relationships found by the pairwise methods.},
-	Affiliation = {MRC Laboratory of Molecular Biology, Hills Road, Cambridge, CB2 2QH, UK.},
-	Aid = {S0022283698922215 {$[$}pii{$]$}},
-	Au = {Chothia C},
-	Author = {Park, J and Karplus, K and Barrett, C and Hughey, R and Haussler, D and Hubbard, T and Chothia, C},
-	Ci = {Copyright 1998 Academic Press.},
-	Da = {19990128},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19990128},
-	Edat = {1998/12/05},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Keywords = {Comparative Study and Databases, Factual and Evaluation Studies and Evolution, Molecular and Markov Chains and Proteins/chemistry/genetics and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and Sequence Alignment/*methods/statistics \& numerical data and Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Park/1998.pdf},
-	Lr = {20041117},
-	Mhda = {1998/12/05 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {1201-10},
-	Pl = {ENGLAND},
-	Pmid = {9837738},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {S},
-	Stat = {MEDLINE},
-	Title = {Sequence comparisons using multiple sequences detect three times as many remote homologues as pairwise methods},
-	Volume = {284},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNYIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK6r842OQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFBhcmsAEAAIAADBcY1JAAAAEQAIAAC/OTtUAAAAAQAYAEZs1gBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UGFyazoxOTk4LnBkZgAADgASAAgAMQA5ADkAOAAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9QYXJrLzE5OTgucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1BhcmsvMTk5OC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Russell:1992tc,
-	Abstract = {An algorithm is presented for the accurate and rapid generation of multiple protein sequence alignments from tertiary structure comparisons. A preliminary multiple sequence alignment is performed using sequence information, which then determines an initial superposition of the structures. A structure comparison algorithm is applied to all pairs of proteins in the superimposed set and a similarity tree calculated. Multiple sequence alignments are then generated by following the tree from the branches to the root. At each branchpoint of the tree, a structure-based sequence alignment and coordinate transformations are output, with the multiple alignment of all structures output at the root. The algorithm encoded in STAMP (STructural Alignment of Multiple Proteins) is shown to give alignments in good agreement with published structural accounts within the dehydrogenase fold domains, globins, and serine proteinases. In order to reduce the need for visual verification, two similarity indices are introduced to determine the quality of each generated structural alignment. Sc quantifies the global structural similarity between pairs or groups of proteins, whereas Pij' provides a normalized measure of the confidence in the alignment of each residue. STAMP alignments have the quality of each alignment characterized by Sc and Pij' values and thus provide a reproducible resource for studies of residue conservation within structural motifs.},
-	Affiliation = {Laboratory of Molecular Biophysics, University of Oxford, England.},
-	Aid = {10.1002/prot.340140216 {$[$}doi{$]$}},
-	Au = {Barton GJ},
-	Author = {Russell, R B and Barton, G J},
-	Da = {19921029},
-	Date-Modified = {2008-05-29 12:03:20 -0700},
-	Dcom = {19921029},
-	Edat = {1992/10/01},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Keywords = {*Algorithms and Amino Acid Sequence and Animals and Comparative Study and Confidence Intervals and Globins/chemistry and Humans and Molecular Sequence Data and *Protein Structure, Tertiary and Research Support, Non-U.S. Gov't and *Sequence Alignment and Sequence Homology, Amino Acid and Serine Endopeptidases/chemistry and Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Russell/1992.pdf},
-	Lr = {20041117},
-	Mhda = {1992/10/01 00:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {309--323},
-	Pl = {UNITED STATES},
-	Pmid = {1409577},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {EC 3.4.21 (Serine Endopeptidases)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Multiple protein sequence alignment from tertiary structure comparison: assignment of global and residue confidence levels},
-	Volume = {14},
-	Year = {1992},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbPgIMTk5Mi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKqb803qBQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1J1c3NlbGwAABAACAAAwXGNSQAAABEACAAAvzVBEAAAAAEAGABGbPgARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlJ1c3NlbGw6MTk5Mi5wZGYADgASAAgAMQA5ADkAMgAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SdXNzZWxsLzE5OTIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUnVzc2VsbC8xOTkyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Andreeva:2004ta,
-	Abstract = {The Structural Classification of Proteins (SCOP) database is a comprehensive ordering of all proteins of known structure, according to their evolutionary and structural relationships. Protein domains in SCOP are hierarchically classified into families, superfamilies, folds and classes. The continual accumulation of sequence and structural data allows more rigorous analysis and provides important information for understanding the protein world and its evolutionary repertoire. SCOP participates in a project that aims to rationalize and integrate the data on proteins held in several sequence and structure databases. As part of this project, starting with release 1.63, we have initiated a refinement of the SCOP classification, which introduces a number of changes mostly at the levels below superfamily. The pending SCOP reclassification will be carried out gradually through a number of future releases. In addition to the expanded set of static links to external resources, available at the level of domain entries, we have started modernization of the interface capabilities of SCOP allowing more dynamic links with other databases. SCOP can be accessed at http://scop.mrc-lmb.cam.ac.uk/scop.},
-	Affiliation = {MRC Centre for Protein Engineering, Hills Road, Cambridge CB2 2QH, UK.},
-	Aid = {32/suppl{\_}1/D226 {$[$}pii{$]$}},
-	Au = {Murzin AG},
-	Author = {Andreeva, Antonina and Howorth, Dave and Brenner, Steven E and Hubbard, Tim J P and Chothia, Cyrus and Murzin, Alexey G},
-	Da = {20031218},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20040120},
-	Edat = {2003/12/19 05:00},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Animals and Antibodies/chemistry/classification and Capsid Proteins/chemistry/classification and Computational Biology and *Databases, Protein and Humans and Internet and Protein Kinases/chemistry/classification and Protein Structure, Secondary and Protein Structure, Tertiary and Proteins/*chemistry/*classification and Research Support, Non-U.S. Gov't},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Andreeva/2004.pdf},
-	Lr = {20050202},
-	Mhda = {2004/01/21 05:00},
-	Number = {Database issue},
-	Own = {NLM},
-	Pages = {D226-9},
-	Pl = {England},
-	Pmid = {14681400},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {EC 2.7.1.37 (Protein Kinases)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {SCOP database in 2004: refinements integrate structure and sequence family data},
-	Volume = {32},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+EIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJvrxczeYAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFuZHJlZXZhABAACAAAwXGNSQAAABEACAAAvF0+ZgAAAAEAGABGa+EARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFuZHJlZXZhOjIwMDQucGRmAAAOABIACAAyADAAMAA0AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FuZHJlZXZhLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FuZHJlZXZhLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Murzin:1995ij,
-	Abstract = {To facilitate understanding of, and access to, the information available for protein structures, we have constructed the Structural Classification of Proteins (scop) database. This database provides a detailed and comprehensive description of the structural and evolutionary relationships of the proteins of known structure. It also provides for each entry links to co-ordinates, images of the structure, interactive viewers, sequence data and literature references. Two search facilities are available. The homology search permits users to enter a sequence and obtain a list of any structures to which it has significant levels of sequence similarity. The key word search finds, for a word entered by the user, matches from both the text of the scop database and the headers of Brookhaven Protein Databank structure files. The database is freely accessible on World Wide Web (WWW) with an entry point to URL http: parallel scop.mrc-lmb.cam.ac.uk magnitude of scop.},
-	Affiliation = {MRC Laboratory of Molecular Biology and Centre for Protein Engineering, Cambridge, England.},
-	Aid = {S0022283685701593 {$[$}pii{$]$}},
-	Au = {Chothia C},
-	Author = {Murzin, A G and Brenner, S E and Hubbard, T and Chothia, C},
-	Da = {19950525},
-	Date-Modified = {2008-05-29 12:29:31 -0700},
-	Dcom = {19950525},
-	Edat = {1995/04/07},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Keywords = {Amino Acid Sequence and *Databases, Factual and Protein Folding and Proteins/chemistry/*classification and Research Support, Non-U.S. Gov't and Sequence Analysis},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Murzin/1995.pdf},
-	Lr = {20041117},
-	Mhda = {1995/04/07 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {536--540},
-	Pl = {ENGLAND},
-	Pmid = {7723011},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {{SCOP}: a structural classification of proteins database for the investigation of sequences and structures},
-	Volume = {247},
-	Year = {1995},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMUIMTk5NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJp7kCtHkAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk11cnppbgAQAAgAAMFxjUkAAAARAAgAALkDFukAAAABABgARmzFAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNdXJ6aW46MTk5NS5wZGYAAA4AEgAIADEAOQA5ADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTXVyemluLzE5OTUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL011cnppbi8xOTk1LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Lo-Conte:2002ui,
-	Abstract = {The SCOP (Structural Classification of Proteins) database is a comprehensive ordering of all proteins of known structure, according to their evolutionary and structural relationships. Protein domains in SCOP are grouped into species and hierarchically classified into families, superfamilies, folds and classes. Recently, we introduced a new set of features with the aim of standardizing access to the database, and providing a solid basis to manage the increasing number of experimental structures expected from structural genomics projects. These features include: a new set of identifiers, which uniquely identify each entry in the hierarchy; a compact representation of protein domain classification; a new set of parseable files, which fully describe all domains in SCOP and the hierarchy itself. These new features are reflected in the ASTRAL compendium. The SCOP search engine has also been updated, and a set of links to external resources added at the level of domain entries. SCOP can be accessed at http://scop.mrc-lmb.cam.ac.uk/scop.},
-	Affiliation = {MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, UK. loredana@mrc-lmb.cam.ac.uk},
-	Au = {Murzin AG},
-	Author = {Lo Conte, Loredana and Brenner, Steven E and Hubbard, Tim J P and Chothia, Cyrus and Murzin, Alexey G},
-	Da = {20011225},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20020121},
-	Edat = {2001/12/26 10:00},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Animals and *Databases, Protein and Evolution, Molecular and *Genome and Information Storage and Retrieval and Internet and *Protein Structure, Tertiary and Proteins/chemistry/classification/*genetics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Lo%20Conte/2002.pdf},
-	Lr = {20031114},
-	Mhda = {2002/01/22 10:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {264-7},
-	Pl = {England},
-	Pmid = {11752311},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {SCOP database in 2002: refinements accommodate structural genomics},
-	Volume = {30},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbKgIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKH7kCtHkAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACExvIENvbnRlABAACAAAwXGNSQAAABEACAAAuQMW6QAAAAEAGABGbKgARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkxvIENvbnRlOjIwMDIucGRmAAAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0xvIENvbnRlLzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0xvIENvbnRlLzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Brenner:1998rm,
-	Abstract = {Pairwise sequence comparison methods have been assessed using proteins whose relationships are known reliably from their structures and functions, as described in the SCOP database [Murzin, A. G., Brenner, S. E., Hubbard, T. & Chothia C. (1995) J. Mol. Biol. 247, 536-540]. The evaluation tested the programs BLAST [Altschul, S. F., Gish, W., Miller, W., Myers, E. W. & Lipman, D. J. (1990). J. Mol. Biol. 215, 403-410], WU-BLAST2 [Altschul, S. F. & Gish, W. (1996) Methods Enzymol. 266, 460-480], FASTA [Pearson, W. R. & Lipman, D. J. (1988) Proc. Natl. Acad. Sci. USA 85, 2444-2448], and SSEARCH [Smith, T. F. & Waterman, M. S. (1981) J. Mol. Biol. 147, 195-197] and their scoring schemes. The error rate of all algorithms is greatly reduced by using statistical scores to evaluate matches rather than percentage identity or raw scores. The E-value statistical scores of SSEARCH and FASTA are reliable: the number of false positives found in our tests agrees well with the scores reported. However, the P-values reported by BLAST and WU-BLAST2 exaggerate significance by orders of magnitude. SSEARCH, FASTA ktup = 1, and WU-BLAST2 perform best, and they are capable of detecting almost all relationships between proteins whose sequence identities are >30%. For more distantly related proteins, they do much less well; only one-half of the relationships between proteins with 20-30% identity are found. Because many homologs have low sequence similarity, most distant relationships cannot be detected by any pairwise comparison method; however, those which are identified may be used with confidence.},
-	Affiliation = {MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, United Kingdom. brenner@hyper.stanford.edu},
-	Au = {Hubbard TJ},
-	Author = {Brenner, S E and Chothia, C and Hubbard, T J},
-	Da = {19980622},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19980622},
-	Edat = {1998/05/30},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {Algorithms and Animals and Databases, Factual and *Evolution, Molecular and Humans and Proteins/chemistry/*genetics and Research Support, Non-U.S. Gov't and Sequence Alignment/*methods},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Brenner/1998.pdf},
-	Lr = {20041117},
-	Mhda = {1998/05/30 00:01},
-	Number = {11},
-	Own = {NLM},
-	Pages = {6073-8},
-	Pl = {UNITED STATES},
-	Pmid = {9600919},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {S},
-	Stat = {MEDLINE},
-	Title = {Assessing sequence comparison methods with reliable structurally identified distant evolutionary relationships},
-	Volume = {95},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAAIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK+b84t6tQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB0JyZW5uZXIAABAACAAAwXGNSQAAABEACAAAvzkaGwAAAAEAGABGbAAARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkJyZW5uZXI6MTk5OC5wZGYADgASAAgAMQA5ADkAOAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9CcmVubmVyLzE5OTgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQnJlbm5lci8xOTk4LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Chandonia:2004nx,
-	Abstract = {The ASTRAL Compendium provides several databases and tools to aid in the analysis of protein structures, particularly through the use of their sequences. Partially derived from the SCOP database of protein structure domains, it includes sequences for each domain and other resources useful for studying these sequences and domain structures. The current release of ASTRAL contains 54,745 domains, more than three times as many as the initial release 4 years ago. ASTRAL has undergone major transformations in the past 2 years. In addition to several complete updates each year, ASTRAL is now updated on a weekly basis with preliminary classifications of domains from newly released PDB structures. These classifications are available as a stand-alone database, as well as integrated into other ASTRAL databases such as representative subsets. To enhance the utility of ASTRAL to structural biologists, all SCOP domains are now made available as PDB-style coordinate files as well as sequences. In addition to sequences and representative subsets based on SCOP domains, sequences and subsets based on PDB chains are newly included in ASTRAL. Several search tools have been added to ASTRAL to facilitate retrieval of data by individual users and automated methods. ASTRAL may be accessed at http://astral.stanford. edu/.},
-	Affiliation = {Berkeley Structural Genomics Center, Physical Biosciences Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA.},
-	Aid = {32/suppl{\_}1/D189 {$[$}pii{$]$}},
-	Au = {Brenner SE},
-	Author = {Chandonia, John-Marc and Hon, Gary and Walker, Nigel S and Lo Conte, Loredana and Koehl, Patrice and Levitt, Michael and Brenner, Steven E},
-	Da = {20031218},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20040120},
-	Edat = {2003/12/19 05:00},
-	Gr = {1-P50-GM62412/GM/NIGMS},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Animals and *Computational Biology and *Databases, Protein and Humans and Information Storage and Retrieval and Internet and Protein Structure, Tertiary and Proteins/*chemistry/classification and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chandonia/2004.pdf},
-	Lr = {20050202},
-	Mhda = {2004/01/21 05:00},
-	Number = {Database issue},
-	Own = {NLM},
-	Pages = {D189-92},
-	Pl = {England},
-	Pmid = {14681391},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Si = {PDB/1ABC},
-	Stat = {MEDLINE},
-	Title = {The ASTRAL Compendium in 2004},
-	Volume = {32},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAsIMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ/LwI2L4AAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUNoYW5kb25pYQAAEAAIAADBcY1JAAAAEQAIAAC8CUk+AAAAAQAYAEZsCwBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hhbmRvbmlhOjIwMDQucGRmAA4AEgAIADIAMAAwADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQ2hhbmRvbmlhLzIwMDQucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvQ2hhbmRvbmlhLzIwMDQucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Dill:1995ha,
-	Abstract = {General principles of protein structure, stability, and folding kinetics have recently been explored in computer simulations of simple exact lattice models. These models represent protein chains at a rudimentary level, but they involve few parameters, approximations, or implicit biases, and they allow complete explorations of conformational and sequence spaces. Such simulations have resulted in testable predictions that are sometimes unanticipated: The folding code is mainly binary and delocalized throughout the amino acid sequence. The secondary and tertiary structures of a protein are specified mainly by the sequence of polar and nonpolar monomers. More specific interactions may refine the structure, rather than dominate the folding code. Simple exact models can account for the properties that characterize protein folding: two-state cooperativity, secondary and tertiary structures, and multistage folding kinetics--fast hydrophobic collapse followed by slower annealing. These studies suggest the possibility of creating "foldable" chain molecules other than proteins. The encoding of a unique compact chain conformation may not require amino acids; it may require only the ability to synthesize specific monomer sequences in which at least one monomer type is solvent-averse.},
-	Affiliation = {Department of Pharmaceutical Chemistry, University of California, San Francisco 94143-1204, USA.},
-	Au = {Chan HS},
-	Author = {Dill, K A and Bromberg, S and Yue, K and Fiebig, K M and Yee, D P and Thomas, P D and Chan, H S},
-	Da = {19950824},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19950824},
-	Edat = {1995/04/01},
-	Jid = {9211750},
-	Journal = {Protein Sci},
-	Keywords = {Amino Acid Sequence and Evolution and Hydrogen Bonding and Models, Molecular and Molecular Sequence Data and Mutation and Protein Conformation and Protein Denaturation and *Protein Folding and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and Temperature and Thermodynamics},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dill/1995.pdf},
-	Lr = {20041117},
-	Mhda = {1995/04/01 00:01},
-	Number = {4},
-	Own = {NLM},
-	Pages = {561-602},
-	Pl = {UNITED STATES},
-	Pmid = {7613459},
-	Pst = {ppublish},
-	Pt = {Review},
-	Pubm = {Print},
-	Rf = {335},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Principles of protein folding--a perspective from simple exact models},
-	Volume = {4},
-	Year = {1995},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCgIMTk5NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ4L8yO7lQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABERpbGwAEAAIAADBcY1JAAAAEQAIAAC/Mp4pAAAAAQAYAEZsKABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RGlsbDoxOTk1LnBkZgAADgASAAgAMQA5ADkANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9EaWxsLzE5OTUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0RpbGwvMTk5NS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Chan:1989tn,
-	Author = {Chan, H S and Dill, K A},
-	Date-Added = {2005-08-25 09:34:13 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Macromolecules},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Chan/1989.pdf},
-	Number = {12},
-	Pages = {4559-4573},
-	Title = {Compact polymers},
-	Volume = {22},
-	Year = {1989},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbAoIMTk4OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKPb8NA+hQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABENoYW4AEAAIAADBcY1JAAAAEQAIAAC/DWZYAAAAAQAYAEZsCgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6Q2hhbjoxOTg5LnBkZgAADgASAAgAMQA5ADgAOQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9DaGFuLzE5ODkucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0NoYW4vMTk4OS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Lau:1989pl,
-	Author = {Lau, K F and Dill, K A},
-	Date-Added = {2005-08-25 09:36:51 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Journal = {Macromolecules},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Lau/1989.pdf},
-	Number = {10},
-	Pages = {3986-3997},
-	Title = {A Lattice Statistical Mechanics Model of the Conformational and Sequence Spaces of Proteins},
-	Volume = {22},
-	Year = {1989},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbJYIMTk4OS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJtb8NAjxQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0xhdQAAEAAIAADBcY1JAAAAEQAIAAC/DWSsAAAAAQAYAEZslgBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6TGF1OjE5ODkucGRmAA4AEgAIADEAOQA4ADkALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTGF1LzE5ODkucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvTGF1LzE5ODkucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Karlin:1993yd,
-	Abstract = {Score-based measures of molecular-sequence features provide versatile aids for the study of proteins and DNA. They are used by many sequence data base search programs, as well as for identifying distinctive properties of single sequences. For any such measure, it is important to know what can be expected to occur purely by chance. The statistical distribution of high-scoring segments has been described elsewhere. However, molecular sequences will frequently yield several high-scoring segments for which some combined assessment is in order. This paper describes the statistical distribution for the sum of the scores of multiple high-scoring segments and illustrates its application to the identification of possible transmembrane segments and the evaluation of sequence similarity.},
-	Affiliation = {Department of Mathematics, Stanford University, CA 94305.},
-	Au = {Altschul SF},
-	Author = {Karlin, S and Altschul, S F},
-	Da = {19930722},
-	Date-Modified = {2008-05-29 12:17:06 -0700},
-	Dcom = {19930722},
-	Edat = {1993/06/15},
-	Gr = {GM39907-02/GM/NIGMS},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {*Amino Acid Sequence and Animals and Antithrombin III/genetics and *Base Sequence and Chickens and *DNA and Drosophila/genetics and *Drosophila Proteins and Evolution and Eye Proteins/genetics and Fowlpox virus/genetics and Humans and Membrane Glycoproteins/genetics and Molecular Sequence Data and Probability and *Proteins and *Receptor Protein-Tyrosine Kinases and Receptors, Cell Surface/genetics and Receptors, Serotonin/genetics and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and *Sequence Analysis and *Sequence Homology, Amino Acid},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Karlin/1993.pdf},
-	Lr = {20041117},
-	Mhda = {1993/06/15 00:01},
-	Number = {12},
-	Own = {NLM},
-	Pages = {5873--5877},
-	Pl = {UNITED STATES},
-	Pmid = {8390686},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {EC 2.7.1.112 (sev protein, Drosophila)},
-	Sb = {X},
-	Stat = {MEDLINE},
-	Title = {Applications and statistics for multiple high-scoring segments in molecular sequences},
-	Volume = {90},
-	Year = {1993},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHwIMTk5My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKB78Wn6lQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkthcmxpbgAQAAgAAMFxjUkAAAARAAgAAL8XAhkAAAABABgARmx8AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLYXJsaW46MTk5My5wZGYAAA4AEgAIADEAOQA5ADMALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS2FybGluLzE5OTMucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0thcmxpbi8xOTkzLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Karlin:1991bg,
-	Affiliation = {Department of Mathematics, Stanford University, California 94305.},
-	Au = {Altschul SF},
-	Author = {Karlin, S and Bucher, P and Brendel, V and Altschul, S F},
-	Da = {19910919},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19910919},
-	Edat = {1991/01/01},
-	Gr = {GM39907-02/GM/NIGMS},
-	Jid = {8505748},
-	Journal = {Annu Rev Biophys Biophys Chem},
-	Keywords = {*Amino Acid Sequence and Animals and *Base Sequence and Comparative Study and DNA/chemistry/*genetics and Escherichia coli/genetics and Humans and Leucine Zippers/genetics and Probability and Proteins/chemistry/*genetics and RNA, Messenger/genetics and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and Sequence Homology, Nucleic Acid and *Statistics and Terminology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Karlin/1991.pdf},
-	Lr = {20041117},
-	Mhda = {1991/01/01 00:01},
-	Number = {0883-9182},
-	Own = {NLM},
-	Pages = {175-203},
-	Pl = {UNITED STATES},
-	Pmid = {1867715},
-	Pst = {ppublish},
-	Pt = {Review},
-	Pubm = {Print},
-	Rf = {103},
-	Rn = {9007-49-2 (DNA)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Statistical methods and insights for protein and DNA sequences},
-	Volume = {20},
-	Year = {1991},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHwIMTk5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKBr8Wn1lQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkthcmxpbgAQAAgAAMFxjUkAAAARAAgAAL8XAckAAAABABgARmx8AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLYXJsaW46MTk5MS5wZGYAAA4AEgAIADEAOQA5ADEALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS2FybGluLzE5OTEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0thcmxpbi8xOTkxLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Yu:2002sv,
-	Abstract = {The score statistics of a recently introduced 'hybrid alignment' algorithm is studied in detail numerically. An extensive survey across the 2216 models of protein domains contained in the Pfam v5.4 database (Bateman et al., Nucleic Acids Res., 28, 263-266, 2000) verifies the theoretical predictions: For the position-specific scoring functions used in the Pfam models, the score statistics of hybrid alignment obey the Gumbel distribution, with the key Gumbel parameter lambda taking on the asymptotic value 1 universally for all models. Thus, the use of hybrid alignment eliminates the time-consuming computer simulations normally needed to assign p-values to alignment scores, freeing the users to experiment with different scoring parameters and functions. The performance of the hybrid algorithm in detecting sequence homology is also studied. For protein sequences from the SCOP database (Murzin et al., J. Mol. Biol., 247, 536-540, 1995) using uniform scoring functions, the performance is found to be comparable to the best of the existing methods. Preliminary results using the PfamA database suggest that the hybrid algorithm achieves similar performance as existing methods for position-specific scoring systems as well. Hybrid alignment is thereby established as a high performance alignment algorithm with well-characterized, universal statistics.},
-	Affiliation = {Department of Physics, Florida Atlantic University, 777 Glades Road, Boca Raton 33431-0991, USA. yyu@fau.edu},
-	Au = {Hwa T},
-	Author = {Yu, Yi-Kuo and Bundschuh, Ralf and Hwa, Terence},
-	Da = {20020620},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20030127},
-	Edat = {2002/06/21 10:00},
-	Jid = {9808944},
-	Journal = {Bioinformatics},
-	Keywords = {Algorithms and Computational Biology and Computer Simulation and Databases, Protein and ROC Curve and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, Non-P.H.S. and Sequence Alignment/*statistics \& numerical data},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Yu/2002.pdf},
-	Lr = {20041117},
-	Mhda = {2003/01/28 04:00},
-	Number = {6},
-	Own = {NLM},
-	Pages = {864-72},
-	Pl = {England},
-	Pmid = {12075022},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Hybrid alignment: high-performance with universal statistics},
-	Volume = {18},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGCAAAAAAGCAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVcIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKJb8yPXhQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAAll1ABAACAAAwXGNSQAAABEACAAAvzKf6AAAAAEAGABGbVcARmvPAEZq1QBGahsARmRoAECJQwACAD1oc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOll1OjIwMDIucGRmAAAOABIACAAyADAAMAAyAC4AcABkAGYADwAIAAMAaABzAHIAEgA5VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1l1LzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIC4uLy4uLy4uLy4uL0FydGljbGVzL1l1LzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQI/AkQCTQJYAlwCagJxAnoCnQKiAqUAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACsg==}}
-
-@article{Rice:2004xt,
-	Abstract = {Of the three domains of life (Eukarya, Bacteria, and Archaea), the least understood is Archaea and its associated viruses. Many Archaea are extremophiles, with species that are capable of growth at some of the highest temperatures and extremes of pH of all known organisms. Phylogenetic rRNA-encoding DNA analysis places many of the hyperthermophilic Archaea (species with an optimum growth > or = 80 degrees C) at the base of the universal tree of life, suggesting that thermophiles were among the first forms of life on earth. Very few viruses have been identified from Archaea as compared to Bacteria and Eukarya. We report here the structure of a hyperthermophilic virus isolated from an archaeal host found in hot springs in Yellowstone National Park. The sequence of the circular double-stranded DNA viral genome shows that it shares little similarity to other known genes in viruses or other organisms. By comparing the tertiary and quaternary structures of the coat protein of this virus with those of a bacterial and an animal virus, we find conformational relationships among all three, suggesting that some viruses may have a common ancestor that precedes the division into three domains of life >3 billion years ago.},
-	Affiliation = {Thermal Biology Institute and Department of Microbiology, Montana State University, Bozeman, MT 59717, USA.},
-	Aid = {0401773101 {$[$}pii{$]$}},
-	Au = {Young M},
-	Author = {Rice, George and Tang, Liang and Stedman, Kenneth and Roberto, Francisco and Spuhler, Josh and Gillitzer, Eric and Johnson, John E and Douglas, Trevor and Young, Mark},
-	Cin = {Proc Natl Acad Sci U S A. 2004 May 18;101(20):7495-6. PMID: 15138303},
-	Da = {20040519},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20040706},
-	Dep = {20040503},
-	Edat = {2004/05/05 05:00},
-	Gr = {GM54076/GM/NIGMS},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {Capsid/*physiology and DNA Viruses/genetics/isolation \& purification/*physiology and Genome, Viral and Microscopy, Electron and Molecular Sequence Data and Protein Structure, Tertiary and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S. and Sulfolobus/*virology},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Rice/2004.pdf},
-	Lr = {20041117},
-	Mhda = {2004/07/09 05:00},
-	Number = {20},
-	Own = {NLM},
-	Pages = {7716-20},
-	Phst = {2004/05/03 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {15123802},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	Si = {GENBANK/AY569307},
-	Stat = {MEDLINE},
-	Title = {The structure of a thermophilic archaeal virus shows a double-stranded DNA viral capsid type that spans all domains of life},
-	Volume = {101},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbO8IMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLvb8yCQMAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFJpY2UAEAAIAADBcY1JAAAAEQAIAAC/MmtzAAAAAQAYAEZs7wBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UmljZToyMDA0LnBkZgAADgASAAgAMgAwADAANAAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SaWNlLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1JpY2UvMjAwNC5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Rice:2001ls,
-	Abstract = {Viruses of extreme thermophiles are of great interest because they serve as model systems for understanding the biochemistry and molecular biology required for life at high temperatures. In this work, we report the discovery, isolation, and preliminary characterization of viruses and virus-like particles from extreme thermal acidic environments (70-92 degrees C, pH 1.0-4.5) found in Yellowstone National Park. Six unique particle morphologies were found in Sulfolobus enrichment cultures. Three of the particle morphologies are similar to viruses previously isolated from Sulfolobus species from Iceland and/or Japan. Sequence analysis of their viral genomes suggests that they are related to the Icelandic and Japanese isolates. In addition, three virus particle morphologies that had not been previously observed from thermal environments were found. These viruses appear to be completely novel in nature.},
-	Affiliation = {Thermal Biology Institute and Department of Microbiology, Montana State University, Bozeman, MT 59717, USA.},
-	Aid = {231170198 {$[$}pii{$]$}},
-	Au = {Young MJ},
-	Author = {Rice, G and Stedman, K and Snyder, J and Wiedenheft, B and Willits, D and Brumfield, S and McDermott, T and Young, M J},
-	Da = {20011107},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20011207},
-	Dep = {20011023},
-	Edat = {2001/10/19 10:00},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {Archaeal Viruses/*isolation \& purification/ultrastructure and *Heat and Microscopy, Electron and Research Support, U.S. Gov't, Non-P.H.S. and Sulfolobus/*virology and Virion/isolation \& purification},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Rice/2001.pdf},
-	Lr = {20041117},
-	Mhda = {2002/01/05 10:01},
-	Number = {23},
-	Own = {NLM},
-	Pages = {13341-5},
-	Phst = {2001/10/23 {$[$}aheadofprint{$]$}},
-	Pl = {United States},
-	Pmid = {11606757},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print-Electronic},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Viruses from extreme thermal environments},
-	Volume = {98},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbO8IMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLvL8yH6sAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABFJpY2UAEAAIAADBcY1JAAAAEQAIAAC/MoIbAAAAAQAYAEZs7wBGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6UmljZToyMDAxLnBkZgAADgASAAgAMgAwADAAMQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9SaWNlLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL1JpY2UvMjAwMS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@article{Ent:2002vz,
-	Abstract = {It was the general belief that DNA partitioning in prokaryotes is independent of a cytoskeletal structure, which in eukaryotic cells is indispensable for DNA segregation. Recently, however, immunofluorescence microscopy revealed highly dynamic, filamentous structures along the longitudinal axis of Escherichia coli formed by ParM, a plasmid-encoded protein required for accurate segregation of low-copy-number plasmid R1. We show here that ParM polymerizes into double helical protofilaments with a longitudinal repeat similar to filamentous actin (F-actin) and MreB filaments that maintain the cell shape of non-spherical bacteria. The crystal structure of ParM with and without ADP demonstrates that it is a member of the actin family of proteins and shows a domain movement of 25 degrees upon nucleotide binding. Furthermore, the crystal structure of ParM reveals major differences in the protofilament interface compared with F-actin, despite the similar arrangement of the subunits within the filaments. Thus, there is now evidence for cytoskeletal structures, formed by actin-like filaments that are involved in plasmid partitioning in E.coli.},
-	Affiliation = {MRC Laboratory of Molecular Biology, Hills Road, Cambridge CB2 2QH, UK.},
-	Au = {Lowe J},
-	Author = {van den Ent, Fusinita and Moller-Jensen, Jakob and Amos, Linda A and Gerdes, Kenn and Lowe, Jan},
-	Da = {20021217},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20040204},
-	Edat = {2002/12/18 04:00},
-	Jid = {8208664},
-	Journal = {EMBO J},
-	Keywords = {Actins/*chemistry/*metabolism/*physiology and Crystallography, X-Ray and Cytoskeleton/metabolism and DNA/metabolism and Escherichia coli/metabolism and Escherichia coli Proteins/*chemistry/metabolism/*physiology and Microscopy, Electron and Models, Molecular and Plasmids/*metabolism and Protein Binding and Research Support, Non-U.S. Gov't},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Ent/2002.pdf},
-	Lr = {20041117},
-	Mhda = {2004/02/05 05:00},
-	Number = {24},
-	Own = {NLM},
-	Pages = {6935-43},
-	Pl = {England},
-	Pmid = {12486014},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {9007-49-2 (DNA)},
-	Sb = {IM},
-	Si = {PDB/1MWM},
-	Stat = {MEDLINE},
-	Title = {F-actin-like filaments formed by plasmid segregation protein ParM},
-	Volume = {21},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGEAAAAAAGEAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbDUIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJo78mHaxQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAA0VudAAAEAAIAADBcY1JAAAAEQAIAAC/JoAcAAAAAQAYAEZsNQBGa88ARmrVAEZqGwBGZGgAQIlDAAIAPmhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RW50OjIwMDIucGRmAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASADpVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvRW50LzIwMDIucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAhLi4vLi4vLi4vLi4vQXJ0aWNsZXMvRW50LzIwMDIucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJBAkYCTwJaAl4CbAJzAnwCoAKlAqgAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACtQ==}}
-
-@article{Benson:2004yq,
-	Abstract = {Our discovery that the major coat protein of bacteriophage PRD1 resembles that of human adenovirus raised the unexpected possibility that viruses infecting bacteria could be related by evolution to those infecting animal hosts. We first review the development of this idea. We then describe how we have used structure-based modeling to show that several other viruses with no detectable sequence similarity are likely to have coats constructed from similar proteins-the "double-barrel trimer." There is evidence that the group includes a diversity of viruses infecting very different hosts in all three domains of life: Eukarya; Bacteria; and Archaea that diverged billions of years ago. The current classification of viruses obscures such similarities. We propose that the occurrence of a double-barrel trimer coat protein in an icosahedral dsDNA virus with large facets, irrespective of its host, is a very strong indicator of its membership in a lineage of viruses with a common ancestor.},
-	Affiliation = {The Wistar Institute, 3601 Spruce Street, Philadelphia, Pennsylvania 19104, USA.},
-	Aid = {10.1016/j.molcel.2004.11.016 {$[$}doi{$]$}},
-	Au = {Burnett RM},
-	Author = {Benson, Stacy D and Bamford, Jaana K H and Bamford, Dennis H and Burnett, Roger M},
-	Da = {20041202},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20050113},
-	Edat = {2004/12/03 09:00},
-	Gr = {CA-09171/CA/NCI},
-	Jid = {9802571},
-	Journal = {Mol Cell},
-	Keywords = {Adenoviridae/genetics and Amino Acid Sequence and Animals and Archaea and Bacteria and Bacteriophage PRD1/metabolism and Capsid/metabolism and Cell Lineage and *Evolution and Humans and Models, Molecular and Molecular Sequence Data and Open Reading Frames and Protein Conformation and Protein Structure, Secondary and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, P.H.S. and Sequence Homology, Amino Acid and Viruses},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Benson/2004.pdf},
-	Mhda = {2005/01/14 09:00},
-	Number = {5},
-	Own = {NLM},
-	Pages = {673-85},
-	Pl = {United States},
-	Pmid = {15574324},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Does common architecture reveal a viral lineage spanning all three domains of life?},
-	Volume = {16},
-	Year = {2004},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+4IMjAwNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLwwAAAAAAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkJlbnNvbgAQAAgAAMFxjUkAAAARAAgAAAAAAAAAAAABABgARmvuAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCZW5zb246MjAwNC5wZGYAAA4AEgAIADIAMAAwADQALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQmVuc29uLzIwMDQucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0JlbnNvbi8yMDA0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Bailey:2002ml,
-	Abstract = {We present a novel maximum-likelihood-based algorithm for estimating the distribution of alignment scores from the scores of unrelated sequences in a database search. Using a new method for measuring the accuracy of p-values, we show that our maximum-likelihood-based algorithm is more accurate than existing regression-based and lookup table methods. We explore a more sophisticated way of modeling and estimating the score distributions (using a two-component mixture model and expectation maximization), but conclude that this does not improve significantly over simply ignoring scores with small E-values during estimation. Finally, we measure the classification accuracy of p-values estimated in different ways and observe that inaccurate p-values can, somewhat paradoxically, lead to higher classification accuracy. We explain this paradox and argue that statistical accuracy, not classification accuracy, should be the primary criterion in comparisons of similarity search methods that return p-values that adjust for target sequence length.},
-	Affiliation = {ACMC, Mathematics Department, The University of Queensland, Brisbane, Queensland, 4072 Australia. tbailey@sdsc.edu},
-	Aid = {10.1089/106652702760138637 {$[$}doi{$]$}},
-	Au = {Gribskov M},
-	Author = {Bailey, Timothy L and Gribskov, Michael},
-	Da = {20020806},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20030128},
-	Edat = {2002/08/07 10:00},
-	Jid = {9433358},
-	Journal = {J Comput Biol},
-	Keywords = {Algorithms and Computational Biology and *Computer Simulation and Mathematical Computing and Models, Statistical and Probability and Proteins/*chemistry and Sensitivity and Specificity and Sequence Alignment/*methods/statistics \& numerical data and Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Bailey/2002.pdf},
-	Mhda = {2003/01/29 04:00},
-	Number = {3},
-	Own = {NLM},
-	Pages = {575-93},
-	Pl = {United States},
-	Pmid = {12162893},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Estimating and evaluating the statistics of gapped local-alignment scores},
-	Volume = {9},
-	Year = {2002},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa+YIMjAwMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ5b8rlcFQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkJhaWxleQAQAAgAAMFxjUkAAAARAAgAAL8r+DEAAAABABgARmvmAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpCYWlsZXk6MjAwMi5wZGYAAA4AEgAIADIAMAAwADIALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvQmFpbGV5LzIwMDIucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0JhaWxleS8yMDAyLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Altschul:1996am,
-	Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, Maryland 20894, USA.},
-	Au = {Gish W},
-	Author = {Altschul, S F and Gish, W},
-	Da = {19961010},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19961010},
-	Edat = {1996/01/01},
-	Jid = {0212271},
-	Journal = {Methods Enzymol},
-	Keywords = {Algorithms and Amino Acid Sequence and Animals and Base Sequence and Comparative Study and DNA/*chemistry and *Databases, Factual and Drosophila/enzymology and Glutathione Transferase/chemistry/genetics and *Models, Genetic and Models, Statistical and Molecular Sequence Data and Plants/enzymology and Probability and Proteins/*chemistry and Regression Analysis and *Sequence Homology, Amino Acid and *Sequence Homology, Nucleic Acid and Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Altschul/1996.pdf},
-	Lr = {20031114},
-	Mhda = {1996/01/01 00:01},
-	Number = {0076-6879},
-	Own = {NLM},
-	Pages = {460-80},
-	Pl = {UNITED STATES},
-	Pmid = {8743700},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {EC 2.5.1.18 (Glutathione Transferase)},
-	Sb = {IM},
-	Si = {SWISSPROT/Q04522},
-	Stat = {MEDLINE},
-	Title = {Local alignment statistics},
-	Volume = {266},
-	Year = {1996},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa94IMTk5Ni5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLDr8x8yZQREYgQ0FSTwAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFsdHNjaHVsABAACAAAwXGNSQAAABEACAAAvzJVlgAAAAEAGABGa94ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFsdHNjaHVsOjE5OTYucGRmAAAOABIACAAxADkAOQA2AC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FsdHNjaHVsLzE5OTYucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FsdHNjaHVsLzE5OTYucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Schaffer:2001pv,
-	Abstract = {PSI-BLAST is an iterative program to search a database for proteins with distant similarity to a query sequence. We investigated over a dozen modifications to the methods used in PSI-BLAST, with the goal of improving accuracy in finding true positive matches. To evaluate performance we used a set of 103 queries for which the true positives in yeast had been annotated by human experts, and a popular measure of retrieval accuracy (ROC) that can be normalized to take on values between 0 (worst) and 1 (best). The modifications we consider novel improve the ROC score from 0.758 +/- 0.005 to 0.895 +/- 0.003. This does not include the benefits from four modifications we included in the 'baseline' version, even though they were not implemented in PSI-BLAST version 2.0. The improvement in accuracy was confirmed on a small second test set. This test involved analyzing three protein families with curated lists of true positives from the non-redundant protein database. The modification that accounts for the majority of the improvement is the use, for each database sequence, of a position-specific scoring system tuned to that sequence's amino acid composition. The use of composition-based statistics is particularly beneficial for large-scale automated applications of PSI-BLAST.},
-	Affiliation = {National Center for Biotechnology Information, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA. schaffer@helix.nih.gov},
-	Au = {Altschul SF},
-	Author = {Schaffer, A A and Aravind, L and Madden, T L and Shavirin, S and Spouge, J L and Wolf, Y I and Koonin, E V and Altschul, S F},
-	Da = {20010713},
-	Date-Modified = {2007-07-25 14:06:03 -0700},
-	Dcom = {20010906},
-	Edat = {2001/07/14 10:00},
-	Group = {Statistics},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Algorithms and Amino Acids/genetics and Animals and Computational Biology/methods/statistics \& numerical data and *Databases, Factual and Humans and Information Storage and Retrieval and Proteins/*genetics and Reproducibility of Results and Sensitivity and Specificity and Sequence Alignment/*methods and *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Schaffer/2001.pdf},
-	Lr = {20041117},
-	Mhda = {2001/09/08 10:01},
-	Number = {14},
-	Own = {NLM},
-	Pages = {2994-3005},
-	Pl = {England},
-	Pmid = {11452024},
-	Pst = {ppublish},
-	Pt = {Review, Tutorial},
-	Pubm = {Print},
-	Rf = {60},
-	Rn = {0 (Proteins)},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements},
-	Volume = {29},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbQEIMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJwL8rjtIAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACFNjaGFmZmVyABAACAAAwXGNSQAAABEACAAAvyvxQgAAAAEAGABGbQEARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlNjaGFmZmVyOjIwMDEucGRmAAAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL1NjaGFmZmVyLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL1NjaGFmZmVyLzIwMDEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Altschul:2001ee,
-	Abstract = {The distribution of optimal local alignment scores of random sequences plays a vital role in evaluating the statistical significance of sequence alignments. These scores can be well described by an extreme-value distribution. The distribution's parameters depend upon the scoring system employed and the random letter frequencies; in general they cannot be derived analytically, but must be estimated by curve fitting. For obtaining accurate parameter estimates, a form of the recently described 'island' method has several advantages. We describe this method in detail, and use it to investigate the functional dependence of these parameters on finite-length edge effects.},
-	Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894, USA. altschul@ncbi.nlm.nih.gov},
-	Au = {Hwa T},
-	Author = {Altschul, S F and Bundschuh, R and Olsen, R and Hwa, T},
-	Da = {20010110},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20010125},
-	Edat = {2001/01/05 11:00},
-	Jid = {0411011},
-	Journal = {Nucleic Acids Res},
-	Keywords = {Algorithms and Comparative Study and Computational Biology/methods/statistics \& numerical data and Likelihood Functions and Sequence Alignment/methods/*statistics \& numerical data and Sequence Analysis, Protein/methods/statistics \& numerical data and *Statistical Distributions},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Altschul/2001.pdf},
-	Lr = {20010518},
-	Mhda = {2001/02/28 10:01},
-	Number = {2},
-	Own = {NLM},
-	Pages = {351-61},
-	Pl = {ENGLAND},
-	Pmid = {11139604},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {The estimation of statistical parameters for local alignment score distributions},
-	Volume = {29},
-	Year = {2001},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa94IMjAwMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLD78rkIQAAAAAAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFsdHNjaHVsABAACAAAwXGNSQAAABEACAAAvyvy9AAAAAEAGABGa94ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFsdHNjaHVsOjIwMDEucGRmAAAOABIACAAyADAAMAAxAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FsdHNjaHVsLzIwMDEucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FsdHNjaHVsLzIwMDEucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Altschul:1990ee,
-	Abstract = {A new approach to rapid sequence comparison, basic local alignment search tool (BLAST), directly approximates alignments that optimize a measure of local similarity, the maximal segment pair (MSP) score. Recent mathematical results on the stochastic properties of MSP scores allow an analysis of the performance of this method as well as the statistical significance of alignments it generates. The basic algorithm is simple and robust; it can be implemented in a number of ways and applied in a variety of contexts including straightforward DNA and protein sequence database searches, motif searches, gene identification searches, and in the analysis of multiple regions of similarity in long DNA sequences. In addition to its flexibility and tractability to mathematical analysis, BLAST is an order of magnitude faster than existing sequence comparison tools of comparable sensitivity.},
-	Affiliation = {National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD 20894.},
-	Aid = {S0022283680799990 {$[$}pii{$]$}},
-	Au = {Lipman DJ},
-	Author = {Altschul, S F and Gish, W and Miller, W and Myers, E W and Lipman, D J},
-	Da = {19901205},
-	Date-Modified = {2008-05-29 11:57:57 -0700},
-	Dcom = {19901205},
-	Edat = {1990/10/05},
-	Gr = {LM05110/LM/NLM},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Keywords = {Algorithms and Amino Acid Sequence and *Base Sequence and Databases, Factual and *Mutation and Research Support, U.S. Gov't, P.H.S. and Sensitivity and Specificity and Sequence Homology, Nucleic Acid and *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Altschul/1990.pdf},
-	Lr = {20041117},
-	Mhda = {2001/07/11 10:01},
-	Number = {3},
-	Own = {NLM},
-	Pages = {403--410},
-	Pl = {ENGLAND},
-	Pmid = {2231712},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Basic local alignment search tool},
-	Volume = {215},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGUAAAAAAGUAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGa94IMTk5MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLEL8rlCBQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACEFsdHNjaHVsABAACAAAwXGNSQAAABEACAAAvyv2kAAAAAEAGABGa94ARmvPAEZq1QBGahsARmRoAECJQwACAENoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOkFsdHNjaHVsOjE5OTAucGRmAAAOABIACAAxADkAOQAwAC4AcABkAGYADwAIAAMAaABzAHIAEgA/VXNlcnMvYXRvbWljcGlyYXRlL0RvY3VtZW50cy9DYWx0ZWNoL0FydGljbGVzL0FsdHNjaHVsLzE5OTAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJi4uLy4uLy4uLy4uL0FydGljbGVzL0FsdHNjaHVsLzE5OTAucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJRAlYCXwJqAm4CfAKDAowCtQK6Ar0AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACyg==}}
-
-@article{Zachariah:2005pm,
-	Abstract = {Sequence alignment underpins common tasks in molecular biology, including genome annotation, molecular phylogenetics, and homology modeling. Fundamental to sequence alignment is the placement of gaps, which represent character insertions or deletions. We assessed the ability of a generalized affine gap cost model to reliably detect remote protein homology and to produce high-quality alignments. Generalized affine gap alignment with optimal gap parameters performed as well as the traditional affine gap model in remote homology detection. Evaluation of alignment quality showed that the generalized affine model aligns fewer residue pairs than the traditional affine model but achieves significantly higher per-residue accuracy. We conclude that generalized affine gap costs should be used when alignment accuracy carries more importance than aligned sequence length.},
-	Affiliation = {Department of Plant and Microbial Biology, University of California, Berkeley, USA.},
-	Aid = {10.1002/prot.20299 {$[$}doi{$]$}},
-	Au = {Brenner SE},
-	Author = {Zachariah, Marcus A and Crooks, Gavin E and Holbrook, Stephen R and Brenner, Steven E},
-	Ci = {(c) 2004 Wiley-Liss, Inc.},
-	Da = {20041229},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Edat = {2004/11/25 09:00},
-	Gr = {1 K22 HG00056/HG/NHGRI},
-	Jid = {8700181},
-	Journal = {Proteins},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Zachariah/2005.pdf},
-	Mhda = {2004/11/25 09:00},
-	Number = {2},
-	Own = {NLM},
-	Pages = {329-38},
-	Pl = {United States},
-	Pmid = {15562515},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {In-Process},
-	Title = {A generalized affine gap model significantly improves protein sequence alignment accuracy},
-	Volume = {58},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbVgIMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnK7L8rlgVQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACVphY2hhcmlhaAAAEAAIAADBcY1JAAAAEQAIAAC/K/h1AAAAAQAYAEZtWABGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6WmFjaGFyaWFoOjIwMDUucGRmAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvWmFjaGFyaWFoLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvWmFjaGFyaWFoLzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@book{Isaev:2004gl,
-	Author = {Isaev, Alexander},
-	Date-Added = {2005-08-01 11:28:13 -0700},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Publisher = {Springer},
-	Title = {Introduction to Mathematical Methods in Bioinformatics},
-	Year = {2004}}
-
-@article{Dill:1985la,
-	Author = {Dill, K A},
-	Date-Added = {2005-08-01 11:08:55 -0700},
-	Date-Modified = {2008-05-29 11:58:24 -0700},
-	Journal = {Biochemistry},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Dill/1985.pdf},
-	Pages = {1501--1509},
-	Title = {Theory for the folding and stability of globular proteins},
-	Volume = {24},
-	Year = {1985},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGIAAAAAAGIAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbCgIMTk4NS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnJ378GqgZQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABERpbGwAEAAIAADBcY1JAAAAEQAIAAC/Bwx2AAAAAQAYAEZsKABGa88ARmrVAEZqGwBGZGgAQIlDAAIAP2hzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6RGlsbDoxOTg1LnBkZgAADgASAAgAMQA5ADgANQAuAHAAZABmAA8ACAADAGgAcwByABIAO1VzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9EaWxsLzE5ODUucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QIi4uLy4uLy4uLy4uL0FydGljbGVzL0RpbGwvMTk4NS5wZGbSHh8mJ6InI1xOU0RpY3Rpb25hcnkACAARABoAHwApADIANwA6AD8AQQBTAFwAYgBpAHAAeACDAIUAiACKAIwAjwCRAJMAnQCqAK8AtwC5AkUCSgJTAl4CYgJwAncCgAKlAqoCrQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAK6}}
-
-@book{BLAST,
-	Author = {Korf, I and Yandell, M and Bedell, J},
-	Date-Added = {2005-08-01 11:00:22 -0700},
-	Date-Modified = {2008-05-29 11:57:23 -0700},
-	Publisher = {O'Reilly \& Associates, Inc},
-	Title = {BLAST},
-	Year = {2003}}
-
-@article{Karlin:1990be,
-	Abstract = {An unusual pattern in a nucleic acid or protein sequence or a region of strong similarity shared by two or more sequences may have biological significance. It is therefore desirable to know whether such a pattern can have arisen simply by chance. To identify interesting sequence patterns, appropriate scoring values can be assigned to the individual residues of a single sequence or to sets of residues when several sequences are compared. For single sequences, such scores can reflect biophysical properties such as charge, volume, hydrophobicity, or secondary structure potential; for multiple sequences, they can reflect nucleotide or amino acid similarity measured in a wide variety of ways. Using an appropriate random model, we present a theory that provides precise numerical formulas for assessing the statistical significance of any region with high aggregate score. A second class of results describes the composition of high-scoring segments. In certain contexts, these permit the choice of scoring systems which are "optimal" for distinguishing biologically relevant patterns. Examples are given of applications of the theory to a variety of protein sequences, highlighting segments with unusual biological features. These include distinctive charge regions in transcription factors and protooncogene products, pronounced hydrophobic segments in various receptor and transport proteins, and statistically significant subalignments involving the recently characterized cystic fibrosis gene.},
-	Affiliation = {Department of Mathematics, Stanford University, CA 94305.},
-	Au = {Altschul SF},
-	Author = {Karlin, S and Altschul, S F},
-	Da = {19900425},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19900425},
-	Edat = {1990/03/01},
-	Gr = {GM39907-02/GM/NIGMS},
-	Jid = {7505876},
-	Journal = {Proc Natl Acad Sci USA},
-	Keywords = {*Amino Acid Sequence and Analysis of Variance and *Base Sequence and *Evolution and *Models, Genetic and *Models, Statistical and Nucleic Acids/genetics and Probability and Proteins/genetics and Research Support, U.S. Gov't, Non-P.H.S. and Research Support, U.S. Gov't, P.H.S.},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Karlin/1990.pdf},
-	Lr = {20041117},
-	Mhda = {1990/03/01 00:01},
-	Number = {6},
-	Oid = {NASA: 90192788},
-	Own = {NLM},
-	Pages = {2264-8},
-	Pl = {UNITED STATES},
-	Pmid = {2315319},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Rn = {0 (Proteins)},
-	Sb = {S},
-	Stat = {MEDLINE},
-	Title = {Methods for assessing the statistical significance of molecular sequence features by using general scoring schemes},
-	Volume = {87},
-	Year = {1990},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbHwIMTk5MC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnKCb8WnnFQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABkthcmxpbgAQAAgAAMFxjUkAAAARAAgAAL8XAOEAAAABABgARmx8AEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpLYXJsaW46MTk5MC5wZGYAAA4AEgAIADEAOQA5ADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS2FybGluLzE5OTAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL0thcmxpbi8xOTkwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@article{Kschischo:2005ct,
-	Abstract = {Sequence alignment has been an invaluable tool for finding homologous sequences. The significance of the homology found is often quantified statistically by p-values. Theory for computing p-values exists for gapless alignments [Karlin, S., Altschul, S.F., 1990. Methods for assessing the statistical significance of molecular sequence features by using general scoring schemes. Proc. Natl. Acad. Sci. USA 87, 2264-2268; Karlin, S., Dembo A., 1992. Limit distributions of maximal segmental score among Markov-dependent partial sums. Adv. Appl. Probab. 24, 13-140], but a full generalization to alignments with gaps is not yet complete. We present a unified statistical analysis of two common sequence comparison algorithms: maximum-score (Smith-Waterman) alignments and their generalized probabilistic counterparts, including maximum-likelihood alignments and hidden Markov models. The most important statistical characteristic of these algorithms is the distribution function of the maximum score S(max), resp. the maximum free energy F(max), for mutually uncorrelated random sequences. This distribution is known empirically to be of the Gumbel form with an exponential tail P(S(max)>x) approximately exp(-lambdax) for maximum-score alignment and P(F(max)>x) approximately exp(-lambdax) for some classes of probabilistic alignment. We derive an exact expression for lambda for particular probabilistic alignments. This result is then used to obtain accurate lambda values for generic probabilistic and maximum-score alignments. Although the result demonstrated uses a simple match-mismatch scoring system, it is expected to be a good starting point for more general scoring functions.},
-	Affiliation = {University of Applied Sciences Koblenz, RheinAhrCampus Remagen, Sudallee 2, 53424 Remagen, Germany. kschischo@rheinahrcampus.de},
-	Aid = {10.1016/j.bulm.2004.07.001 {$[$}doi{$]$}},
-	Au = {Yu YK},
-	Author = {Kschischo, Maik and Lassig, Michael and Yu, Yi-Kuo},
-	Da = {20050204},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {20050517},
-	Edat = {2005/02/05 09:00},
-	Jid = {0401404},
-	Journal = {Bull Math Biol},
-	Keywords = {Algorithms and Computational Biology/methods and Likelihood Functions and Markov Chains and *Models, Statistical and Sequence Alignment/methods/*statistics \& numerical data},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Kschischo/2005.pdf},
-	Mhda = {2005/05/18 09:00},
-	Number = {1},
-	Own = {NLM},
-	Pages = {169-91},
-	Phst = {2004/07/13 {$[$}accepted{$]$}},
-	Pl = {United States},
-	Pmid = {15691544},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Toward an accurate statistics of gapped alignments},
-	Volume = {67},
-	Year = {2005},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGWAAAAAAGWAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbI4IMjAwNS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLIL8ncBZQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAACUtzY2hpc2NobwAAEAAIAADBcY1JAAAAEQAIAAC/J9KGAAAAAQAYAEZsjgBGa88ARmrVAEZqGwBGZGgAQIlDAAIARGhzcjpVc2VyczphdG9taWNwaXJhdGU6RG9jdW1lbnRzOkNhbHRlY2g6QXJ0aWNsZXM6S3NjaGlzY2hvOjIwMDUucGRmAA4AEgAIADIAMAAwADUALgBwAGQAZgAPAAgAAwBoAHMAcgASAEBVc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvS3NjaGlzY2hvLzIwMDUucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAnLi4vLi4vLi4vLi4vQXJ0aWNsZXMvS3NjaGlzY2hvLzIwMDUucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQJTAlgCYQJsAnACfgKFAo4CuAK9AsAAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAACzQ==}}
-
-@article{Pearson:1998vg,
-	Abstract = {The FASTA package of sequence comparison programs has been modified to provide accurate statistical estimates for local sequence similarity scores with gaps. These estimates are derived using the extreme value distribution from the mean and variance of the local similarity scores of unrelated sequences after the scores have been corrected for the expected effect of library sequence length. This approach allows accurate estimates to be calculated for both FASTA and Smith-Waterman similarity scores for protein/protein, DNA/DNA, and protein/translated-DNA comparisons. The accuracy of the statistical estimates is summarized for 54 protein families using FASTA and Smith-Waterman scores. Probability estimates calculated from the distribution of similarity scores are generally conservative, as are probabilities calculated using the Altschul-Gish lambda, kappa, and eta parameters. The performance of several alternative methods for correcting similarity scores for library-sequence length was evaluated using 54 protein superfamilies from the PIR39 database and 110 protein families from the Prosite/SwissProt rel. 34 database. Both regression-scaled and Altschul-Gish scaled scores perform significantly better than unscaled Smith-Waterman or FASTA similarity scores. When the Prosite/ SwissProt test set is used, regression-scaled scores perform slightly better; when the PIR database is used, Altschul-Gish scaled scores perform best. Thus, length-corrected similarity scores improve the sensitivity of database searches. Statistical parameters that are derived from the distribution of similarity scores from the thousands of unrelated sequences typically encountered in a database search provide accurate estimates of statistical significance that can be used to infer sequence homology.},
-	Affiliation = {Department of Biochemistry, University of Virginia, Charlottesville 22908, USA.},
-	Aid = {S0022283697915254 {$[$}pii{$]$}},
-	Au = {Pearson WR},
-	Author = {Pearson, W R},
-	Da = {19980407},
-	Date-Modified = {2007-03-15 11:54:41 -0400},
-	Dcom = {19980407},
-	Edat = {1998/03/26},
-	Gr = {LM04969/LM/NLM},
-	Jid = {2985088R},
-	Journal = {J Mol Biol},
-	Keywords = {Animals and Comparative Study and Databases, Factual and Evaluation Studies and Humans and Mice and Regression Analysis and Research Support, Non-U.S. Gov't and Research Support, U.S. Gov't, P.H.S. and *Sequence Homology and Sequence Homology, Amino Acid and Sequence Homology, Nucleic Acid and *Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Pearson/1998.pdf},
-	Lr = {20041117},
-	Mhda = {1998/03/26 00:01},
-	Number = {1},
-	Own = {NLM},
-	Pages = {71-84},
-	Pl = {ENGLAND},
-	Pmid = {9514730},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Sb = {IM},
-	Stat = {MEDLINE},
-	Title = {Empirical statistical estimates for sequence similarity searches},
-	Volume = {276},
-	Year = {1998},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGQAAAAAAGQAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbNgIMTk5OC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAnLSb8n1oRQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAAB1BlYXJzb24AABAACAAAwXGNSQAAABEACAAAvyg49AAAAAEAGABGbNgARmvPAEZq1QBGahsARmRoAECJQwACAEJoc3I6VXNlcnM6YXRvbWljcGlyYXRlOkRvY3VtZW50czpDYWx0ZWNoOkFydGljbGVzOlBlYXJzb246MTk5OC5wZGYADgASAAgAMQA5ADkAOAAuAHAAZABmAA8ACAADAGgAcwByABIAPlVzZXJzL2F0b21pY3BpcmF0ZS9Eb2N1bWVudHMvQ2FsdGVjaC9BcnRpY2xlcy9QZWFyc29uLzE5OTgucGRmABMAAS8AABUAAgAT//8AANIeHyAhWCRjbGFzc2VzWiRjbGFzc25hbWWjISIjXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxAlLi4vLi4vLi4vLi4vQXJ0aWNsZXMvUGVhcnNvbi8xOTk4LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCTQJSAlsCZgJqAngCfwKIArACtQK4AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsU=}}
-
-@article{Murphy:2000lr,
-	Abstract = {Protein design experiments have shown that the use of specific subsets of amino acids can produce foldable proteins. This prompts the question of whether there is a minimal amino acid alphabet which could be used to fold all proteins. In this work we make an analogy between sequence patterns which produce foldable sequences and those which make it possible to detect structural homologs by aligning sequences, and use it to suggest the possible size of such a reduced alphabet. We estimate that reduced alphabets containing 10-12 letters can be used to design foldable sequences for a large number of protein families. This estimate is based on the observation that there is little loss of the information necessary to pick out structural homologs in a clustered protein sequence database when a suitable reduction of the amino acid alphabet from 20 to 10 letters is made, but that this information is rapidly degraded when further reductions in the alphabet are made.},
-	Affiliation = {Department of Chemistry, Rutgers University, Wright-Rieman Laboratories, 610 Taylor Road, Piscataway, NJ 08854-8087, USA.},
-	Annote = {MWL},
-	Au = {Murphy, LR},
-	Author = {Murphy, L R AND Wallqvist, A AND Levy, R M},
-	Da = {20000616},
-	Date-Added = {2007-01-22 17:34:51 -0800},
-	Date-Modified = {2008-05-29 12:13:24 -0700},
-	Dcom = {20000616},
-	Edat = {2000/04/25 09:00},
-	Group = {Alphabets; Reviewed; ROC; Forward; Printed; Backward},
-	Jid = {8801484},
-	Journal = {Protein Eng},
-	Keywords = {Algorithms AND Amino Acid Sequence AND Amino Acids AND Protein Folding AND Proteins/*chemistry AND Sequence Alignment AND Software},
-	Language = {eng},
-	Local-Url = {file://localhost/Users/atomicpirate/Documents/Caltech/Articles/Murphy/2000.pdf},
-	Lr = {20061115},
-	Mhda = {2000/06/24 11:00},
-	Month = {Mar},
-	Number = {3},
-	Own = {NLM},
-	Pages = {149--152},
-	Pmid = {10775656},
-	Pst = {ppublish},
-	Pt = {Journal Article},
-	Pubm = {Print},
-	Read = {Yes},
-	So = {Protein Eng 2000 Mar;13(3):149-52.},
-	Stat = {MEDLINE},
-	Title = {Simplified amino acid alphabets for protein fold recognition and implications for folding},
-	Volume = {13},
-	Year = {2000},
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQGOAAAAAAGOAAIAAANoc3IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADBcRzJSCsAAABGbMQIMjAwMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlRsMEPmNtQREYgAAAAAAAEAAMAAAkgAAAAAAAAAAAAAAAAAAAABk11cnBoeQAQAAgAAMFxjUkAAAARAAgAAMEP+0sAAAABABgARmzEAEZrzwBGatUARmobAEZkaABAiUMAAgBBaHNyOlVzZXJzOmF0b21pY3BpcmF0ZTpEb2N1bWVudHM6Q2FsdGVjaDpBcnRpY2xlczpNdXJwaHk6MjAwMC5wZGYAAA4AEgAIADIAMAAwADAALgBwAGQAZgAPAAgAAwBoAHMAcgASAD1Vc2Vycy9hdG9taWNwaXJhdGUvRG9jdW1lbnRzL0NhbHRlY2gvQXJ0aWNsZXMvTXVycGh5LzIwMDAucGRmAAATAAEvAAAVAAIAE///AADSHh8gIVgkY2xhc3Nlc1okY2xhc3NuYW1loyEiI11OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QJC4uLy4uLy4uLy4uL0FydGljbGVzL011cnBoeS8yMDAwLnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkCSwJQAlkCZAJoAnYCfQKGAq0CsgK1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAsI=}}
-
-@comment{BibDesk Static Groups{
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<array>
-	<dict>
-		<key>group name</key>
-		<string>Journal Club</string>
-		<key>keys</key>
-		<string>Balter:2000fk,Bork:1992fk,Riddle:1997lr,Melo:2006lr,Grant:2004uq,Plaxco:1998kx</string>
-	</dict>
-	<dict>
-		<key>group name</key>
-		<string>TO PRINT</string>
-		<key>keys</key>
-		<string></string>
-	</dict>
-	<dict>
-		<key>group name</key>
-		<string>Web of Science</string>
-		<key>keys</key>
-		<string>Ng:2001le,Wistrand:2005oj,Brown:2007uk,Park:1998bq,Henikoff:2000ez,Tarnas:1998qm,Koonin:2000wj,Sadreyev:2003wo,Das:2000tx,Krishnamurthy:2007by,Karplus:1998lk,Olsen:2005fq,Bernardes:2007vf,Wistrand:2004wt,Eddy:1998ng,Wicker:2008hb,Head-Gordon:2001si,Won:2006eq</string>
-	</dict>
-</array>
-</plist>
-}}
-
-@comment{BibDesk Smart Groups{
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<array>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Backward</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - No backward search</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Forward</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - No forward search</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Annote</string>
-				<key>value</key>
-				<string>No ROC</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - No ROC</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Reviewed</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - Reviewed</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabets</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Printed</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - Unprinted</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>4</integer>
-				<key>key</key>
-				<string>Read</string>
-				<key>value</key>
-				<string>No</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - Unread</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Reviewed</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - Unreviewed</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Alphabet</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>ROC</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Annote</string>
-				<key>value</key>
-				<string>No ROC</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Alphabet - WF ROC</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Fold Space</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Add Citation</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Fold Space - Add Citation</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Cited</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Fold Space</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Fold Space - Cited</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Cited</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Fold Space</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Fold Space - Uncited</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Fold Space</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Reviewed</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>Fold Space - Unreviewed</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>LitSearch</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Local-Url</string>
-				<key>value</key>
-				<string>file</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>IBID</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>IBID - To submit</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>IBID</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Local-Url</string>
-				<key>value</key>
-				<string>file</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>IBID - Waiting</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Local-Url</string>
-				<key>value</key>
-				<string>file</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Local File</string>
-				<key>value</key>
-				<string>0</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>No PDF</string>
-	</dict>
-	<dict>
-		<key>conditions</key>
-		<array>
-			<dict>
-				<key>comparison</key>
-				<integer>2</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>LitSearch</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-			<dict>
-				<key>comparison</key>
-				<integer>3</integer>
-				<key>key</key>
-				<string>Group</string>
-				<key>value</key>
-				<string>Reviewed</string>
-				<key>version</key>
-				<string>1</string>
-			</dict>
-		</array>
-		<key>conjunction</key>
-		<integer>0</integer>
-		<key>group name</key>
-		<string>TO REVIEW</string>
-	</dict>
-</array>
-</plist>
-}}
diff --git a/interaction/output-context/bib_caltech_hp.cxt b/interaction/output-context/bib_caltech_hp.cxt
deleted file mode 100644
index 6181bae..0000000
--- a/interaction/output-context/bib_caltech_hp.cxt
+++ /dev/null
@@ -1,275 +0,0 @@
-B
-
-100
-70
-
-article0
-article1
-article2
-article3
-article4
-article5
-article6
-article7
-article8
-article9
-article10
-article11
-article12
-article13
-article14
-article15
-article16
-article17
-article18
-article19
-article20
-article21
-article22
-article23
-article24
-misc0
-misc1
-misc2
-article25
-article26
-article27
-article28
-article29
-article30
-article31
-article32
-article33
-article34
-article35
-article36
-article37
-article38
-article39
-article40
-article41
-article42
-inproceedings0
-article43
-inproceedings1
-article44
-book0
-article45
-article46
-article47
-article48
-article49
-article50
-article51
-article52
-article53
-article54
-article55
-article56
-article57
-article58
-article59
-article60
-article61
-article62
-article63
-article64
-article65
-article66
-article67
-article68
-article69
-article70
-article71
-article72
-article73
-article74
-article75
-article76
-article77
-article78
-article79
-article80
-article81
-article82
-article83
-article84
-article85
-article86
-article87
-article88
-inproceedings2
-article89
-article90
-article91
-article92
-abstract
-author
-date-added
-date-modified
-eprint
-journal
-number
-pages
-title
-url
-volume
-year
-bdsk-url-1
-date
-isi
-issn
-month
-publication-type
-bdsk-file-1
-m3
-ty
-address
-au
-da
-dcom
-dep
-doi
-edat
-group
-jid
-jt
-language
-lr
-mh
-mhda
-own
-phst
-pii
-pl
-pmid
-pst
-pt
-pubm
-rn
-sb
-so
-stat
-publisher
-bdsk-url-2
-keywords
-title1
-pmc
-si
-af
-di
-supplement
-booktitle
-gr
-ci
-annote
-isbn
-l3
-bdsk-file-2
-editor
-local-url
-rf
-co
-iso-source-abbreviation
-ps
-read
-XXXXXXXXXXXXX.........................................................
-XXXX.XXXX.XX.XXXXXX...................................................
-XXXX.XXXXXXXX.....XXX.................................................
-.XXX.X..XXXXX.....X...................................................
-.XXX.XXXX.XX..........................................................
-XXXX.XXXX.XXX..X..X..XXXXXXXXXXXXXXXXXXXXXXXXXX.......................
-XXXX.XXXXXXXX...X.X.......X.X..................XX.....................
-XXXX.XXXXXXXX.....X.X.......X....................XX...................
-XXXX.XXXX.XXX..X..X..XXXXXXXXXXXXXXXXXXXXXXXXXX....X..................
-.XXX.XXXX.XX......X...XXX..X.XXXXXXX.XXXXXXXXXX.......................
-XXXX.XXXX.XX......X..XXXX..X.XXXXXXX..XXXXX.XXX.......................
-XXXX.XXXX.XX...X..X..XXXX..X.XXXXXXX.XXXXXXXXXX.....X.................
-XXXX.XXXX.XXX.....X..XXXXX.X.XXXXXXXXXXXXXXXXXX....XX.................
-.XXX.X..XXXXX.....X.........X.........................................
-XXXX.XXXX.XX.XXXXX...................................X................
-XXXX.X.XX.XX.XXXXX...................................XX...............
-XXXX.XXXX.XX.XXXXX...................................X................
-XXXX.X.XX.XX..XX.X...................................XXX..............
-XXXX.XXXX.XX.XXXXX....................................................
-XXXX.XXXX.XX.XXXXX....................................................
-XXXX.X.XX.XX.XXXXX....................................X...............
-XXXX.XXXX.XX.XXXXX....................................................
-XXXX.XXXX.XX.XXXXX....................................................
-XXXX.XXXX.XX..XX.X....................................................
-XXXX.XXXX.XX.XXXXX....................................................
-.XXX.X.XX.XX..XX.X......................................X.............
-.XXX.X.XX.XX..XX.X......................................X.............
-.XXX.X.XX.XX..XX.X......................................X.............
-XXXX.XXXX.XX..XX.X....................................................
-XXXX.XXXX.XX..XX.X....................................................
-XXXX.XXXX.XX.XXXXX....................................................
-XXXX.XXXX.XX..XX.X....................................................
-XXXX.XXXX.XX......X..XXXX..XXXXXXXXX..XXXXXXXXX..........X............
-XXXX.X.XX.XX...X..X..XXXX..XXXXXXXXX..XXXXXXXXX..........X............
-XXXX.X.XX.XX...X..X..XXXX..XXXXXXXXX..XXXXXXXXX.......................
-XXXX.X.XX.XX...X..X..XXXX..XXXXXXXXX.XXXXXXXXXX...........X...........
-XXXX.XXXX.XX...X..X..XXXX..XXXXXXXXX..XXXXXXXXX.......................
-XXXX.XXXX.XXX..X..X..XXXX.XXXXXXXXXX..XXXXXXXXX..........XX...........
-XXXX.XXXX.XXX..X..X..XXXX.XXXXXXXXXX..XXXXXXXXX...........XX..........
-XXXX.XXXX.XXX..X..X..XXXX.XXXXXXXXXX..XXXXXXXXX...........X...........
-XXXX.XXXX.XXX.....X.X............................X....................
-.XXX.XXXX.XX..XX.XX.........X.........................................
-.XXX.XXXX.XX..X..XX.........X.........................................
-XXXX.XXXX.XX...X.....XXXX..X.XXX.XXX..XXXXXXXXX.......................
-XXXX.XXXX.XX...X.....XXXX..X.XXXXXXX.XXXXXXXXXX.......................
-.XXX.XXXXXXXX.....XXX..........................X...........XXXX.......
-.XXX....X..X......X.........X...........................X.............
-XXXX.XXXX.XXX..X..X..XXXXXXX.XXXXXXXXXXXXXXXXXX............X..........
-.XXX....XX.XX.....X..X.........................X........X......X......
-.XXX.XXXX.XXX...X.X.............................X...............X.....
-.XXX....X..X...................................X...........X..........
-XXXX.XXXX.XX...X..X..XXXX..X.XXXX.XX..XXXXXXXXX..X.......X......XX....
-XXXX.XXXX.XXX.....X..XXXX..X.XXXX.XX.XXXXXXXXXX..X..............X.....
-XXXX.XXXX.XXX..X..X....X..XX.X.X..XX.X.XXXX..XX.................X.....
-XXXX.XXXX.XX...X..X..XXXX..X.XXXX.XX..XXXXX.XXX..X..............X.....
-XXXX.XXXX.XX...X..X..XXXX..X.XXXX.XX.XXXXXXXXXX..X.......X......X.....
-XXXX.XXXX.XX...X..X..XXXX..X.XXXX.XX..XXXXXXXXX..X..............X.....
-XXXX.XXXX.XX.XXX.XX..............................X..............X.XXX.
-XXXX.XXXX.XXX..X..X..XXX.XXX.XXX..XXXXXXXXX.XXX.................X.....
-XXXX.XXXX.XXX.....X.X............................X..............X.....
-XXXX.XXXX.XXX..X..X..XXXXXXX.XXX..XXXXXXXXX.XXX..X..............X.....
-XXXX.XXXX.XX...X..X..XXXX..X.XXXX.XX..XXXXXXXXX..X..X....X......X.....
-.XXX.XXXX.XX...X..X...XXX..XXXXXX.XX..XXXXXXXXX..X..............X.....
-XXXX.XXXX.XXX..X..X..XXXX.XX.XXXX.XX.XXXXXXXXXX..X.......X......X.....
-XXXX.X.XX.XXX.....X..XXX.X.X.XXX..XXXXXXXXX.XXX..........X......X.....
-XXXX.XXXX.XXX..X..X..XXX.XXX.XXX..XXXXXXXXX.XXX..........X......X.....
-XXXX.XXXX.XXX.....X..XXXX..XXXXXX.XX.XXXXXXXXXX..X.......XX.....X....X
-XXXX.XXXXXXXX.....X.X.............................X.............X.....
-XXXX.XXXXXXXX.....X.X.............................X.............X.....
-XXXX.X..X..XX..X..X..X.X.XXX.X.X..XX.X.XXXX..XX.................X.....
-XXXX.XXXX.XXX..X..X..XXX.XXX.XXX..XXXXXXXXX.XXX.................X.....
-XXXX.XXXX.XXX..X..X..XXXXXXX.XXX..XXXXXXXXXXXXX..X..X....X.X....X.....
-.XXXXXXXXXXXX.....X.......X.X...................X...............X.....
-.XXX.X.XX.XX.XXXXXX.........X........................XX.........X.....
-.XXX.XXXX.XX..XX.XX.........X...................................X.....
-.XXX.XXXX.XX..XX.XX.........X...................................X.....
-XXXX.XXXX.XXX..X..X..XXX..XX.XXX..XX.XXXXXX.XXX............X....X.....
-XXXX.XXXX.XX...X..X..XXXX..XXXXXX.XX..XXXXXXXXX..X..............X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.X.XX.XX..XX.XX.........X..............................X....X.....
-XXXX.XXXX.XXX..X..X..XXXX.XX.XXXX.XX.XXXXXX.XXX..X..............XX....
-.XXX.XXXX.XX.XXXXXX.........X........................X..........X.....
-.XXX.XXXX.XX.XX.XXX.........X...................................X....X
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X..............................X....X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX..XX.XX.........X..............................X....X.....
-.XXX.XXXX.XX.XXXXXX.........X..............................X....X.....
-.XXX.XXXX.XX.XXXXXX.........X..............................X....X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.X.XX.XX..XX.XX.........X...........................X..X....X.....
-.XXX.XXXX.XX.XXXXXX.........X..............................X....X.....
-.XXX.XXXX.XX..XX.XX.........X..............................X....X.....
-.XXX.XXXX.XX.XXXXXX.........X...................................X.....
-.XXX.XXXX.XX.XXXXXX.........X..............................X....X.....
diff --git a/interaction/output-context/js_alle.cxt b/interaction/output-context/js_alle.cxt
deleted file mode 100644
index 65a4cef..0000000
--- a/interaction/output-context/js_alle.cxt
+++ /dev/null
@@ -1,606 +0,0 @@
-B
-
-293
-15
-
-bfh0
-bfh1
-bfh2
-bfh3
-bfh4
-bfh5
-bfh6
-bfh7
-bfh8
-bfh9
-epfl0
-epfl1
-epfl2
-epfl3
-epfl4
-epfl5
-epfl6
-epfl7
-epfl8
-epfl9
-epfl10
-epfl11
-epfl12
-epfl13
-epfl14
-epfl15
-epfl16
-epfl17
-epfl18
-epfl19
-epfl20
-epfl21
-epfl22
-epfl23
-epfl24
-epfl25
-epfl26
-epfl27
-epfl28
-epfl29
-epfl30
-epfl31
-epfl32
-epfl33
-epfl34
-epfl35
-epfl36
-epfl37
-epfl38
-epfl39
-epfl40
-epfl41
-epfl42
-epfl43
-epfl44
-epfl45
-epfl46
-epfl47
-epfl48
-epfl49
-epfl50
-epfl51
-epfl52
-epfl53
-epfl54
-epfl55
-epfl56
-epfl57
-fhnw0
-fhnw1
-fhnw2
-fhnw3
-fhnw4
-fhnw5
-fhnw6
-fhnw7
-fhnw8
-fhnw9
-fhnw10
-fhnw11
-fhnw12
-fhnw13
-fhnw14
-fhnw15
-fhnw16
-fhnw17
-fhnw18
-fhnw19
-fhnw20
-fhnw21
-fhnw22
-fhnw23
-fho0
-fho1
-fho2
-fho3
-fho4
-fho5
-fho6
-fho7
-hes-so0
-hes-so1
-hes-so2
-hes-so3
-hslu0
-hslu1
-hslu2
-hslu3
-hslu4
-hslu5
-hslu6
-hslu7
-hslu8
-hslu9
-hslu10
-hslu11
-hslu12
-hslu13
-hslu14
-hslu15
-supsi0
-supsi1
-supsi2
-supsi3
-supsi4
-supsi5
-supsi6
-unibas0
-unibas1
-unibas2
-unibas3
-unibas4
-unibas5
-unibe0
-unibe1
-unibe2
-unibe3
-unibe4
-unibe5
-unibe6
-unibe7
-unibe8
-unibe9
-unibe10
-unibe11
-unibe12
-unibe13
-unibe14
-unifr0
-unifr1
-unifr2
-unifr3
-unifr4
-unifr5
-unifr6
-unifr7
-unifr8
-unifr9
-unifr10
-unifr11
-unifr12
-unige0
-unige1
-unige2
-unige3
-unige4
-unige5
-unige6
-unige7
-unige8
-unige9
-unige10
-unige11
-unige12
-unige13
-unige14
-unige15
-unige16
-unige17
-unige18
-unige19
-unige20
-unige21
-unige22
-unige23
-unige24
-unil0
-unil1
-unil2
-unil3
-unil4
-unil5
-unil6
-unil7
-unil8
-unil9
-unil10
-unil11
-unil12
-unil13
-unine0
-unine1
-unine2
-unine3
-unine4
-unine5
-unisg0
-unisg1
-unisg2
-unisg3
-unisg4
-unisg5
-unisg6
-unisg7
-usi0
-usi1
-usi2
-usi3
-usi4
-usi5
-usi6
-usi7
-usi8
-usi9
-usi10
-usi11
-usi12
-usi13
-usi14
-usi15
-usi16
-usi17
-usi18
-usi19
-usi20
-usi21
-usi22
-usi23
-usi24
-usi25
-usi26
-usi27
-uzh0
-uzh1
-uzh2
-uzh3
-uzh4
-uzh5
-uzh6
-uzh7
-uzh8
-uzh9
-uzh10
-uzh11
-uzh12
-uzh13
-uzh14
-uzh15
-uzh16
-uzh17
-uzh18
-uzh19
-uzh20
-zfh0
-zfh1
-zfh2
-zfh3
-zfh4
-zfh5
-zfh6
-zfh7
-zfh8
-zfh9
-zfh10
-zfh11
-zfh12
-zfh13
-zfh14
-zfh15
-zfh16
-zfh17
-zfh18
-zfh19
-zfh20
-zfh21
-zfh22
-zfh23
-zfh24
-zfh25
-zfh26
-zfh27
-zfh28
-zfh29
-file
-home-page
-group-page
-org
-inst
-label
-given-name
-title
-family-name
-email
-group
-since
-role
-fields
-representative
-XXXXXXXXXXXX...
-XXXXXXXXXXX.XXX
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.XX
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XX.XXXXXXXXXXX.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-X..XXXXXX...X..
-XX.XXXXXXX.X.XX
-XXXXXXXXXXXXXX.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXX.XXXX.X.
-X..XXXX.XX.X.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.XX
-XXXXXXXXXXX..X.
-XX.XXXXXXX...XX
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-X..XXXXXXX.X.XX
-X..XXXXXXX.X.XX
-X..XXXXXXX.X.XX
-XX.XXXXXXX.X.XX
-X..XXXXXXX.X.XX
-XX.XXXXXXX.X.XX
-X..XXXXXXX.X.XX
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.X.
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XXXXXXXXXXX..X.
-XX.XXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX.XXX
-XXXXXXXXXXXX.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX..XX.
-XX.XXXXXXX...X.
-X..XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX..X..
-XX.XXXXXXX.....
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.XX
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-X..XXXXXXX..X..
-XX.XXXXXXX..X..
-X..XXXXXXX..X..
-X..XXXXXXX..X..
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX.XX.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.XX
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XX.XXXXXXX...X.
-XX.XXXXXXX...X.
-XX.XXXXXXXXX.X.
-XX.XXXXXXXX..X.
-XX.XXXXXXX.X.XX
-XX.XXXXXXX.X.X.
-XX.XXXXXXX...X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX...X.
-XX.XXXXXXX..XX.
-XX.XXXXXXX..XX.
-XX.XXXXXXX..XX.
-XX.XXXXXXX..XX.
-X..XXXXXXX..X..
-X..XXXXXXX..X..
-XXXXXXXXXXX..XX
-XXXXXXXXXX...X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXX..X.
-XX.XXXXXXX...X.
-XX.XXXXXXX...X.
-XX.XXXXXXX...X.
-XX.XXXXXXX.XXXX
-XX.XXXXXXX..XX.
-XX.XXXXXXX...X.
-XXXXXXXXXXX..X.
-XX.XXXXXXX..X..
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.XX
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XXXXXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-XX.XXXXXXX.X.X.
-X..XXXXXXX.XX..
-XX.XXXXXXX..X..
-XX.XXXXXXX.XX..
-XX.XXXXXXX.XX..
-XXXXXXXXXXXXXX.
-XXXXXXXXXXXXXX.
-XXXXXXXXXXXXXXX
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XX.XXXXXXX.X.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXX..X.
-XXXXXXXXXXXX.X.
-XXXXXXXXXXXX.X.
-XX.XXXX.XX.XX..
-XX.XXXXXXXX....
-X..XXXXXXXX....
-XX.XXXXXXXX....
-XX.XXXXXXXX....
-X..XXXXXXXX....
-X..XXXXXXXX....
-X..XXXXXXXX....
-X..XXXXXXXX....
-X..XXXX.XXX....
-XX.XXXXXXXX....
-X..XXXXXXXX....
-X..XXXX.XXX....
-X..XXXXXXXX....
-X..XXXX.XXX....
-X.XXXXXXXXX....
-X.XXXXXXXXX....
-X.XXXXXXXXX....
-X.XXXXX.XXX....
-X.XXXXXXXXX....
-X.XXXXXXXXX....
-X.XXXXXXXXX....
-X.XXXXXXXXX....
-XXXXXXXXXXX..X.
-X..XXXXXXXX...X
-X..XXXXXXXX....
-X..XXXX.XXX....
-X.XXXXXXXXX....
-X.XXXXXXXXX....
-XX.XXXXXXXX....
diff --git a/src/driver/Driver.java b/src/driver/Driver.java
index cedc457..71b5918 100644
--- a/src/driver/Driver.java
+++ b/src/driver/Driver.java
@@ -1,6 +1,5 @@
 package driver;
 
-import java.io.File;
 import java.util.ArrayList;
 
 import datasets.*;
@@ -17,6 +16,7 @@ public static void main(String[] args){
 		String graphvizFolder = System.getProperty("user.dir") + "\\interaction\\output-graph";
 		ArrayList<SemiStructuredDataset> datasets = new ArrayList<SemiStructuredDataset>();
 		ParserFactory factory = new ParserFactory();
+		//TODO: Add option to choose graphviz conversion file format
 		
 		//add datasets to be processed
 		datasets.add(new BibtexDataset(inputFolder + "\\" + "caltech_hp.bib"));
@@ -83,7 +83,7 @@ private static void parseDocument(SemiStructuredDataset dataset, String outputFo
 			System.out.println("noSing2\t---" + "\t" + lattice.latticeStats());	//if we have deleted singleton objects
 			lattice.exportLatticeToFile(graphvizFolder + "\\" + "0c_withoutSingletons2_" + parser.getTargetLatticeFilename(fileName));
 			graphvizString += "dot \"" + graphvizFolder + "\\" + "0c_withoutSingletons2_" + fileName(fileName) + ".dot\" -Tpng -o \"" + graphvizFolder + "\\images\\0c_withoutSingletons2_" + fileName(fileName) + ".png\"\n";
-		}		
+		}
 	
 		///LATTICEMERGE///
 		double score = cc.latticeMerge(firstOption, thirdOption);