From 7727bbd90c2fa1dc686e7ec0d193bbbefab7b04a Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Thu, 10 Oct 2024 13:39:44 -0700 Subject: [PATCH] Fix capitalization and UTF-8 characters in refs --- manuscript/cartography.bib | 85 ++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/manuscript/cartography.bib b/manuscript/cartography.bib index 867e1f9f..da0a5f2a 100644 --- a/manuscript/cartography.bib +++ b/manuscript/cartography.bib @@ -94,7 +94,7 @@ @Article{Wiens1998 } @article{Barrat-Charlaix2022, - title={TreeKnit: Inferring ancestral reassortment graphs of influenza viruses}, + title={{TreeKnit}: Inferring ancestral reassortment graphs of influenza viruses}, author={Barrat-Charlaix, Pierre and Vaughan, Timothy G and Neher, Richard A}, journal={PLoS Comput Biol}, volume={18}, @@ -146,18 +146,18 @@ @Article{Stoddard2022 Month="Mar" } -@article {Tran-Kiem2023, - author = {C{\'e}cile Tran-Kiem and Trevor Bedford}, - title = {Estimating the reproduction number and transmission heterogeneity from the size distribution of clusters of identical pathogen sequences}, - elocation-id = {2023.04.05.23287263}, - year = {2023}, - doi = {10.1101/2023.04.05.23287263}, - publisher = {Cold Spring Harbor Laboratory Press}, - abstract = {Quantifying transmission intensity and heterogeneity is crucial to ascertain the threat posed by infectious diseases and inform the design of interventions. Methods that jointly estimate the reproduction number R and the dispersion parameter k have however mainly remained limited to the analysis of epidemiological clusters or contact tracing data, whose collection often proves difficult. Here, we show that clusters of identical sequences are imprinted by the pathogen offspring distribution, and we derive an analytical formula for the distribution of the size of these clusters. 
We develop and evaluate a novel inference framework to jointly estimate the reproduction number and the dispersion parameter from the size distribution of clusters of identical sequences. We then illustrate its application across a range of epidemiological situations. Finally, we develop a hypothesis testing framework relying on clusters of identical sequences to determine whether a given pathogen genetic subpopulation is associated with increased or reduced transmissibility. Our work provides new tools to estimate the reproduction number and transmission heterogeneity from pathogen sequences without building a phylogenetic tree, thus making it easily scalable to large pathogen genome datasets.Significance statement For many infectious diseases, a small fraction of individuals has been documented to disproportionately contribute to onward spread. Characterizing the extent of superspreading is a crucial step towards the implementation of efficient interventions. Despite its epidemiological relevance, it remains difficult to quantify transmission heterogeneity. Here, we present a novel inference framework harnessing the size of clusters of identical pathogen sequences to estimate the reproduction number and the dispersion parameter. We also show that the size of these clusters can be used to estimate the transmission advantage of a pathogen genetic variant. This work provides crucial new tools to better characterize the spread of pathogens and evaluate their control.Competing Interest StatementThe authors have declared no competing interest.Funding StatementTB is a Howard Hughes Medical Institute Investigator. This work is supported by NIH NIGMS R35 GM119774 awarded to TB. 
Most of the analyses were completed using Fred Hutch Scientific Computing resources (NIH grants S10-OD-020069 and S10-OD-028685).Author DeclarationsI confirm all relevant ethical guidelines have been followed, and any necessary IRB and/or ethics committee approvals have been obtained.YesI confirm that all necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived, and that any patient/participant/sample identifiers included were not known to anyone (e.g., hospital staff, patients or participants themselves) outside the research group so cannot be used to identify individuals.YesI understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance).YesI have followed all appropriate research reporting guidelines, such as any relevant EQUATOR Network research reporting checklist(s) and other pertinent material, if applicable.YesThe codes and data used in this paper can be found at https://github.com/blab/size-genetic-clusters. 
https://github.com/blab/size-genetic-clusters}, - URL = {https://www.medrxiv.org/content/early/2023/10/10/2023.04.05.23287263}, - eprint = {https://www.medrxiv.org/content/early/2023/10/10/2023.04.05.23287263.full.pdf}, - journal = {medRxiv} -} +@article{Tran-Kiem2023, +author = {C{\'e}cile Tran-Kiem and Trevor Bedford }, +title = {Estimating the reproduction number and transmission heterogeneity from the size distribution of clusters of identical pathogen sequences}, +journal = {Proceedings of the National Academy of Sciences}, +volume = {121}, +number = {15}, +pages = {e2305299121}, +year = {2024}, +doi = {10.1073/pnas.2305299121}, +URL = {https://www.pnas.org/doi/abs/10.1073/pnas.2305299121}, +eprint = {https://www.pnas.org/doi/pdf/10.1073/pnas.2305299121}, +abstract = {For many infectious diseases, a small fraction of individuals has been documented to disproportionately contribute to onward spread. Characterizing the extent of superspreading is a crucial step towards the implementation of efficient interventions. Despite its epidemiological relevance, it remains difficult to quantify transmission heterogeneity. Here, we present an inference framework harnessing the size of clusters of identical pathogen sequences to estimate the reproduction number and the dispersion parameter. We also show that the size of these clusters can be used to estimate the transmission advantage of a pathogen genetic variant. This work provides crucial tools to better characterize the spread of pathogens and evaluate their control. Quantifying transmission intensity and heterogeneity is crucial to ascertain the threat posed by infectious diseases and inform the design of interventions. Methods that jointly estimate the reproduction number R and the dispersion parameter k have however mainly remained limited to the analysis of epidemiological clusters or contact tracing data, whose collection often proves difficult. 
Here, we show that clusters of identical sequences are imprinted by the pathogen offspring distribution, and we derive an analytical formula for the distribution of the size of these clusters. We develop and evaluate an inference framework to jointly estimate the reproduction number and the dispersion parameter from the size distribution of clusters of identical sequences. We then illustrate its application across a range of epidemiological situations. Finally, we develop a hypothesis testing framework relying on clusters of identical sequences to determine whether a given pathogen genetic subpopulation is associated with increased or reduced transmissibility. Our work provides tools to estimate the reproduction number and transmission heterogeneity from pathogen sequences without building a phylogenetic tree, thus making it easily scalable to large pathogen genome datasets.}} @Article{OToole2021, Author="O'Toole, A. and Scher, E. and Underwood, A. and Jackson, B. and Hill, V. and McCrone, J. T. and Colquhoun, R. and Ruis, C. and Abu-Dahab, K. and Taylor, B. and Yeats, C. and du Plessis, L. and Maloney, D. and Medd, N. and Attwood, S. W. and Aanensen, D. M. and Holmes, E. C. and Pybus, O. G. and Rambaut, A. ", @@ -194,7 +194,7 @@ @article{Aksamentov2021 } @Article{Argimon2016, - Author="Argimón, S. and Abudahab, K. and Goater, R. J. E. and Fedosejev, A. and Bhai, J. and Glasner, C. and Feil, E. J. and Holden, M. T. G. and Yeats, C. A. and Grundmann, H. and Spratt, B. G. and Aanensen, D. M. ", + Author="Argim\'{o}n, S. and Abudahab, K. and Goater, R. J. E. and Fedosejev, A. and Bhai, J. and Glasner, C. and Feil, E. J. and Holden, M. T. G. and Yeats, C. A. and Grundmann, H. and Spratt, B. G. and Aanensen, D. M. 
", Title="{{M}icroreact: visualizing and sharing data for genomic epidemiology and phylogeography}", Journal="Microb Genom", Year="2016", @@ -299,7 +299,7 @@ @Article{Vang2021 } @article{maaten2008visualizing, - title={Visualizing data using t-SNE}, + title={Visualizing data using {t-SNE}}, author={van der Maaten, Laurens and Hinton, Geoffrey}, journal={Journal of Machine Learning Research}, volume={9}, @@ -308,7 +308,7 @@ @article{maaten2008visualizing year={2008}, } @article{lel2018umap, - title={UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction}, + title={{UMAP}: Uniform Manifold Approximation and Projection for Dimension Reduction}, author={Leland McInnes and John Healy and James Melville}, year={2018}, eprint={1802.03426}, @@ -331,14 +331,6 @@ @article{jolliffe_cadima_2016 year={2016}, month={Apr} } -@article{pérez-losada_arenas_galán_palero_gonzález-candelas_2014, - title={Recombination in viruses: Mechanisms, methods of study, and evolutionary consequences}, - journal={Infection, Genetics and Evolution}, - publisher={Elsevier}, - author={Pérez-Losada, Marcos and Arenas, Miguel and Galán, Juan Carlos and Palero, Ferran and González-Candelas, Fernando}, - year={2015}, -} - @article{Stormo2011, title = "Maximally efficient modeling of {DNA} sequence motifs at all levels of complexity", @@ -433,14 +425,14 @@ @article{martin_murrell_khoosal_muhire_2017 year={2017}, } @article{novembre_2008, - title={Genes mirror geography within Europe}, + title={Genes mirror geography within {E}urope}, journal={Nature}, publisher={U.S. 
National Library of Medicine}, - author={Novembre, John and Johnson, Toby and Bryc, Katarzyna and Kutalik, Zoltán and Boyko, Adam R and Auton, Adam and Indap, Amit and King, Karen S and Bergmann, Sven and Nelson, Matthew R and et al.}, + author={Novembre, John and Johnson, Toby and Bryc, Katarzyna and Kutalik, Zolt\'{a}n and Boyko, Adam R and Auton, Adam and Indap, Amit and King, Karen S and Bergmann, Sven and Nelson, Matthew R and et al.}, year={2008}, } @article {faria, - author = {N. R. Faria, J. Quick, I.M. Claro, J. Thézé, J. G. de Jesus, M. Giovanetti, M. U. G. Kraemer, S. C. Hill, A. Black, A. C. da Costa, L. C. Franco, S. P. Silva, C.-H. Wu, J. Raghwani, S. Cauchemez, L. du Plessis, M. P. Verotti, W. K. de Oliveira, E. H. Carmo, G. E. Coelho, A. C. F. S. Santelli, L. C. Vinhal, C. M. Henriques, J. T. Simpson, M. Loose, K. G. Andersen, N. D. Grubaugh, S. Somasekar, C. Y. Chiu, J. E. Muñoz-Medina, C. R. Gonzalez-Bonilla, C. F. Arias, L. L. Lewis-Ximenez, S. A. Baylis, A. O. Chieppe, S. F. Aguiar, C. A. Fernandes, P. S. Lemos, B. L. S. Nascimento, H. A. O. Monteiro, I. C. Siqueira, M. G. de Queiroz, T. R. de Souza, J. F. Bezerra, M. R. Lemos, G. F. Pereira, D. Loudal, L. C. Moura, R. Dhalia, R. F. França, T. Magalhães, E. T. Marques Jr, T. Jaenisch, G. L. Wallau, M. C. de Lima, V. Nascimento, E. M. de Cerqueira, M. M. de Lima, D. L. Mascarenhas, J. P. Moura Neto, A. S. Levin, T. R. Tozetto-Mendoza, S. N. Fonseca, M. C. Mendes-Correa, F. P. Milagres, A. Segurado, E. C. Holmes, A. Rambaut, T. Bedford, M. R. T. Nunes, E. C. Sabino, L. C. J. Alcantara, N. J. Loman \& O. G. Pybus}, + author = {N. R. Faria, J. Quick, I.M. Claro, J. Th\'{e}z\'{e}, J. G. de Jesus, M. Giovanetti, M. U. G. Kraemer, S. C. Hill, A. Black, A. C. da Costa, L. C. Franco, S. P. Silva, C.-H. Wu, J. Raghwani, S. Cauchemez, L. du Plessis, M. P. Verotti, W. K. de Oliveira, E. H. Carmo, G. E. Coelho, A. C. F. S. Santelli, L. C. Vinhal, C. M. Henriques, J. T. Simpson, M. Loose, K. G. Andersen, N. 
D. Grubaugh, S. Somasekar, C. Y. Chiu, J. E. Mu\~{n}oz-Medina, C. R. Gonzalez-Bonilla, C. F. Arias, L. L. Lewis-Ximenez, S. A. Baylis, A. O. Chieppe, S. F. Aguiar, C. A. Fernandes, P. S. Lemos, B. L. S. Nascimento, H. A. O. Monteiro, I. C. Siqueira, M. G. de Queiroz, T. R. de Souza, J. F. Bezerra, M. R. Lemos, G. F. Pereira, D. Loudal, L. C. Moura, R. Dhalia, R. F. Fran\c{c}a, T. Magalh\~{a}es, E. T. Marques Jr, T. Jaenisch, G. L. Wallau, M. C. de Lima, V. Nascimento, E. M. de Cerqueira, M. M. de Lima, D. L. Mascarenhas, J. P. Moura Neto, A. S. Levin, T. R. Tozetto-Mendoza, S. N. Fonseca, M. C. Mendes-Correa, F. P. Milagres, A. Segurado, E. C. Holmes, A. Rambaut, T. Bedford, M. R. T. Nunes, E. C. Sabino, L. C. J. Alcantara, N. J. Loman \& O. G. Pybus}, title = {Establishment and cryptic transmission of Zika virus in Brazil and the Americas}, journal = {Nature}, year = {2017}, @@ -467,7 +459,7 @@ @Article{Arita2021 } @Article{metsky_2017, - Author="Metsky, H. C. and Matranga, C. B. and Wohl, S. and Schaffner, S. F. and Freije, C. A. and Winnicki, S. M. and West, K. and Qu, J. and Baniecki, M. L. and Gladden-Young, A. and Lin, A. E. and Tomkins-Tinch, C. H. and Ye, S. H. and Park, D. J. and Luo, C. Y. and Barnes, K. G. and Shah, R. R. and Chak, B. and Barbosa-Lima, G. and Delatorre, E. and Vieira, Y. R. and Paul, L. M. and Tan, A. L. and Barcellona, C. M. and Porcelli, M. C. and Vasquez, C. and Cannons, A. C. and Cone, M. R. and Hogan, K. N. and Kopp, E. W. and Anzinger, J. J. and Garcia, K. F. and Parham, L. A. and Ramírez, R. M. G. and Montoya, M. C. M. and Rojas, D. P. and Brown, C. M. and Hennigan, S. and Sabina, B. and Scotland, S. and Gangavarapu, K. and Grubaugh, N. D. and Oliveira, G. and Robles-Sikisaka, R. and Rambaut, A. and Gehrke, L. and Smole, S. and Halloran, M. E. and Villar, L. and Mattar, S. and Lorenzana, I. and Cerbino-Neto, J. and Valim, C. and Degrave, W. and Bozza, P. T. and Gnirke, A. and Andersen, K. G. and Isern, S. and Michael, S. F. and Bozza, F. 
A. and Souza, T. M. L. and Bosch, I. and Yozwiak, N. L. and MacInnis, B. L. and Sabeti, P. C. ", + Author="Metsky, H. C. and Matranga, C. B. and Wohl, S. and Schaffner, S. F. and Freije, C. A. and Winnicki, S. M. and West, K. and Qu, J. and Baniecki, M. L. and Gladden-Young, A. and Lin, A. E. and Tomkins-Tinch, C. H. and Ye, S. H. and Park, D. J. and Luo, C. Y. and Barnes, K. G. and Shah, R. R. and Chak, B. and Barbosa-Lima, G. and Delatorre, E. and Vieira, Y. R. and Paul, L. M. and Tan, A. L. and Barcellona, C. M. and Porcelli, M. C. and Vasquez, C. and Cannons, A. C. and Cone, M. R. and Hogan, K. N. and Kopp, E. W. and Anzinger, J. J. and Garcia, K. F. and Parham, L. A. and Ram\'{i}rez, R. M. G. and Montoya, M. C. M. and Rojas, D. P. and Brown, C. M. and Hennigan, S. and Sabina, B. and Scotland, S. and Gangavarapu, K. and Grubaugh, N. D. and Oliveira, G. and Robles-Sikisaka, R. and Rambaut, A. and Gehrke, L. and Smole, S. and Halloran, M. E. and Villar, L. and Mattar, S. and Lorenzana, I. and Cerbino-Neto, J. and Valim, C. and Degrave, W. and Bozza, P. T. and Gnirke, A. and Andersen, K. G. and Isern, S. and Michael, S. F. and Bozza, F. A. and Souza, T. M. L. and Bosch, I. and Yozwiak, N. L. and MacInnis, B. L. and Sabeti, P. C. 
", Title="{{Z}ika virus evolution and spread in the {A}mericas}", Journal="Nature", Year="2017", @@ -478,7 +470,7 @@ @Article{metsky_2017 } @article {rambaut_2008, - title={The genomic and epidemiological dynamics of human influenza A virus}, + title={The genomic and epidemiological dynamics of human influenza {A} virus}, url={https://www.nature.com/articles/nature06945}, journal={Nature }, publisher={Nature }, @@ -517,11 +509,11 @@ @article {sudmant_2015 } @article{diaz-papkovich_2019, - title={UMAP reveals cryptic population structure and phenotype heterogeneity in large genomic cohorts}, + title={{UMAP} reveals cryptic population structure and phenotype heterogeneity in large genomic cohorts}, url={https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1008432}, journal={PLoS Genet}, publisher={Public Library of Science}, - author={Diaz-Papkovich, Alex and Anderson-Trocmé, Luke and Ben-Eghan, Chief and Gravel, Simon}, + author={Diaz-Papkovich, Alex and Anderson-Trocm\'{e}, Luke and Ben-Eghan, Chief and Gravel, Simon}, year={2019}, month={Nov} } @@ -718,8 +710,8 @@ @article{campello2015hierarchical publisher={ACM New York, NY, USA} } @Article{molder_2021, - Author = { Mölder, F and Jablonski, KP and Letcher, B and Hall, MB and Tomkins-Tinch, CH and Sochat, V and Forster, J and Lee, S and Twardziok, SO and Kanitz, A and Wilm, A and Holtgrewe, M and Rahmann, S and Nahnsen, S and Köster, J}, - Title = {Sustainable data analysis with Snakemake [version 2; peer review: 2 approved] + Author = { M\"{o}lder, F and Jablonski, KP and Letcher, B and Hall, MB and Tomkins-Tinch, CH and Sochat, V and Forster, J and Lee, S and Twardziok, SO and Kanitz, A and Wilm, A and Holtgrewe, M and Rahmann, S and Nahnsen, S and K\"{o}ster, J}, + Title = {Sustainable data analysis with {Snakemake} [version 2; peer review: 2 approved] }, Journal = {F1000Research}, Volume = {10}, @@ -793,7 +785,7 @@ @book{HyndmanAthanasopoulos2021 author = {Hyndman, R.J. 
and Athanasopoulos, G.}, year = {2021}, title = {{F}orecasting: principles and practice}, - edition = {3rd edition}, + edition = {3rd}, publisher = {OTexts}, address = {Melbourne, Australia}, url = {OTexts.com/fpp3}, @@ -880,7 +872,7 @@ @article{Potter2019 @article{Zhu2020, author = {Zhu, Na and Zhang, Dingyu and Wang, Wenling and Li, Xingwang and Yang, Bo and Song, Jingdong and Zhao, Xiang and Huang, Baoying and Shi, Weifeng and Lu, Roujian and Niu, Peihua and Zhan, Faxian and Ma, Xuejun and Wang, Dayan and Xu, Wenbo and Wu, Guizhen and Gao, George F. and Tan, Wenjie}, - title = {A novel coronavirus from patients with pneumonia in China, 2019}, + title = {A novel coronavirus from patients with pneumonia in {C}hina, 2019}, journal = {New England Journal of Medicine}, volume = {382}, number = {8}, @@ -1015,22 +1007,25 @@ @inproceedings{Yang2006 year={2006} } -@misc{Delicado2024, +@article{Delicado2024, title={Multidimensional Scaling for Big Data}, - author={Pedro Delicado and Cristian Pachón-García}, + author={Pedro Delicado and Cristian Pach\'{o}n-Garc\'{i}a}, year={2024}, eprint={2007.11919}, - archivePrefix={arXiv}, - primaryClass={stat.CO} + journal={arXiv}, + doi={10.48550/arXiv.2007.11919}, + url={https://arxiv.org/abs/2007.11919} } -@misc{vandermaaten2013, - title={Barnes-Hut-SNE}, +@article{vandermaaten2013, + title={{Barnes-Hut-SNE}}, author={Laurens van der Maaten}, year={2013}, eprint={1301.3342}, - archivePrefix={arXiv}, - primaryClass={cs.LG} + journal={arXiv}, + primaryClass={cs.LG}, + doi={10.48550/arXiv.1301.3342}, + url={https://arxiv.org/abs/1301.3342} } @InProceedings{Yang2013, @@ -1052,7 +1047,7 @@ @InProceedings{Yang2013 @article{vandermaaten2014, author = {Laurens van der Maaten}, - title = {Accelerating t-SNE using Tree-Based Algorithms}, + title = {Accelerating {t-SNE} using Tree-Based Algorithms}, journal = {Journal of Machine Learning Research}, year = {2014}, volume = {15}, @@ -1074,7 +1069,7 @@ @Article{Kupperman2022 @article 
{Nguyen2024, author = {Nguyen, Thao-Quyen and Hutter, Carl and Markin, Alexey and Thomas, Megan and Lantz, Kristina and Killian, Mary Lea and Janzen, Garrett M. and Vijendran, Sriram and Wagle, Sanket and Inderski, Blake and Magstadt, Drew R. and Li, Ganwu and Diel, Diego G. and Frye, Elisha Anna and Dimitrov, Kiril M. and Swinford, Amy K. and Thompson, Alexis C. and Snevik, Kevin R. and Suarez, David L. and Spackman, Erica and Lakin, Steven M. and Ahola, Sara C. and Johnson, Kammy R. and Baker, Amy L. and Robbe-Austerman, Suelee and Torchetti, Mia Kim and Anderson, Tavis K.}, - title = {Emergence and interstate spread of highly pathogenic avian influenza A(H5N1) in dairy cattle}, + title = {Emergence and interstate spread of highly pathogenic avian influenza {A(H5N1)} in dairy cattle}, elocation-id = {2024.05.01.591751}, year = {2024}, doi = {10.1101/2024.05.01.591751},