From 4371dbe6c65771165475e0ef16ed2ba9d5028643 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Wed, 24 Jul 2024 15:18:36 -0700 Subject: [PATCH] Cite related network-based visualizations Adds citations to the introduction to clarify the past use of map- or network-based visualizations in viral epidemiology and adds a note to the discussion that public health officials can visualize embeddings in standard tools for genomic epidemiology. Closes #104 --- manuscript/cartography.bib | 83 ++++++++++++++++++++++++++++++++++++++ manuscript/cartography.tex | 4 +- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/manuscript/cartography.bib b/manuscript/cartography.bib index 98b3dd79..c0b2bd15 100644 --- a/manuscript/cartography.bib +++ b/manuscript/cartography.bib @@ -215,6 +215,89 @@ @Article{Campbell2021 Month="Sep" } +@Article{Wertheim2017, + Author="Wertheim, J. O. and Kosakovsky Pond, S. L. and Forgione, L. A. and Mehta, S. R. and Murrell, B. and Shah, S. and Smith, D. M. and Scheffler, K. and Torian, L. V. ", + Title="{{S}ocial and {G}enetic {N}etworks of {H}{I}{V}-1 {T}ransmission in {N}ew {Y}ork {C}ity}", + Journal="PLoS Pathog", + Year="2017", + Volume="13", + Number="1", + Pages="e1006000", + Month="Jan" +} + +@Article{Campbell2020, + Author="Campbell, E. M. and Patala, A. and Shankar, A. and Li, J. F. and Johnson, J. A. and Westheimer, E. and Gay, C. L. and Cohen, S. E. and Switzer, W. M. and Peters, P. J. 
", + Title="{{P}hylodynamic {A}nalysis {C}omplements {P}artner {S}ervices by {I}dentifying {A}cute and {U}nreported {H}{I}{V} {T}ransmission}", + Journal="Viruses", + Year="2020", + Volume="12", + Number="2", + Month="Jan" +} + +@ARTICLE{Kirbiyik2020, + title = "Network Characteristics and Visualization of {COVID-19} Outbreak + in a Large Detention Facility in the {U}nited {S}tates - {C}ook {C}ounty, + {I}llinois, 2020", + author = "K{\i}rb{\i}y{\i}k, Uzay and Binder, Alison M and Ghinai, Isaac + and Zawitz, Chad and Levin, Rebecca and Samala, Usha and Smith, + Michelle Bryant and Gubser, Jane and Jones, Bridgette and Varela, + Kate and Rafinski, Josh and Fitzgerald, Anne and Orris, Peter and + Bahls, Alex and Welbel, Sharon and Mennella, Connie and Black, + Stephanie R and Armstrong, Paige A", + abstract = "Correctional and detention facilities have been + disproportionately affected by coronavirus disease 2019 + (COVID-19) because of shared space and movement of staff members + and detained persons within facilities (1,2). During March + 1-April 30, 2020, at Cook County Jail in Chicago, Illinois, >900 + COVID-19 cases were diagnosed across all 10 housing divisions, + representing 13 unique buildings.(†) Movement within the jail was + examined through network analyses and visualization, a field that + examines elements within a network and the connections between + them. This methodology has been used to supplement contact + tracing investigations for tuberculosis and to understand how + social networks contribute to transmission of sexually + transmitted infections (3-5). Movements and connections of 5,884 + persons (3,843 [65\%] detained persons and 2,041 [35\%] staff + members) at the jail during March 1-April 30 were analyzed. A + total of 472 (12.3\%) COVID-19 cases were identified among + detained persons and 198 (9.7\%) among staff members. 
Among + 103,701 shared-shift connections among staff members, 1.4\% + occurred between persons with COVID-19, a percentage that is + significantly higher than the expected 0.9\% by random occurrence + alone (p<0.001), suggesting that additional transmission occurred + within this group. The observed connections among detained + persons with COVID-19 were significantly lower than expected + (1.0\% versus 1.1\%, p<0.001) when considering only the housing + units in which initial transmission occurred, suggesting that the + systematic isolation of persons with COVID-19 is effective at + limiting transmission. A network-informed approach can identify + likely points of high transmission, allowing for interventions to + reduce transmission targeted at these groups or locations, such + as by reducing convening of staff members, closing breakrooms, + and cessation of contact sports.", + journal = "MMWR Morb Mortal Wkly Rep", + volume = 69, + number = 44, + pages = "1625--1630", + month = nov, + year = 2020, + address = "United States", + language = "en" +} + +@Article{Vang2021, + Author="Vang, K. E. and Krow-Lucal, E. R. and James, A. E. and Cima, M. J. and Kothari, A. and Zohoori, N. and Porter, A. and Campbell, E. M. 
", + Title="{{P}articipation in {F}raternity and {S}orority {A}ctivities and the {S}pread of {C}{O}{V}{I}{D}-19 {A}mong {R}esidential {U}niversity {C}ommunities - {A}rkansas, {A}ugust 21-{S}eptember 5, 2020}", + Journal="MMWR Morb Mortal Wkly Rep", + Year="2021", + Volume="70", + Number="1", + Pages="20--23", + Month="Jan" +} + @article{maaten2008visualizing, title={Visualizing data using t-SNE}, author={van der Maaten, Laurens and Hinton, Geoffrey}, diff --git a/manuscript/cartography.tex b/manuscript/cartography.tex index a1730134..78622ea1 100644 --- a/manuscript/cartography.tex +++ b/manuscript/cartography.tex @@ -132,7 +132,8 @@ \section{Introduction} For example, genomic epidemiologists commonly need to 1) visualize the genetic relationships among closely related virus samples \citep{Argimon2016,Campbell2021}, 2) identify clusters of closely-related genomes that represent regional outbreaks or new variants of concern \citep{OToole2022,McBroome2022,Stoddard2022,Tran-Kiem2023}, 3) place newly sequenced viral genomes in the evolutionary context of other circulating samples \citep{OToole2021,Turakhia2021,Aksamentov2021}. Given that these common use cases rely on genetic distances between samples, tree-free statistical methods that operate on pairwise distances could be sufficient to address each case. As these tree-free methods lack a formal biological model of evolutionary relationships, they make weak assumptions about the input data and therefore should be applicable to pathogen genomes that violate phylogenetic assumptions. -Furthermore, methods that describe genetic relationships with map-like visualizations may feel more familiar to public health practitioners, and therefore more easily applied for public health action. 
+Furthermore, methods that describe genetic relationships with network-like visualizations may feel more familiar to public health practitioners who are accustomed to viewing contact tracing networks alongside genomic information in tools like MicrobeTrace \citep{Campbell2021} or Microreact \citep{Argimon2016} for viral pathogens like HIV \citep{Wertheim2017,Campbell2020} and SARS-CoV-2 \citep{Kirbiyik2020,Vang2021}. +For this reason, reduced-dimensionality representations of genomic relationships may be more easily applied for public health action. Common statistical approaches to analyzing variation from genome alignments start by transforming alignments into either a matrix that codes each distinct nucleotide character as an integer or a distance matrix representing the pairwise distances between sequences. The first of these transformations is the first step prior to performing a principal component analysis (PCA) to find orthogonal representations of the inputs that explain the most variance \citep{jolliffe_cadima_2016}. @@ -498,6 +499,7 @@ \subsection{Current applications and future directions for applying dimensionali This application benefits routine surveillance efforts for seasonal influenza performed by Nextstrain where identification of HA and NA reassortment may indicate important fitness or transmission patterns \citep{Huddleston2024}. Researchers can also quickly apply these methods in response to outbreaks like the recent H5N1 avian influenza outbreak in cattle in the United States \citep{Nguyen2024}. As a proof of concept, we applied t-SNE to all eight gene segments of recent H5N1 sequences, identified clusters with HDBSCAN, and confirmed the previously reported reassortment groups with PB2/NP and the other gene segments in the cattle outbreak.
+Researchers can easily visualize their embeddings in standard visualization tools for genomic epidemiology including Nextstrain's Auspice \citep{Hadfield2018}, MicrobeTrace \citep{Campbell2021}, or Microreact \citep{Argimon2016}. Some limitations noted above suggest future directions for this line of research. In the long term, researchers may benefit from analyzing viral genomes with a broader range of dimensionality reduction methods including neural network models \citep{Kupperman2022,Chari2023}.