This repository has been archived by the owner on Dec 18, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhydra.bib
298 lines (274 loc) · 21.5 KB
/
hydra.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
% FLEXBAR: barcode and adapter processing for sequencing reads (Biology, 2012).
@article{dodt_flexbarflexible_2012,
  author = {Dodt, Matthias and Roehr, Johannes T. and Ahmed, Rina and Dieterich, Christoph},
  title = {{FLEXBAR}—{Flexible} {Barcode} and {Adapter} {Processing} for {Next}-{Generation} {Sequencing} {Platforms}},
  journal = {Biology},
  volume = {1},
  number = {3},
  pages = {895--905},
  month = dec,
  year = {2012},
  doi = {10.3390/biology1030895},
  url = {http://www.mdpi.com/2079-7737/1/3/895},
  urldate = {2015-03-30},
  language = {en},
  copyright = {http://creativecommons.org/licenses/by/3.0/},
  keywords = {clipping, demultiplexing, High-throughput sequencing, Quality Control, trimming},
  abstract = {Quantitative and systems biology approaches benefit from the unprecedented depth of next-generation sequencing. A typical experiment yields millions of short reads, which oftentimes carry particular sequence tags. These tags may be: (a) specific to the sequencing platform and library construction method (e.g., adapter sequences); (b) have been introduced by experimental design (e.g., sample barcodes); or (c) constitute some biological signal (e.g., splice leader sequences in nematodes). Our software FLEXBAR enables accurate recognition, sorting and trimming of sequence tags with maximal flexibility, based on exact overlap sequence alignment. The software supports data formats from all current sequencing platforms, including color-space reads. FLEXBAR maintains read pairings and processes separate barcode reads on demand. Our software facilitates the fine-grained adjustment of sequence tag detection parameters and search regions. FLEXBAR is a multi-threaded software and combines speed with precision. Even complex read processing scenarios might be executed with a single command line call. We demonstrate the utility of the software in terms of read mapping applications, library demultiplexing and splice leader detection. FLEXBAR and additional information is available for academic use from the website: http://sourceforge.net/projects/flexbar/.},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/TG2E58JT/htm.html:text/html},
}
% BIOM (Biological Observation Matrix) file format paper (GigaScience, 2012).
% The author list is truncated in this record; "and others" lets the
% bibliography style render the "et al." itself.
@article{mcdonald_biological_2012,
  author = {McDonald, Daniel and Clemente, Jose C. and Kuczynski, Justin and Rideout, Jai R. and Stombaugh, Jesse and Wendel, Doug and Wilke, Andreas and Huse, Susan and Hufnagle, John and Meyer, Folker and others},
  title = {The {Biological} {Observation} {Matrix} ({BIOM}) format or: how {I} learned to stop worrying and love the ome-ome},
  journal = {GigaScience},
  volume = {1},
  number = {1},
  pages = {7},
  month = jul,
  year = {2012},
  issn = {2047-217X},
  doi = {10.1186/2047-217X-1-7},
}
% SINA: multiple sequence alignment of ribosomal RNA genes (Bioinformatics, 2012).
@article{pruesse_sina:_2012,
  author = {Pruesse, E. and Peplies, J. and Glöckner, F. O.},
  title = {{SINA}: {Accurate} high-throughput multiple sequence alignment of ribosomal {RNA} genes},
  shorttitle = {{SINA}},
  journal = {Bioinformatics},
  volume = {28},
  number = {14},
  pages = {1823--1829},
  month = jul,
  year = {2012},
  issn = {1367-4803, 1460-2059},
  doi = {10.1093/bioinformatics/bts252},
  url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/bts252},
  urldate = {2016-08-24},
  language = {en},
}
% VSEARCH software release archived on Zenodo (the DOI prefix 10.5281 is Zenodo's).
% Typed as misc, like the other software records in this file.
% NOTE(review): the exported record listed the third author only by the handle
% "xflouris"; it is given here as Tomáš Flouri - confirm against the Zenodo record.
@misc{rognes_vsearch:_2016,
  author = {Rognes, Torbjørn and Mahé, Frédéric and Flouri, Tomáš},
  title = {{VSEARCH}: {Versatile} open-source tool for metagenomics},
  publisher = {Zenodo},
  month = feb,
  year = {2016},
  doi = {10.5281/zenodo.15524},
  url = {https://doi.org/10.5281/zenodo.15524},
}
% UPARSE: OTU sequences from microbial amplicon reads (Nature Methods, 2013).
@article{edgar_uparse:_2013,
  author = {Edgar, Robert C.},
  title = {{UPARSE}: highly accurate {OTU} sequences from microbial amplicon reads},
  shorttitle = {{UPARSE}},
  journal = {Nature Methods},
  volume = {10},
  number = {10},
  pages = {996--998},
  month = oct,
  year = {2013},
  issn = {1548-7091},
  doi = {10.1038/nmeth.2604},
  url = {http://www.nature.com/nmeth/journal/v10/n10/abs/nmeth.2604.html},
  urldate = {2015-03-30},
  language = {en},
  copyright = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
  abstract = {Amplified marker-gene sequences can be used to understand microbial community structure, but they suffer from a high level of sequencing and amplification artifacts. The UPARSE pipeline reports operational taxonomic unit (OTU) sequences with ≤1\% incorrect bases in artificial microbial community tests, compared with {\textgreater}3\% incorrect bases commonly reported by other methods. The improved accuracy results in far fewer OTUs, consistently closer to the expected number of species in a community.},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/2SIRUFDN/nmeth.2604.html:text/html},
}
% USEARCH / UBLAST / UCLUST: fast sequence search and clustering (Bioinformatics, 2010).
@article{edgar_search_2010,
  author = {Edgar, Robert C.},
  title = {Search and clustering orders of magnitude faster than {BLAST}},
  journal = {Bioinformatics},
  volume = {26},
  number = {19},
  pages = {2460--2461},
  month = oct,
  year = {2010},
  issn = {1367-4803, 1460-2059},
  doi = {10.1093/bioinformatics/btq461},
  pmid = {20709691},
  url = {http://bioinformatics.oxfordjournals.org/content/26/19/2460},
  urldate = {2015-03-30},
  language = {en},
  abstract = {Motivation: Biological sequence data is accumulating rapidly, motivating the development of improved high-throughput methods for sequence classification.
Results: UBLAST and USEARCH are new algorithms enabling sensitive local and global search of large sequence databases at exceptionally high speeds. They are often orders of magnitude faster than BLAST in practical applications, though sensitivity to distant protein relationships is lower. UCLUST is a new clustering method that exploits USEARCH to assign sequences to clusters. UCLUST offers several advantages over the widely used program CD-HIT, including higher speed, lower memory use, improved sensitivity, clustering at lower identities and classification of much larger datasets.
Availability: Binaries are available at no charge for non-commercial use at http://www.drive5.com/usearch
Contact: robert@drive5.com
Supplementary information: Supplementary data are available at Bioinformatics online.},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/C6G9XQCX/2460.html:text/html},
}
% UNITE database and the "species hypothesis" concept for fungal ITS sequences
% (Molecular Ecology, 2013).
@article{koljalg_towards_2013,
  author = {Kõljalg, Urmas and Nilsson, R. Henrik and Abarenkov, Kessy and Tedersoo, Leho and Taylor, Andy F. S. and Bahram, Mohammad and Bates, Scott T. and Bruns, Thomas D. and Bengtsson-Palme, Johan and Callaghan, Tony M. and Douglas, Brian and Drenkhan, Tiia and Eberhardt, Ursula and Dueñas, Margarita and Grebenc, Tine and Griffith, Gareth W. and Hartmann, Martin and Kirk, Paul M. and Kohout, Petr and Larsson, Ellen and Lindahl, Björn D. and Lücking, Robert and Martín, María P. and Matheny, P. Brandon and Nguyen, Nhu H. and Niskanen, Tuula and Oja, Jane and Peay, Kabir G. and Peintner, Ursula and Peterson, Marko and Põldmaa, Kadri and Saag, Lauri and Saar, Irja and Schüßler, Arthur and Scott, James A. and Senés, Carolina and Smith, Matthew E. and Suija, Ave and Taylor, D. Lee and Telleria, M. Teresa and Weiss, Michael and Larsson, Karl-Henrik},
  title = {Towards a unified paradigm for sequence-based identification of fungi},
  journal = {Molecular Ecology},
  volume = {22},
  number = {21},
  pages = {5271--5277},
  month = nov,
  year = {2013},
  issn = {1365-294X},
  doi = {10.1111/mec.12481},
  url = {http://onlinelibrary.wiley.com/doi/10.1111/mec.12481/abstract},
  urldate = {2016-05-13},
  language = {en},
  keywords = {bioinformatics, DNA barcoding, ecological genomics, Fungi, Microbial diversity},
  abstract = {The nuclear ribosomal internal transcribed spacer (ITS) region is the formal fungal barcode and in most cases the marker of choice for the exploration of fungal diversity in environmental samples. Two problems are particularly acute in the pursuit of satisfactory taxonomic assignment of newly generated ITS sequences: (i) the lack of an inclusive, reliable public reference data set and (ii) the lack of means to refer to fungal species, for which no Latin name is available in a standardized stable way. Here, we report on progress in these regards through further development of the UNITE database (http://unite.ut.ee) for molecular identification of fungi. All fungal species represented by at least two ITS sequences in the international nucleotide sequence databases are now given a unique, stable name of the accession number type (e.g. Hymenoscyphus pseudoalbidus{\textbar}GU586904{\textbar}SH133781.05FU), and their taxonomic and ecological annotations were corrected as far as possible through a distributed, third-party annotation effort. We introduce the term ‘species hypothesis’ (SH) for the taxa discovered in clustering on different similarity thresholds (97–99\%). An automatically or manually designated sequence is chosen to represent each such SH. These reference sequences are released (http://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and in the QIIME pipeline. The system and the data will be updated automatically as the number of public fungal ITS sequences grows. We invite everybody in the position to improve the annotation or metadata associated with their particular fungal lineages of expertise to do so through the new Web-based sequence management system in UNITE.},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/6XHCX9JM/abstract.html:text/html},
}
% Snakemake workflow engine (Bioinformatics, 2012).
% NOTE(review): the contact address in the abstract was restored from the
% published abstract - confirm against the journal page.
@article{koster_snakemakescalable_2012,
  author = {Köster, Johannes and Rahmann, Sven},
  title = {Snakemake—a scalable bioinformatics workflow engine},
  journal = {Bioinformatics},
  volume = {28},
  number = {19},
  pages = {2520--2522},
  month = oct,
  year = {2012},
  issn = {1367-4803, 1460-2059},
  doi = {10.1093/bioinformatics/bts480},
  pmid = {22908215},
  url = {http://bioinformatics.oxfordjournals.org/content/28/19/2520},
  urldate = {2015-03-30},
  language = {en},
  abstract = {Summary: Snakemake is a workflow engine that provides a readable Python-based workflow definition language and a powerful execution environment that scales from single-core workstations to compute clusters without modifying the workflow. It is the first system to support the use of automatically inferred multiple named wildcards (or variables) in input and output filenames.
Availability: http://snakemake.googlecode.com.
Contact: johannes.koester@uni-due.de},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/D4DC647Q/2520.html:text/html},
}
% Ribosomal Database Project (RDP) release 11 data and tools
% (Nucleic Acids Research, 2014).
@article{cole_ribosomal_2014,
  author = {Cole, James R. and Wang, Qiong and Fish, Jordan A. and Chai, Benli and McGarrell, Donna M. and Sun, Yanni and Brown, C. Titus and Porras-Alfaro, Andrea and Kuske, Cheryl R. and Tiedje, James M.},
  title = {Ribosomal {Database} {Project}: data and tools for high throughput {rRNA} analysis},
  shorttitle = {Ribosomal {Database} {Project}},
  journal = {Nucleic Acids Research},
  volume = {42},
  number = {D1},
  pages = {D633--D642},
  month = jan,
  year = {2014},
  issn = {0305-1048, 1362-4962},
  doi = {10.1093/nar/gkt1244},
  pmid = {24288368},
  url = {http://nar.oxfordjournals.org/content/42/D1/D633},
  urldate = {2015-03-30},
  language = {en},
  abstract = {Ribosomal Database Project (RDP; http://rdp.cme.msu.edu/) provides the research community with aligned and annotated rRNA gene sequence data, along with tools to allow researchers to analyze their own rRNA gene sequences in the RDP framework. RDP data and tools are utilized in fields as diverse as human health, microbial ecology, environmental microbiology, nucleic acid chemistry, taxonomy and phylogenetics. In addition to aligned and annotated collections of bacterial and archaeal small subunit rRNA genes, RDP now includes a collection of fungal large subunit rRNA genes. RDP tools, including Classifier and Aligner, have been updated to work with this new fungal collection. The use of high-throughput sequencing to characterize environmental microbial populations has exploded in the past several years, and as sequence technologies have improved, the sizes of environmental datasets have increased. With release 11, RDP is providing an expanded set of tools to facilitate analysis of high-throughput data, including both single-stranded and paired-end reads. In addition, most tools are now available as open source packages for download and local use by researchers with high-volume needs or who would like to develop custom analysis pipelines.},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/RQTZ6EGE/D633.html:text/html},
}
% Sickle: quality-based read trimming tool (software, GitHub repository).
% Initials are written separately so that each one is parsed as its own given name.
@misc{joshi_sickle:_2011,
  author = {Joshi, N. A. and Fass, J. N.},
  title = {Sickle: {A} sliding-window, adaptive, quality-based trimming tool for {FastQ} files},
  year = {2011},
  url = {https://github.com/najoshi/sickle},
  urldate = {2015-03-30},
  file = {najoshi/sickle · GitHub:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/HKSSS5UE/sickle.html:text/html},
}
% PANDAseq: paired-end read assembler for Illumina amplicon data
% (BMC Bioinformatics, 2012). "Illumina" is brace-protected so that
% sentence-casing styles keep the capital.
@article{masella_pandaseq:_2012,
  author = {Masella, Andre P. and Bartram, Andrea K. and Truszkowski, Jakub M. and Brown, Daniel G. and Neufeld, Josh D.},
  title = {{PANDAseq}: paired-end assembler for {Illumina} sequences},
  shorttitle = {{PANDAseq}},
  journal = {BMC Bioinformatics},
  volume = {13},
  pages = {31},
  year = {2012},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-13-31},
  pmid = {22333067},
  pmcid = {PMC3471323},
  language = {en},
  keywords = {Bacteria, Metagenomics, RNA, Bacterial, RNA, Ribosomal, 16S, Software},
  abstract = {BACKGROUND: Illumina paired-end reads are used to analyse microbial communities by targeting amplicons of the 16S rRNA gene. Publicly available tools are needed to assemble overlapping paired-end reads while correcting mismatches and uncalled bases; many errors could be corrected to obtain higher sequence yields using quality information.
RESULTS: PANDAseq assembles paired-end reads rapidly and with the correction of most errors. Uncertain error corrections come from reads with many low-quality bases identified by upstream processing. Benchmarks were done using real error masks on simulated data, a pure source template, and a pooled template of genomic DNA from known organisms. PANDAseq assembled reads more rapidly and with reduced error incorporation compared to alternative methods.
CONCLUSIONS: PANDAseq rapidly assembles sequences and scales to billions of paired-end reads. Assembly of control libraries showed a 4-50\% increase in the number of assembled sequences over naïve assembly with negligible loss of "good" sequence.},
}
% BBMap software (SourceForge project page).
@misc{bushnell_bbmap_2015,
  author = {Bushnell, Brian},
  title = {{BBMap}},
  year = {2015},
  url = {http://sourceforge.net/projects/bbmap/},
}
% SILVA ribosomal RNA gene database (Nucleic Acids Research, 2013).
@article{quast_silva_2013,
  author = {Quast, C. and Pruesse, E. and Yilmaz, P. and Gerken, J. and Schweer, T. and Yarza, P. and Peplies, J. and Glöckner, F. O.},
  title = {The {SILVA} ribosomal {RNA} gene database project: improved data processing and web-based tools},
  journal = {Nucleic Acids Research},
  volume = {41},
  number = {D1},
  pages = {D590--D596},
  month = jan,
  year = {2013},
  issn = {0305-1048, 1362-4962},
  doi = {10.1093/nar/gks1219},
}
% ITSx: extraction of ITS1, 5.8S and ITS2 from ribosomal ITS sequences
% (Methods in Ecology and Evolution, 2013). ITS1 and ITS2 are braced as
% whole words so that case protection does not split them.
@article{bengtsson-palme_improved_2013,
  author = {Bengtsson-Palme, Johan and Ryberg, Martin and Hartmann, Martin and Branco, Sara and Wang, Zheng and Godhe, Anna and De Wit, Pierre and Sánchez-García, Marisol and Ebersberger, Ingo and de Sousa, Filipe and Amend, Anthony and Jumpponen, Ari and Unterseher, Martin and Kristiansson, Erik and Abarenkov, Kessy and Bertrand, Yann J. K. and Sanli, Kemal and Eriksson, K. Martin and Vik, Unni and Veldre, Vilmar and Nilsson, R. Henrik},
  title = {Improved software detection and extraction of {ITS1} and {ITS2} from ribosomal {ITS} sequences of fungi and other eukaryotes for analysis of environmental sequencing data},
  journal = {Methods in Ecology and Evolution},
  volume = {4},
  number = {10},
  pages = {914--919},
  month = oct,
  year = {2013},
  issn = {2041-210X},
  doi = {10.1111/2041-210X.12073},
  url = {http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073/abstract},
  urldate = {2016-05-13},
  language = {en},
  copyright = {© 2013 The Authors. Methods in Ecology and Evolution © 2013 British Ecological Society},
  keywords = {Fungi, molecular ecology, Next-generation sequencing, Perl, ribosomal DNA},
  abstract = {* The nuclear ribosomal internal transcribed spacer (ITS) region is the primary choice for molecular identification of fungi. Its two highly variable spacers (ITS1 and ITS2) are usually species specific, whereas the intercalary 5.8S gene is highly conserved. For sequence clustering and blast searches, it is often advantageous to rely on either one of the variable spacers but not the conserved 5.8S gene. To identify and extract ITS1 and ITS2 from large taxonomic and environmental data sets is, however, often difficult, and many ITS sequences are incorrectly delimited in the public sequence databases.
* We introduce ITSx, a Perl-based software tool to extract ITS1, 5.8S and ITS2 – as well as full-length ITS sequences – from both Sanger and high-throughput sequencing data sets. ITSx uses hidden Markov models computed from large alignments of a total of 20 groups of eukaryotes, including fungi, metazoans and plants, and the sequence extraction is based on the predicted positions of the ribosomal genes in the sequences.
* ITSx has a very high proportion of true-positive extractions and a low proportion of false-positive extractions. Additionally, process parallelization permits expedient analyses of very large data sets, such as a one million sequence amplicon pyrosequencing data set. ITSx is rich in features and written to be easily incorporated into automated sequence analysis pipelines.
* ITSx paves the way for more sensitive blast searches and sequence clustering operations for the ITS region in eukaryotes. The software also permits elimination of non-ITS sequences from any data set. This is particularly useful for amplicon-based next-generation sequencing data sets, where insidious non-target sequences are often found among the target sequences. Such non-target sequences are difficult to find by other means and would contribute noise to diversity estimates if left in the data set.},
  file = {Full Text PDF:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/GHV96IS8/Bengtsson-Palme et al. - 2013 - Improved software detection and extraction of ITS1.pdf:application/pdf;Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/CTAJ63FN/abstract.html:text/html},
}
% UCHIME: chimera detection for amplicon sequences (Bioinformatics, 2011).
% No note field: the exported value (03548) looked like a reference-manager
% counter and would have been printed in the bibliography.
% NOTE(review): abstract section breaks and the contact address were restored
% from the published abstract - confirm against the journal page.
@article{edgar_uchime_2011,
  author = {Edgar, Robert C. and Haas, Brian J. and Clemente, Jose C. and Quince, Christopher and Knight, Rob},
  title = {{UCHIME} improves sensitivity and speed of chimera detection},
  journal = {Bioinformatics},
  volume = {27},
  number = {16},
  pages = {2194--2200},
  month = aug,
  year = {2011},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btr381},
  url = {https://academic.oup.com/bioinformatics/article/27/16/2194/255262/UCHIME-improves-sensitivity-and-speed-of-chimera},
  urldate = {2017-08-07},
  abstract = {Motivation: Chimeric DNA sequences often form during polymerase chain reaction amplification, especially when sequencing single regions (e.g. 16S rRNA or fungal Internal Transcribed Spacer) to assess diversity or compare populations. Undetected chimeras may be misinterpreted as novel species, causing inflated estimates of diversity and spurious inferences of differences between populations. Detection and removal of chimeras is therefore of critical importance in such experiments.
Results: We describe UCHIME, a new program that detects chimeric sequences with two or more segments. UCHIME either uses a database of chimera-free sequences or detects chimeras de novo by exploiting abundance data. UCHIME has better sensitivity than ChimeraSlayer (previously the most sensitive database method), especially with short, noisy sequences. In testing on artificial bacterial communities with known composition, UCHIME de novo sensitivity is shown to be comparable to Perseus. UCHIME is {\textgreater}100× faster than Perseus and {\textgreater}1000× faster than ChimeraSlayer.
Contact: robert@drive5.com
Availability: Source, binaries and data: http://drive5.com/uchime.
Supplementary information: Supplementary data are available at Bioinformatics online.},
  file = {Edgar et al_2011_UCHIME improves sensitivity and speed of chimera detection.pdf:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/ACRTUECX/Edgar et al_2011_UCHIME improves sensitivity and speed of chimera detection.pdf:application/pdf;Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/DAQH763S/btr381.html:text/html},
}
% Cutadapt: adapter removal for sequencing reads (EMBnet.journal, 2011).
% No note field: the exported value (02310) looked like a reference-manager
% counter and would have been printed in the bibliography.
% NOTE(review): page range completed to 10--12 - confirm against the DOI landing page.
@article{martin_cutadapt_2011,
  author = {Martin, Marcel},
  title = {Cutadapt removes adapter sequences from high-throughput sequencing reads},
  journal = {EMBnet.journal},
  volume = {17},
  number = {1},
  pages = {10--12},
  month = may,
  year = {2011},
  issn = {2226-6089},
  doi = {10.14806/ej.17.1.200},
  url = {http://journal.embnet.org/index.php/embnetjournal/article/view/200},
  urldate = {2017-08-21},
}
% stampa: taxonomic assignment by pairwise alignments (software, GitHub repository).
% The exported note held a stray counter (00002) plus "original-date"; only the
% date is kept, as origdate, so nothing spurious is printed.
% NOTE(review): presumably the original-date is the repository creation date - confirm.
@misc{mahe_stampa:_2017,
  author = {Mahé, Frédéric},
  title = {stampa: {Sequence} {Taxonomic} {Assignment} by {Massive} {Pairwise} {Alignments}},
  shorttitle = {stampa},
  month = jul,
  year = {2017},
  origdate = {2015-04-23},
  url = {https://github.com/frederic-mahe/stampa},
  urldate = {2017-08-21},
  copyright = {GPL-3.0},
  file = {Snapshot:/home/mattias/.mozilla/firefox/88hqpn07.default/zotero/storage/JRMRSM2U/stampa.html:text/html},
}
% Hydra pipeline release 1.3.3 archived on Zenodo.
% The surname particle is written with the family name ("de Hollander") so the
% name parses correctly; the DOI lives in its own field instead of note.
@misc{hollander_nioo-knaw/hydra:_2017,
  author = {de Hollander, Mattias},
  title = {{NIOO-KNAW}/{Hydra}: 1.3.3},
  shorttitle = {{NIOO-KNAW}/{Hydra}},
  publisher = {Zenodo},
  month = sep,
  year = {2017},
  doi = {10.5281/zenodo.597131},
  url = {https://zenodo.org/record/597131},
  urldate = {2017-09-08},
}