diff --git a/topics/microbiome/tutorials/diversity/images/alpha_diversity.svg b/topics/microbiome/tutorials/diversity/images/alpha_diversity.svg new file mode 100644 index 00000000000000..2f631dbfba77f1 --- /dev/null +++ b/topics/microbiome/tutorials/diversity/images/alpha_diversity.svg @@ -0,0 +1,467 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + Evenness + Richness + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/topics/microbiome/tutorials/diversity/images/alpha_diversity_richness_evenness.png b/topics/microbiome/tutorials/diversity/images/alpha_diversity_richness_evenness.png new file mode 100644 index 00000000000000..fa371a20e5b4ba Binary files /dev/null and b/topics/microbiome/tutorials/diversity/images/alpha_diversity_richness_evenness.png differ diff --git a/topics/microbiome/tutorials/diversity/images/alphadiversity_metrics.png b/topics/microbiome/tutorials/diversity/images/alphadiversity_metrics.png new file mode 100644 index 00000000000000..b4bf7156a77604 Binary files /dev/null and b/topics/microbiome/tutorials/diversity/images/alphadiversity_metrics.png differ diff --git a/topics/microbiome/tutorials/diversity/images/bracken_output.png b/topics/microbiome/tutorials/diversity/images/bracken_output.png new file mode 100644 index 00000000000000..dc8eb6c424e4b9 Binary files /dev/null and b/topics/microbiome/tutorials/diversity/images/bracken_output.png differ diff --git a/topics/microbiome/tutorials/diversity/images/diversity_differences.png b/topics/microbiome/tutorials/diversity/images/diversity_differences.png new file mode 100644 index 00000000000000..a553e6432addbd Binary files /dev/null and b/topics/microbiome/tutorials/diversity/images/diversity_differences.png differ diff --git a/topics/microbiome/tutorials/diversity/images/hill_numbers.png b/topics/microbiome/tutorials/diversity/images/hill_numbers.png new file mode 100644 index 
00000000000000..80461fc7f27f7e Binary files /dev/null and b/topics/microbiome/tutorials/diversity/images/hill_numbers.png differ diff --git a/topics/microbiome/tutorials/diversity/images/test b/topics/microbiome/tutorials/diversity/images/test new file mode 100644 index 00000000000000..8b137891791fe9 --- /dev/null +++ b/topics/microbiome/tutorials/diversity/images/test @@ -0,0 +1 @@ + diff --git a/topics/microbiome/tutorials/diversity/images/unifrac.png b/topics/microbiome/tutorials/diversity/images/unifrac.png new file mode 100644 index 00000000000000..a6e4f923cf0045 Binary files /dev/null and b/topics/microbiome/tutorials/diversity/images/unifrac.png differ diff --git a/topics/microbiome/tutorials/diversity/krona-kraken.html b/topics/microbiome/tutorials/diversity/krona-kraken.html new file mode 100644 index 00000000000000..2a49a709c93812 --- /dev/null +++ b/topics/microbiome/tutorials/diversity/krona-kraken.html @@ -0,0 +1,11137 @@ + + + + + + + + + + + + + +
+ + + magnitude + magnitudeUnassigned + + + JC1A + JP4D + + + 1355741112530 + + 75635 + 128827257 + + 0 + 7 + + 0 + 7 + + 0 + 7 + + 7 + + + + + + 012 + 431 + + 00 + 48 + + 48 + 02 + + 01 + 13 + + 0 + 1 + + 1 + + + + 0 + 1 + + 1 + + + + 1 + 0 + + 1 + + + + + 3 + 0 + + 2 + 0 + + 2 + + + + 1 + 0 + + 1 + + + + + 3 + 1 + + 2 + 1 + + 1 + + + + + + + 1 + 0 + + 1 + 0 + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + 4 + 0 + + 4 + 0 + + 0 + 4 + + 4 + 0 + + 4 + + + + + + + 2 + 6 + + 0 + 4 + + 3 + 0 + + 3 + 2 + + 1 + + + + + 0 + 1 + + 1 + 0 + + 1 + + + + + + + + 127956551 + 00 + + 00 + 127956551 + + 00 + 127956551 + + 127956551 + 00 + + 127956551 + 00 + + 127956551 + + + + + + + + 03 + 823 + + 819 + 01 + + 0 + 17 + + 0 + 17 + + 10 + 17 + + 4 + + + 2 + + + 1 + + + + + + 81 + 01 + + 8 + 0 + + 0 + 8 + + 8 + + + + + + + 0 + 1 + + 1 + 0 + + 1 + + + + + + 1 + 7 + + 0 + 5 + + 0 + 5 + + 5 + 0 + + 0 + 5 + + 5 + + + + + + + 0 + 1 + + 1 + 0 + + 0 + 1 + + 0 + 1 + + 1 + + + + + + + + 3 + + + + 386812099 + 17254102920 + + 1 + 0 + + 1 + + + + 2 + 0 + + 0 + 2 + + 2 + 1 + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + + 286713349 + 1202789726 + + 45323 + 13991436 + + 13511100 + 62450 + + 00 + 12 + + 12 + + + + 3 + + + 5340 + 218 + + 01 + 333 + + 331 + + + 1 + + + + 41 + 00 + + 41 + + + + 1 + + + 1 + 5 + + 1 + + + 1 + + + 1 + + + 1 + + + + 90 + 145 + + 51 + + + 4 + + + + 4 + 5 + + 1 + + + + 1 + + + 1 + 0 + + 1 + + + + + 0 + 1 + + 1 + + + + 12 + 25 + + 1 + + + 2 + 0 + + 1 + + + 1 + + + + 1 + 0 + + 1 + + + + + 3130 + 120658 + + 1 + 0 + + 1 + + + + 5 + + + 369 + 295 + + 1 + + + 2 + + + 52 + + + 1 + + + + 1091 + 683 + + 408 + + + + 81 + 00 + + 1 + + + 8 + + + + 333 + 354 + + 1 + + + 1 + + + 1 + + + + 0 + 1 + + 1 + + + + 1 + 0 + + 1 + + + + 1 + 11 + + 10 + + + + + 26541 + 0143 + + 2 + 3 + + 1 + + + + 37 + 385 + + 1 + + + 329 + + + 18 + + + + 1 + + + 03 + 269 + + 1 + + + 1 + + + 1 + + + 251 + + + 1 + + + 1 + + + 1 + + + + + 0 + 1 + + 1 + + + + + 6 + 1 + + 0 + 5 + + 5 + 1 + + 4 + + + + + + 33 + 01 + + 01 + 31 + + 0 + 3 
+ + 3 + + + + + 0 + 1 + + 0 + 1 + + 1 + + + + + + 4 + 0 + + 3 + 0 + + 1 + 2 + + 1 + + + + 1 + 0 + + 1 + + + + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + 1 + 0 + + 0 + 1 + + 0 + 1 + + 0 + 1 + + 1 + + + + + + + 349422400 + 677974354 + + 5 + 1 + + 0 + 4 + + 3 + 0 + + 3 + + + + 1 + + + + + 1 + + + 8941598 + 5791187 + + 0 + 1 + + 1 + 0 + + 1 + + + + + 1 + 0 + + 0 + 1 + + 1 + + + + + 47 + 00 + + 02 + 47 + + 2 + + + 2 + + + 4 + + + 1 + + + + + 03 + 627 + + 66 + 02 + + 3 + + + 6 + + + 1 + + + + 16 + 3 + + 11 + + + 2 + + + + 2 + 0 + + 2 + + + + + 18 + 5 + + 1 + 0 + + 1 + + + + 12 + 7 + + 3 + + + 1 + + + 1 + + + + + 17464 + 1522 + + 15941 + 9236 + + 1 + + + 1 + + + 1 + + + 401 + + + 1 + + + 16 + + + 11 + + + + 0 + 1 + + 1 + + + + + 4 + 17 + + 0 + 1 + + 1 + + + + 1 + + + 7 + 1 + + 6 + + + + 3 + 2 + + 1 + + + + 0 + 1 + + 1 + + + + + 14 + 1218 + + 1 + + + 1113 + 49 + + 2 + + + 7 + + + 1 + + + 1 + + + + + 0 + 4 + + 0 + 4 + + 4 + + + + + 1393 + 1137 + + 1 + 0 + + 1 + + + + 23 + 30 + + 1 + + + 1 + + + 1 + + + 1 + + + 3 + + + + 0 + 2 + + 2 + + + + 2 + 19 + + 14 + + + 3 + + + + 4 + 0 + + 3 + + + 1 + + + + 2 + 0 + + 2 + + + + + 00 + 12 + + 0 + 1 + + 1 + + + + 1 + 0 + + 1 + + + + 1 + + + + 7780 + 221 + + 6 + 0 + + 3 + + + 3 + + + + 68 + 11 + + 39 + + + 6 + + + 12 + + + + 4 + 0 + + 4 + + + + 1 + + + 015 + 720 + + 7 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + + 1 + 0 + + 1 + + + + 1 + 0 + + 1 + + + + 24 + 1 + + 22 + + + 1 + + + + 0 + 2 + + 2 + + + + + 4 + 0 + + 1 + 4 + + 1 + + + 2 + + + + + 0 + 2 + + 2 + + + + 4 + 0 + + 2 + 4 + + 1 + + + 1 + + + + + 7 + 0 + + 0 + 6 + + 6 + + + + 1 + 0 + + 1 + + + + + 00 + 159 + + 159 + 00 + + 159 + + + + + 1252 + 05 + + 4 + 0 + + 4 + + + + 1241 + 00 + + 1241 + + + + 2 + 0 + + 2 + + + + + 11 + 00 + + 00 + 11 + + 1 + + + 1 + + + + + + 3 + 0 + + 3 + 0 + + 0 + 1 + + 1 + + + + 0 + 2 + + 2 + + + + + + 108519290 + 178231989 + + 1043875 + 1043001 + + 1 + + + 2 + 36 + + 1 + + + 33 + + + + 6 + 0 + + 6 + + + + 107 + 65 + + 11 + + + 3 + + + 3 + + + 3 + + + 1 + + + 21 
+ + + + 29 + 7 + + 13 + + + 5 + + + 4 + + + + 3 + 37 + + 25 + + + 4 + + + 1 + + + 4 + + + + 0 + 7 + + 7 + + + + 8 + 13 + + 4 + + + 1 + + + + 0 + 17 + + 17 + + + + 0 + 13 + + 13 + + + + 0 + 1 + + 1 + + + + 0 + 11 + + 11 + + + + 3 + 0 + + 3 + + + + 48 + 28 + + 1 + + + 9 + + + 1 + + + 4 + + + 3 + + + 2 + + + + 230 + 406 + + 9 + + + 167 + + + + 33 + 0 + + 33 + + + + 0 + 2 + + 2 + + + + 22 + 0 + + 22 + + + + 4 + 1 + + 1 + + + 1 + + + 1 + + + + 3 + 11 + + 4 + + + 1 + + + 3 + + + + 12 + 2 + + 2 + + + 8 + + + + 2 + 0 + + 2 + + + + 0 + 25 + + 25 + + + + 17 + 0 + + 2 + + + 9 + + + 1 + + + 5 + + + + 6 + 0 + + 6 + + + + 0 + 5 + + 5 + + + + + 4616475 + 5938824 + + 00 + 610 + + 610 + + + + 5 + 0 + + 5 + + + + 6 + 0 + + 6 + + + + 0 + 3 + + 3 + + + + 15 + 0 + + 15 + + + + 218 + 00 + + 218 + + + + 2 + 0 + + 2 + + + + 0 + 23 + + 23 + + + + 00 + 328 + + 328 + + + + 1237 + 078 + + 80 + + + 179 + + + + 0 + 94 + + 94 + + + + 0 + 9 + + 9 + + + + 0 + 11 + + 11 + + + + 21 + 0 + + 21 + + + + 00 + 419 + + 419 + + + + 0 + 1 + + 1 + + + + 0 + 7 + + 7 + + + + 3 + 0 + + 3 + + + + 0 + 28 + + 28 + + + + 0 + 5 + + 5 + + + + 61 + 0 + + 61 + + + + 0 + 2 + + 2 + + + + 21 + 0 + + 21 + + + + 17129 + 1635 + + 123 + + + 22 + + + 22 + + + 27 + + + + 17126 + 1526 + + 8 + + + 11 + + + 281 + + + + 741395 + 53824 + + 18 + + + 6 + + + 44 + + + 11 + + + 5 + + + 11 + + + 10 + + + 619 + + + 1186 + + + 20 + + + 3 + + + 13 + + + 5 + + + 18 + + + 4 + + + 1050 + + + 38 + + + 98 + + + 20 + + + 32 + + + + 0 + 5 + + 5 + + + + 0 + 15 + + 15 + + + + 720 + 00 + + 720 + + + + 00 + 110 + + 110 + + + + 12 + 20 + + 4 + + + 4 + + + + + + 0 + 3 + + 0 + 3 + + 2 + 0 + + 2 + + + + 0 + 1 + + 1 + + + + + + 1325001 + 45215776 + + 461340 + 24819 + + 0 + 3 + + 3 + + + + 722 + 313 + + 1 + + + 1 + + + 4 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + + 357 + 3264 + + 97 + + + 1 + + + 5 + + + 17 + + + 15 + + + 2 + + + 3 + + + 67 + + + + 1 + 0 + + 1 + + + + 436 + 413 + + 5 + + + 1 + + + 1 + + + 1 + + + 3 + + + 1 + + + 2 + + + 8 
+ + + 1 + + + + 827 + 843 + + 1 + + + 1 + + + 1 + + + 2 + + + 8 + + + 1 + + + 2 + + + + 0 + 3 + + 2 + + + 1 + + + + 149 + 40 + + 21 + + + 88 + + + + + 2 + 0 + + 0 + 2 + + 2 + + + + + 1406440 + 2729435 + + 04 + 114 + + 1 + + + 4 + + + 13 + + + 2 + + + + 2 + 0 + + 1 + + + 1 + + + + 4 + 11 + + 1 + + + 4 + + + 2 + + + + 932043 + 541144 + + 19352 + + + 4 + + + 11388 + + + 9155 + + + + 14 + 25 + + 3 + + + 2 + + + 3 + + + 3 + + + + 2 + 0 + + 2 + + + + 6 + 225 + + 214 + + + 1 + + + 2 + + + 2 + + + + 38673 + 24360 + + 14135 + + + 5 + + + 22 + + + 2 + + + 1 + + + 13 + + + 2 + + + 2 + + + 4 + + + 1 + + + 4 + + + 1 + + + 121 + + + + + + 00 + 3440 + + 3440 + 825 + + 110 + 19 + + 1 + + + + 112 + 122 + + 1 + + + + 0 + 1 + + 1 + + + + 0 + 1 + + 1 + + + + 131 + 00 + + 1 + + + 13 + + + + + + 1 + 0 + + 1 + + + + 1062457 + 51144 + + 36 + 00 + + 1 + + + 00 + 12 + + 1 + + + 1 + + + 1 + + + + 2 + 0 + + 2 + + + + 3 + 0 + + 2 + + + 1 + + + + + 00 + 71 + + 71 + 00 + + 71 + + + + + 5354 + 372283 + + 21869 + 00 + + 21869 + + + + 20 + 23 + + 3 + + + + 1 + 0 + + 1 + + + + 1 + 0 + + 1 + + + + 2756 + 1039 + + 1 + + + 6 + + + 99 + + + 8 + + + 1 + + + + + 26 + 00 + + 00 + 26 + + 26 + + + + + 617 + 02 + + 26 + 29 + + 1 + + + 2 + + + + 01 + 43 + + 41 + + + 1 + + + + 3 + 0 + + 3 + + + + + + 79 + 0 + + 37 + 79 + + 3 + 0 + + 3 + + + + 0 + 10 + + 10 + + + + 27 + 0 + + 27 + + + + 0 + 2 + + 2 + + + + + + 172 + 00 + + 00 + 172 + + 01 + 172 + + 17 + + + 1 + + + + + + + 0 + 15 + + 6 + 0 + + 3 + 6 + + 2 + + + 1 + 0 + + 1 + + + + + + 9 + 1 + + 5 + 0 + + 0 + 5 + + 4 + + + 1 + + + + + 0 + 3 + + 0 + 3 + + 3 + + + + + + + 78259 + 773555 + + 1 + 3 + + 0 + 2 + + 1 + 2 + + 1 + + + + + + 35814 + 03 + + 24 + 35810 + + 00 + 12 + + 1 + + + 1 + + + 1 + + + + 40 + 355 + + 315 + + + + 0 + 1 + + 1 + + + + 0 + 1 + + 1 + + + + 2 + 0 + + 2 + + + + + 1 + 0 + + 0 + 1 + + 1 + + + + + + 127 + 0 + + 127 + 3 + + 124 + 6 + + 118 + + + + + + 0 + 33 + + 33 + 0 + + 33 + 3 + + 17 + + + 1 + + + 12 + + + + + + 1 + 0 + + 1 + + + + 5834 + 01 + 
+ 12 + 5833 + + 1 + 0 + + 1 + + + + 1425 + 5730 + + 212 + + + 11 + + + 13 + + + 82 + + + + + + 5 + 0 + + 5 + 0 + + 5 + 2 + + 1 + + + 1 + + + 1 + + + + + + 00 + 1017 + + 00 + 1017 + + 610 + 1017 + + 1 + + + 1 + + + 45 + + + + + + 00 + 297 + + 1 + 0 + + 1 + 0 + + 1 + + + + + 1 + 3 + + 2 + 0 + + 1 + + + 1 + + + + + 00 + 293 + + 00 + 291 + + 291 + + + + 0 + 2 + + 2 + + + + + + 71 + 00 + + 71 + 00 + + 1 + 0 + + 1 + + + + 2 + 7 + + 4 + + + 1 + + + + + + 2 + 0 + + 0 + 2 + + 2 + 0 + + 2 + + + + + + 0 + 5 + + 0 + 5 + + 2 + 0 + + 2 + + + + 0 + 1 + + 1 + + + + 1 + 0 + + 1 + + + + 0 + 1 + + 1 + + + + + + 15717 + 154 + + 00 + 21 + + 00 + 21 + + 2 + + + 1 + + + + + 13210 + 14012 + + 00 + 32 + + 32 + + + + 1 + + + 4 + + + + + 00 + 7630 + + 7630 + 00 + + 0 + 6 + + 6 + + + + 7030 + 435 + + 1 + + + 1 + + + 1 + + + 9 + + + 262 + + + 11 + + + 1 + + + + + + + 3 + 1 + + 1 + 2 + + 1 + + + + + 102 + 20913 + + 41 + 243 + + 20 + 3 + + 17 + 0 + + 17 + + + + + 1 + 2 + + 1 + 0 + + 1 + + + + + + 0 + 142 + + 142 + 0 + + 139 + 0 + + 139 + + + + 3 + + + + + 1 + 0 + + 1 + 0 + + 1 + 0 + + 1 + + + + + + 337 + 40 + + 0 + 1 + + 0 + 1 + + 1 + + + + + 296 + 90 + + 113 + 02 + + 11 + + + 1 + + + + 9 + 5 + + 3 + + + 1 + + + + 1 + 0 + + 1 + + + + 0 + 1 + + 1 + + + + 1 + 0 + + 1 + + + + + + + + 31 + 00 + + 31 + 00 + + 31 + 00 + + 31 + 00 + + 31 + 00 + + 31 + + + + + + + + 26526 + 42 + + 81 + 00 + + 00 + 81 + + 81 + 00 + + 81 + 01 + + 1 + + + 7 + + + + + + + 1 + 0 + + 1 + 0 + + 0 + 1 + + 1 + 0 + + 1 + + + + + + + 00 + 20820 + + 2711 + 05 + + 11 + 00 + + 10 + 11 + + 1 + + + + + 0 + 2 + + 1 + 0 + + 1 + + + + 0 + 1 + + 1 + + + + + 0 + 1 + + 1 + + + + 1 + 2 + + 1 + + + + 01 + 242 + + 0 + 16 + + 16 + + + + 80 + 81 + + 1 + + + + + + 1819 + 00 + + 17 + 0 + + 0 + 13 + + 13 + + + + 1 + + + 3 + + + + 0 + 17 + + 17 + 0 + + 17 + + + + + 1479 + 00 + + 1478 + 532 + + 5 + + + 2 + + + 6 + + + 3 + + + 35 + + + 1 + + + 1 + + + 2 + + + 20 + + + 1 + + + 1 + + + 3 + + + 1 + + + 341 + + + 11 + + + + 0 + 1 + + 1 + + + + + + + 452 + 
01 + + 30 + 401 + + 5 + 30 + + 0 + 2 + + 2 + + + + 0 + 5 + + 5 + + + + 1 + 0 + + 1 + + + + 0 + 1 + + 1 + + + + 2 + 0 + + 2 + + + + 0 + 2 + + 2 + + + + 2 + 0 + + 1 + + + 1 + + + + 0 + 7 + + 4 + + + 3 + + + + 3 + 1 + + 2 + + + + + 11 + 00 + + 11 + 01 + + 1 + + + + + 0 + 1 + + 0 + 1 + + 1 + + + + + 0 + 5 + + 0 + 4 + + 4 + + + + 1 + 0 + + 1 + + + + + + 5 + 0 + + 5 + 0 + + 0 + 5 + + 5 + + + + + + + + 40 + 413 + + 171 + 373 + + 00 + 71 + + 71 + 70 + + 0 + 1 + + 1 + + + + + + 1 + 0 + + 1 + 0 + + 0 + 1 + + 1 + + + + + + 0 + 13 + + 0 + 13 + + 0 + 13 + + 13 + + + + + + + + 124 + 2120 + + 1 + 0 + + 1 + 0 + + 1 + 0 + + 1 + + + + + + 153 + 133 + + 3 + 13 + + 1 + + + 8 + 5 + + 3 + + + + 1 + 0 + + 1 + + + + + 1 + 5 + + 4 + + + + 1 + 0 + + 1 + + + + 1 + 0 + + 0 + 1 + + 1 + + + + + + 0 + 3 + + 1 + 0 + + 1 + 0 + + 1 + + + + + 0 + 2 + + 0 + 2 + + 2 + + + + + + 7 + 37 + + 0 + 16 + + 16 + 3 + + 6 + + + 4 + + + 3 + + + + + 1 + 0 + + 1 + + + + 13 + 0 + + 13 + 0 + + 13 + + + + + + 2 + 0 + + 1 + 0 + + 0 + 1 + + 1 + + + + + 1 + 0 + + 1 + 0 + + 1 + + + + + + + 5 + + + 541587 + 2346 + + 00 + 16 + + 16 + 00 + + 2 + 0 + + 0 + 2 + + 2 + + + + + 0 + 2 + + 0 + 2 + + 2 + + + + + 10 + 12 + + 0 + 2 + + 1 + + + 1 + + + + + + + 0 + 23 + + 23 + 0 + + 3 + 23 + + 0 + 1 + + 1 + + + + 0 + 2 + + 1 + + + 1 + + + + 2 + 0 + + 2 + + + + 14 + 1 + + 13 + + + + 1 + 0 + + 1 + + + + + + + 0 + 32 + + 32 + 9 + + 23 + 0 + + 23 + 0 + + 23 + + + + + + + 538105 + 00 + + 3120 + 538105 + + 123 + 37625 + + 50 + 53 + + 3 + + + + 31 + 65 + + 1 + + + 3 + + + 1 + + + 2 + + + + 542 + 35314 + + 2398 + + + 604 + + + + + 13159 + 021 + + 1 + 0 + + 1 + + + + 1 + 0 + + 1 + + + + 0 + 4 + + 4 + + + + 1 + 0 + + 1 + + + + 0 + 10 + + 10 + + + + 3 + 15 + + 1 + + + 2 + + + 2 + + + 5 + + + 1 + + + 1 + + + + 0 + 3 + + 3 + + + + 250 + 1286 + + 3 + + + 432 + + + 601 + + + + + 1 + 0 + + 1 + 0 + + 1 + + + + + + + 0 + 18 + + 18 + 4 + + 2 + 4 + + 1 + 0 + + 1 + + + + 0 + 1 + + 1 + + + + + 3 + 0 + + 3 + 0 + + 1 + + + 2 + + + + + 1 + 5 + + 2 + 3 + + 1 + 
+ + + 0 + 1 + + 1 + + + + + 1 + 0 + + 1 + 0 + + 1 + + + + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + 0 + 57 + + 57 + 0 + + 57 + 2 + + 54 + 0 + + 54 + + + + 1 + 0 + + 1 + + + + + + + + 0 + 3 + + 3 + 0 + + 1 + 3 + + 0 + 2 + + 0 + 2 + + 2 + + + + + + + + 4716 + 460308 + + 101 + 00 + + 0 + 1 + + 1 + 0 + + 1 + 0 + + 1 + + + + + + 00 + 91 + + 0 + 9 + + 9 + 0 + + 9 + + + + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + 0 + 1 + + 1 + 0 + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + 0 + 1 + + 1 + 0 + + 0 + 1 + + 1 + 0 + + 1 + + + + + + + 140224 + 402290 + + 00 + 322 + + 00 + 322 + + 1 + 0 + + 1 + + + + 212 + 321 + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 2 + + + 2 + + + + + + 77 + 4716 + + 2 + 5 + + 1 + 0 + + 1 + + + + 1 + 0 + + 1 + + + + 1 + 0 + + 1 + + + + + 3 + 0 + + 0 + 2 + + 1 + + + 1 + + + + 1 + + + + 1 + 0 + + 1 + 0 + + 1 + + + + + 40 + 0 + + 40 + 13 + + 22 + + + 5 + + + + + + 00 + 301 + + 301 + 00 + + 20 + 301 + + 141 + + + 12 + + + 1 + + + 1 + + + + + + 04 + 314 + + 1 + 0 + + 1 + 0 + + 1 + + + + + 33 + 00 + + 0 + 1 + + 1 + + + + 2 + 0 + + 2 + + + + 0 + 3 + + 3 + + + + + 2 + 3 + + 1 + 0 + + 1 + + + + + 0 + 3 + + 3 + 0 + + 3 + + + + + + 22 + 00 + + 22 + 00 + + 21 + 01 + + 2 + + + + 0 + 1 + + 1 + + + + + + 2 + 0 + + 2 + 1 + + 1 + + + + + 00 + 1778 + + 1 + 0 + + 1 + 0 + + 1 + + + + + 1777 + 00 + + 00 + 1777 + + 1776 + + + 1 + + + + + + 1 + 0 + + 1 + 0 + + 1 + 0 + + 1 + + + + + + + + 0 + 1 + + 1 + 0 + + 0 + 1 + + 0 + 1 + + 0 + 1 + + 1 + + + + + + + + 0 + 14 + + 14 + 0 + + 14 + 0 + + 0 + 14 + + 14 + 0 + + 14 + + + + + + + + 0 + 2 + + 0 + 2 + + 2 + + + + + 0 + 7 + + 7 + 0 + + 7 + 0 + + 0 + 7 + + 7 + + + + + + + 59 + 2141 + + 3 + 0 + + 3 + 0 + + 3 + 2 + + 0 + 1 + + 1 + + + + + + + 1629 + 00 + + 1629 + 27 + + 1421 + 44 + + 1016 + 07 + + 27 + + + 82 + + + + 1 + 0 + + 1 + + + + + 0 + 1 + + 1 + + + + + + + + 1053991002347 + + + 395 + 00 + + 0 + 39 + + 0 + 39 + + 0 + 39 + + 39 + 0 + + 39 + 0 + + 39 + + + + + + + + 1 + 0 + + 0 + 1 + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + 00 + 02 + + 00 + 02 
+ + 02 + 00 + + 01 + + + 0 + 1 + + 1 + + + + + + + 2 + 0 + + 0 + 2 + + 2 + 0 + + 0 + 2 + + 2 + 0 + + 2 + + + + + + + + + 1 + 0 + + 0 + 1 + + 0 + 1 + + 1 + 0 + + 1 + 0 + + 0 + 1 + + 1 + + + + + + + + + +
diff --git a/topics/microbiome/tutorials/diversity/tutorial.bib b/topics/microbiome/tutorials/diversity/tutorial.bib new file mode 100644 index 00000000000000..8e94717cf303a7 --- /dev/null +++ b/topics/microbiome/tutorials/diversity/tutorial.bib @@ -0,0 +1,205 @@ +% This file was created with Citavi 6.14.4.0 + +@article{Berger.1970, + abstract = {The diversity of a planktonic foraminiferal assemblage on the ocean floor depends on the state of preservation of that assemblage. As dissolution progresses, species diversity (number of species in the assemblage) decreases, but compound diversity (based on relative species abundance) first increases and then decreases; species dominance first decreases and then increases. The reason for these changes is that the species most susceptible to solution deliver more sediment to the ocean floor than do species with solution-resistant shells, possibly because the more soluble tests are produced in surface waters, where growth and production are greatest.}, + author = {Berger, W. H. and Parker, F. L.}, + year = {1970}, + title = {Diversity of planktonic foraminifera in deep-sea sediments}, + pages = {1345--1347}, + volume = {168}, + number = {3937}, + issn = {0036-8075}, + journal = {Science (New York, N.Y.)}, + doi = {10.1126/science.168.3937.1345} +} + + +@article{Bolyen.2019, + author = {Bolyen, Evan and Rideout, Jai Ram and Dillon, Matthew R. and Bokulich, Nicholas A. and Abnet, Christian C. and Al-Ghalith, Gabriel A. and Alexander, Harriet and Alm, Eric J. and Arumugam, Manimozhiyan and Asnicar, Francesco and Bai, Yang and Bisanz, Jordan E. and Bittinger, Kyle and Brejnrod, Asker and Brislawn, Colin J. and Brown, C. Titus and Callahan, Benjamin J. and Caraballo-Rodr{\'i}guez, Andr{\'e}s Mauricio and Chase, John and Cope, Emily K. and {Da Silva}, Ricardo and Diener, Christian and Dorrestein, Pieter C. and Douglas, Gavin M. and Durall, Daniel M. and Duvallet, Claire and Edwardson, Christian F.
and Ernst, Madeleine and Estaki, Mehrbod and Fouquier, Jennifer and Gauglitz, Julia M. and Gibbons, Sean M. and Gibson, Deanna L. and Gonzalez, Antonio and Gorlick, Kestrel and Guo, Jiarong and Hillmann, Benjamin and Holmes, Susan and Holste, Hannes and Huttenhower, Curtis and Huttley, Gavin A. and Janssen, Stefan and Jarmusch, Alan K. and Jiang, Lingjing and Kaehler, Benjamin D. and Kang, Kyo Bin and Keefe, Christopher R. and Keim, Paul and Kelley, Scott T. and Knights, Dan and Koester, Irina and Kosciolek, Tomasz and Kreps, Jorden and Langille, Morgan G. I. and Lee, Joslynn and Ley, Ruth and Liu, Yong-Xin and Loftfield, Erikka and Lozupone, Catherine and Maher, Massoud and Marotz, Clarisse and Martin, Bryan D. and McDonald, Daniel and McIver, Lauren J. and Melnik, Alexey V. and Metcalf, Jessica L. and Morgan, Sydney C. and Morton, Jamie T. and Naimey, Ahmad Turan and Navas-Molina, Jose A. and Nothias, Louis Felix and Orchanian, Stephanie B. and Pearson, Talima and Peoples, Samuel L. and Petras, Daniel and Preuss, Mary Lai and Pruesse, Elmar and Rasmussen, Lasse Buur and Rivers, Adam and Robeson, Michael S. and Rosenthal, Patrick and Segata, Nicola and Shaffer, Michael and Shiffer, Arron and Sinha, Rashmi and Song, Se Jin and Spear, John R. and Swafford, Austin D. and Thompson, Luke R. and Torres, Pedro J. and Trinh, Pauline and Tripathi, Anupriya and Turnbaugh, Peter J. and Ul-Hasan, Sabah and {van der Hooft}, Justin J. J. and Vargas, Fernando and V{\'a}zquez-Baeza, Yoshiki and Vogtmann, Emily and von Hippel, Max and Walters, William and Wan, Yunhu and Wang, Mingxun and Warren, Jonathan and Weber, Kyle C. and Williamson, Charles H. D. and Willis, Amy D. and Xu, Zhenjiang Zech and Zaneveld, Jesse R. and Zhang, Yilong and Zhu, Qiyun and Knight, Rob and Caporaso, J. 
Gregory}, + year = {2019}, + title = {Reproducible, interactive, scalable and extensible microbiome data science using QIIME 2}, + pages = {852--857}, + volume = {37}, + number = {8}, + journal = {Nature biotechnology}, + doi = {10.1038/s41587-019-0209-9} +} + + +@article{Bray.1957, + author = {Bray, J. Roger and Curtis, J. T.}, + year = {1957}, + title = {An Ordination of the Upland Forest Communities of Southern Wisconsin}, + pages = {325--349}, + volume = {27}, + number = {4}, + issn = {0012-9615}, + journal = {Ecological Monographs}, + doi = {10.2307/1942268} +} + + +@article{Chao.1992, + author = {Chao, Anne and Lee, Shen-Ming}, + year = {1992}, + title = {Estimating the Number of Classes via Sample Coverage}, + pages = {210--217}, + volume = {87}, + number = {417}, + issn = {0162-1459}, + journal = {Journal of the American Statistical Association}, + doi = {10.1080/01621459.1992.10475194} +} + + +@article{Fisher.1943, + author = {Fisher, R. A. and Corbet, A. Steven and Williams, C. B.}, + year = {1943}, + title = {The Relation Between the Number of Species and the Number of Individuals in a Random Sample of an Animal Population}, + pages = {42}, + volume = {12}, + number = {1}, + issn = {00218790}, + journal = {The Journal of Animal Ecology}, + doi = {10.2307/1411} +} + + +@article{Jaccard.1912, + author = {Jaccard, Paul}, + year = {1912}, + title = {The Distribution of the Flora in the Alpine Zone}, + pages = {37--50}, + volume = {11}, + number = {2}, + issn = {0028-646X}, + journal = {New Phytologist}, + doi = {10.1111/j.1469-8137.1912.tb05611.x} +} + +@article{Margalef.1969, + author = {Margalef, R.}, + year = {1969}, + title = {Perspectives in Ecological Theory}, + pages = {571}, + volume = {20}, + number = {2}, + issn = {00301299}, + journal = {Oikos}, + doi = {10.2307/3543237} +} + + +@article{Pielou.1966, + author = {Pielou, E.
C.}, + year = {1966}, + title = {The measurement of diversity in different types of biological collections}, + pages = {131--144}, + volume = {13}, + issn = {00225193}, + journal = {Journal of Theoretical Biology}, + doi = {10.1016/0022-5193(66)90013-0} +} + + +@article{Shannon.1948, + author = {Shannon, C. E.}, + year = {1948}, + title = {A Mathematical Theory of Communication}, + pages = {379--423}, + volume = {27}, + number = {3}, + issn = {00058580}, + journal = {Bell System Technical Journal}, + doi = {10.1002/j.1538-7305.1948.tb01338.x} +} + + +@article{SIMPSON.1949, + author = {Simpson, E. H.}, + year = {1949}, + title = {Measurement of Diversity}, + pages = {688}, + volume = {163}, + number = {4148}, + issn = {0028-0836}, + journal = {Nature}, + doi = {10.1038/163688a0} +} + + +@article{Srensen.1948, + author = {S{\o}rensen, T.}, + year = {1948}, + title = {A method of establishing groups of equal amplitude in plant sociology based on similarity of species and its application to analyses of the vegetation on Danish commons}, + pages = {1--34}, + number = {5}, + journal = {Kongelige Danske Videnskabernes Selskab.}, + url = {https://www.royalacademy.dk/Publications/High/295_S%C3%B8rensen,%20Thorvald.pdf} +} + + +@article{BonillaRosso.2012, + abstract = {Metagenomics holds the promise of greatly advancing the study of diversity in natural communities, but novel theoretical and methodological approaches must first be developed and adjusted for these data sets. We evaluated widely used macroecological metrics of taxonomic diversity on a simulated set of metagenomic samples, using phylogenetically meaningful protein-coding genes as ecological proxies. To our knowledge, this is the first approach of this kind to evaluate taxonomic diversity metrics derived from metagenomic data sets.
We demonstrate that abundance matrices derived from protein-coding marker genes reproduce more faithfully the structure of the original community than those derived from SSU-rRNA gene. We also found that the most commonly used diversity metrics are biased estimators of community structure and differ significantly from their corresponding real parameters and that these biases are most likely caused by insufficient sampling and differences in community phylogenetic composition. Our results suggest that the ranking of samples using multidimensional metrics makes a good qualitative alternative for contrasting community structure and that these comparisons can be greatly improved with the incorporation of metrics for both community structure and phylogenetic diversity. These findings will help to achieve a standardized framework for community diversity comparisons derived from metagenomic data sets.}, + author = {Bonilla-Rosso, Germ{\'a}n and Eguiarte, Luis E. and Romero, David and Travisano, Michael and Souza, Valeria}, + year = {2012}, + title = {Understanding microbial community diversity metrics derived from metagenomes: performance evaluation using simulated data sets}, + pages = {37--49}, + volume = {82}, + number = {1}, + journal = {FEMS microbiology ecology}, + doi = {10.1111/j.1574-6941.2012.01405.x} +} + + +@article{Chao.2015, + author = {Chao, Anne and Jost, Lou}, + year = {2015}, + title = {Estimating diversity and entropy profiles via discovery rates of new species}, + pages = {873--882}, + volume = {6}, + number = {8}, + issn = {2041-210X}, + journal = {Methods in Ecology and Evolution}, + doi = {10.1111/2041-210X.12349} +} + + +@article{Hill.1973, + author = {Hill, M. 
O.}, + year = {1973}, + title = {Diversity and Evenness: A Unifying Notation and Its Consequences}, + pages = {427--432}, + volume = {54}, + number = {2}, + issn = {00129658}, + journal = {Ecology}, + doi = {10.2307/1934352} +} + +@article{Finotello.2018, + abstract = {The human microbiota is a complex ecological community of commensal, symbiotic and pathogenic microorganisms harboured by the human body. Next-generation sequencing (NGS) technologies, in particular targeted amplicon sequencing of the 16S ribosomal RNA gene (16S-seq), are enabling the identification and quantification of human-resident microorganisms at unprecedented resolution, providing novel insights into the role of the microbiota in health and disease. Once microbial abundances are quantified through NGS data analysis, diversity indices provide valuable mathematical tools to describe the ecological complexity of a single sample or to detect species differences between samples. However, diversity is not a determined physical quantity for which a consensus definition and unit of measure have been established, and several diversity indices are currently available. Furthermore, they were originally developed for macroecology and their robustness to the possible bias introduced by sequencing has not been characterized so far. To assist the reader with the selection and interpretation of diversity measures, we review a panel of broadly used indices, describing their mathematical formulations, purposes and properties, and characterize their behaviour and criticalities in dependence of the data features using simulated data as ground truth. 
In addition, we make available an R package, DiversitySeq, which implements in a unified framework the full panel of diversity indices and a simulator of 16S-seq data, and thus represents a valuable resource for the analysis of diversity from NGS count data and for the benchmarking of computational methods for 16S-seq.}, + author = {Finotello, Francesca and Mastrorilli, Eleonora and {Di Camillo}, Barbara}, + year = {2018}, + title = {Measuring the diversity of the human microbiota with targeted next-generation sequencing}, + pages = {679--692}, + volume = {19}, + number = {4}, + journal = {Briefings in bioinformatics}, + doi = {10.1093/bib/bbw119} +} + +@article{Bolyen.2019, + author = {Bolyen, Evan and Rideout, Jai Ram and Dillon, Matthew R. and Bokulich, Nicholas A. and Abnet, Christian C. and Al-Ghalith, Gabriel A. and Alexander, Harriet and Alm, Eric J. and Arumugam, Manimozhiyan and Asnicar, Francesco and Bai, Yang and Bisanz, Jordan E. and Bittinger, Kyle and Brejnrod, Asker and Brislawn, Colin J. and Brown, C. Titus and Callahan, Benjamin J. and Caraballo-Rodr{\'i}guez, Andr{\'e}s Mauricio and Chase, John and Cope, Emily K. and {Da Silva}, Ricardo and Diener, Christian and Dorrestein, Pieter C. and Douglas, Gavin M. and Durall, Daniel M. and Duvallet, Claire and Edwardson, Christian F. and Ernst, Madeleine and Estaki, Mehrbod and Fouquier, Jennifer and Gauglitz, Julia M. and Gibbons, Sean M. and Gibson, Deanna L. and Gonzalez, Antonio and Gorlick, Kestrel and Guo, Jiarong and Hillmann, Benjamin and Holmes, Susan and Holste, Hannes and Huttenhower, Curtis and Huttley, Gavin A. and Janssen, Stefan and Jarmusch, Alan K. and Jiang, Lingjing and Kaehler, Benjamin D. and Kang, Kyo Bin and Keefe, Christopher R. and Keim, Paul and Kelley, Scott T. and Knights, Dan and Koester, Irina and Kosciolek, Tomasz and Kreps, Jorden and Langille, Morgan G. I. 
and Lee, Joslynn and Ley, Ruth and Liu, Yong-Xin and Loftfield, Erikka and Lozupone, Catherine and Maher, Massoud and Marotz, Clarisse and Martin, Bryan D. and McDonald, Daniel and McIver, Lauren J. and Melnik, Alexey V. and Metcalf, Jessica L. and Morgan, Sydney C. and Morton, Jamie T. and Naimey, Ahmad Turan and Navas-Molina, Jose A. and Nothias, Louis Felix and Orchanian, Stephanie B. and Pearson, Talima and Peoples, Samuel L. and Petras, Daniel and Preuss, Mary Lai and Pruesse, Elmar and Rasmussen, Lasse Buur and Rivers, Adam and Robeson, Michael S. and Rosenthal, Patrick and Segata, Nicola and Shaffer, Michael and Shiffer, Arron and Sinha, Rashmi and Song, Se Jin and Spear, John R. and Swafford, Austin D. and Thompson, Luke R. and Torres, Pedro J. and Trinh, Pauline and Tripathi, Anupriya and Turnbaugh, Peter J. and Ul-Hasan, Sabah and {van der Hooft}, Justin J. J. and Vargas, Fernando and V{\'a}zquez-Baeza, Yoshiki and Vogtmann, Emily and von Hippel, Max and Walters, William and Wan, Yunhu and Wang, Mingxun and Warren, Jonathan and Weber, Kyle C. and Williamson, Charles H. D. and Willis, Amy D. and Xu, Zhenjiang Zech and Zaneveld, Jesse R. and Zhang, Yilong and Zhu, Qiyun and Knight, Rob and Caporaso, J. Gregory}, + year = {2019}, + title = {Reproducible, interactive, scalable and extensible microbiome data science using QIIME 2}, + pages = {852--857}, + volume = {37}, + number = {8}, + journal = {Nature biotechnology}, + doi = {10.1038/s41587-019-0209-9} +} + + + + diff --git a/topics/microbiome/tutorials/diversity/tutorial.md b/topics/microbiome/tutorials/diversity/tutorial.md new file mode 100644 index 00000000000000..0de5d62644899e --- /dev/null +++ b/topics/microbiome/tutorials/diversity/tutorial.md @@ -0,0 +1,437 @@ +--- +layout: tutorial_hands_on +title: Calculating α and β diversity from microbiome taxonomic data +zenodo_link: xxx +questions: +- How many different taxons are present in my sample? 
How do I additionally take their relative abundance into account? +- How similar or how dissimilar are my samples in terms of taxonomic diversity? +- What are the different metrics used to calculate the taxonomic diversity of my samples? +objectives: +- Explain what taxonomic diversity is +- Explain different metrics to calculate α and β diversity +- Apply KrakenTools to calculate α and β diversity and understand the output +level: Introductory +key_points: +- There are 2 different types of diversity metrics (α and β diversity) +- KrakenTools can be used in Galaxy for calculating the diversity +time_estimation: 20M +contributions: + authorship: + - sophia120199 + - bebatut +tags: +- metagenomics +- diversity +--- + +# Introduction + +A **diversity index** is a quantitative measure that is used to assess the level of diversity or variety within a particular system, such as a biological community, a population, or a workplace. It provides a way to capture and quantify the distribution of different types or categories within a system. + +In various fields, diversity indexes are employed to understand and compare the composition and richness of various elements. Apart from ecology, fields such as social and cultural science are interested in the diversity within a population or workplace. In these cases, the indexes may consider factors like age, gender, ethnicity, or other relevant characteristics to assess the diversity and inclusiveness of a group or organization. + +To study microbiome data, indirect methods like **metagenomics** can be used. Metagenomic samples contain DNA from the different organisms present at the specific site where the sample was collected. Metagenomic data can be used to find out which organisms coexist in that niche and which genes are present in the different organisms.
+ +Once we know which taxa are present in a metagenomic sample ([Tutorial on Taxonomic Profiling and Visualization of Metagenomic Data]({% link topics/metagenomics/tutorials/taxonomic-profiling/tutorial.md %})), we can do diversity analyses. + +Related to ecology, the term **diversity** describes the number of different species present in one particular area and their relative abundance. More specifically, several different metrics of diversity can be calculated. The most common ones are α, β and γ diversity: + +- **α diversity** describes the diversity within a community + + It considers the number of different species in an environment (also referred to as species **richness**). Additionally, it can take the abundance of each species into account to measure how evenly individuals are distributed across the sample (also referred to as species **evenness**). + +- **β diversity** compares the diversity between different communities by measuring their distance + +- **γ diversity** is a measure of the overall diversity for the different ecosystems within a region. + +![α, β and γ diversity](./images/diversity_differences.png) + + +In this analysis we will use Galaxy for calculating different alpha diversity indexes and the Bray-Curtis dissimilarity index for β diversity. + +# Background on data + +The dataset we will use for this tutorial comes from an oasis in the Mexican desert called Cuatro Ciénegas ({% cite Okie.2020 %}). The researchers were interested in genomic traits that affect the rates and costs of biochemical information processing within cells. They performed a whole-ecosystem experiment, thus fertilizing the pond to achieve nutrient-enriched conditions. + +Here we will use 2 datasets: +- `JP4D`: a microbiome sample collected from the Lagunita Fertilized Pond +- `JC1A`: a **control** sample from a control mesocosm. + +The datasets differ in size, but according to the authors this doesn't matter for their analysis of genomic traits.
Also, they underline that differences between the two samples reflect trait-mediated ecological dynamics instead of microevolutionary changes as the duration of the experiment was only 32 days. This means that depending on available nutrients, specific lineages within the pond grow more successfully than others because of their genomic traits. The samples have been analysed as explained in the [Taxonomic profiling tutorial]({% link topics/sequence-analysis/tutorials/taxonomic-profiling/tutorial.md %}). + +In a nutshell, taxonomic labels have been assigned to the metagenomics data using [Kraken2](toolshed.g2.bx.psu.edu/repos/iuc/kraken2/kraken2/2.1.1+galaxy1) to find out which species are present in the samples. Finally, species abundance was estimated using [Bracken](toolshed.g2.bx.psu.edu/repos/iuc/bracken/est_abundance/2.7+galaxy1). For this tutorial, we will use the output file of Bracken. + +To get an overview, you can find a Krona chart visualizing the different species present in the two samples. + + + +The dataset we will work with in this tutorial is the output file of Bracken, which estimates species abundance. + +``name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads +Paracoccus sp. MC1862 2760307 S 98 4 102 0.00169 +Paracoccus sp. AK26 2589076 S 85 8 93 0.00154 +Paracoccus sp. Arc7-R13 2500532 S 67 13 80 0.00133 +Paracoccus sp. BM15 1529068 S 27 1 28 0.00046 +Paracoccus sanguinis 1545044 S 142 37 179 0.00297 +Paracoccus contaminans 1945662 S 87 18 105 0.00174 +Paracoccus aminovorans 34004 S 86 26 112 0.00186`` + +> +> +> What information do the different columns contain? +> +> > +> > +> > 1. species name +> > 2. taxonomy ID +> > 3. taxonomic level: K_kingdom, P_phylum, C_class, O_order, F_family, G_genus, and S_species +> > 4. reads assigned by Kraken +> > 5. additional reads added by Bracken: In order to estimate species abundance, Bracken reestimates the reads assigned by Kraken using bayesian reestimation. 
For details on the procedure, have a look into the [Bracken publication](https://peerj.com/articles/cs-104/). +> > 6. sum of column 4 and column 5 +> > 7. fraction of the total reads that is assigned to the particular species +> > +> {: .solution} +{: .question} + + +> More details on using input other than Bracken +> +> It is also possible to use Krakentools to calculate α and β diversity on datasets other than the Bracken output. Any tool that outputs taxonomy abundances can be used prior to the diversity analysis. Importantly, the respective output file needs to be converted into the correct table format and filtered for the taxonomic rank "species". This step is not necessary when using Bracken output, as it already lists only the species level. +> +> xxx hands on: filter on specific taxonomic level using filter on column tool in galaxy +> xxx show example of kraken and metaphlan output file +> +{: .details} + +> +> +> In this tutorial, we will cover: +> +> 1. TOC +> {:toc} +> +{: .agenda} + +# Prepare Galaxy and data + +Any analysis should get its own Galaxy history. So let's start by creating a new one: + +> Data upload +> +> 1. Create a new history for this analysis +> +> {% snippet faqs/galaxy/histories_create_new.md %} +> +> 2. Rename the history +> +> {% snippet faqs/galaxy/histories_rename.md %} +> +{: .hands_on} + +We now need to import the data: + +> Import datasets +> +> 1. Import the following samples via link from [Zenodo]({{ page.zenodo_link }}) or Galaxy shared data libraries: +> +> ```text +> {{ page.zenodo_link }}/files/xxx +> {{ page.zenodo_link }}/files/xxx +> ``` +> +> {% snippet faqs/galaxy/datasets_import_via_link.md %} +> {% snippet faqs/galaxy/datasets_import_from_data_library.md %} +> +> 2. Create a paired collection. +> +> {% snippet faqs/galaxy/collections_build_list_paired.md %} +> +{: .hands_on} + + +# Calculating α diversity + +**α diversity** describes the diversity within a community.
There are several different indexes used to calculate α diversity because different indexes capture different aspects of diversity and have varying sensitivities to different factors. These indexes have been developed to address specific research questions, account for different ecological or population characteristics, or highlight certain aspects of diversity. + +![α diversity](./images/alphadiversity_metrics.png) ([source](https://medium.com/pjtorres-high-gut-alpha-diversity-and-health/high-alpha-diversity-and-health-65e5eca7fa36)) + +Metrics of alpha diversity can be grouped into different classes: +**richness**: estimate the quantity of distinct species within a sample +- **Margalef’s richness**, which indicates the estimated species richness, accounting for the community size. This metric takes into account that a larger community size can support a greater number of species ({% cite Margalef.1969 %}) + + $$ D = \frac{(S - 1)}{\ln(n)} $$ + + With + - $$S$$ the total number of species, + - $$n$$ the total number of individuals in the sample +- **Chao1**, which estimates the true species richness or diversity of a community, particularly when there might be rare or unobserved species. Chao1 estimates the number of unobserved species based on the number of singletons and doubletons. It assumes that there are additional rare species that are likely to exist but have not been observed. The estimation considers the number of observed singletons and doubletons and incorporates them into the observed species richness to provide an estimate of the true species richness ({% cite Chao.1992 %}). + + $$ S_{chao1} = S_{obs} + \frac{n_1(n_1 - 1)}{2(n_2 + 1)} $$ + + With: + - $$S_{obs}$$ the observed species richness, + - $$n_1$$ the number of species represented by a single individual (singletons), + - $$n_2$$ the number of species represented by two individuals (doubletons).
+ +- **ACE** (Abundance-based Coverage Estimator), which takes into account the abundance distribution of observed species and incorporates the presence of rare or unobserved species. ACE estimates the number of unobserved species based on the abundance distribution and incorporates it into the observed species richness. It takes into account the relative rarity of observed species and uses this information to estimate the true species richness. + + + +**evenness**: evaluate the relative abundances of species rather than their total count + +- **Pielou’s evenness**, which quantifies how close the community’s diversity is to the maximum possible diversity. This index is calculated by taking the Shannon Diversity Index (which measures the overall diversity of the community) and dividing it by the maximum possible diversity given the observed species richness ({% cite Pielou.1966 %}). + + $$ J = \frac{H'}{\ln(S)} $$ + + With: + - $$H'$$ the Shannon-Wiener diversity index + - $$S$$ the total number of species in a sample, across all samples in dataset. + + + +**diversity**: incorporate both the relative abundances and total count of distinct species + - **Shannon's** index, which calculates the uncertainty in predicting the species identity of an individual that is selected from a community ({% cite Shannon.1948 %}). + + $$ H' = -\sum_{i=1}^{S} p_i \ln(p_i) $$ + + With $$p_i$$ the proportion of individuals of species i, $$\ln$$ the natural logarithm, and $$S$$ the species richness. +- **Berger-Parker** index, which expresses the proportional importance of the most abundant type. Highly biased by sample size and richness ({% cite Berger.1970 %}). + + $$ D = \frac{n_{max}}{N} $$ + + With $$n_{max}$$ the abundance of the most dominant species, and $$N$$ the total number of individuals (sum of all abundances). +- **Simpson's** index, which calculates the probability that two individuals selected from a community will be of the same species.
Obtains small values in datasets of high diversity and large values in datasets of low diversity ({% cite SIMPSON.1949 %}). + + $$ D = \sum_{i=1}^{S} \left(\frac{n_i}{N}\right)^2 $$ + + With $$n_i$$ the number of individuals in species i, $$N$$ the total number of individuals of all species, $$n_i/N = p_i$$ the proportion of individuals of species i, and $$S$$ the species richness. + +- **Inverse Simpson** index, which is the transformation of Simpson's index that increases with increasing diversity. +- **Fisher's alpha** index, which describes the relationship between the number of species and the number of individuals in those species. Parametric index of diversity that assumes that the abundance of species follows a log series distribution ({% cite Fisher.1943 %}). + + $$ S = a \ln\left(1 + \frac{n}{a}\right) $$ + + With $$S$$ the number of taxa, $$n$$ the number of individuals and $$a$$ Fisher's alpha. + +![richness and evenness](./images/alpha_diversity_richness_evenness.png) + + + +**KrakenTools** is a suite of scripts designed to help Kraken users with downstream analysis of Kraken results. The Krakentool **Calculate alpha diversity** offers the possibility to calculate five different alpha diversity indexes: +1. Shannon's alpha diversity +2. Berger Parker's alpha +3. Simpson's diversity +4. Inverse Simpson's diversity +5. Fisher's index + +> Calculate α diversity with Krakentools +> 1. {% tool [Krakentools: Calculate alpha diversity](toolshed.g2.bx.psu.edu/view/iuc/krakentools_alpha_diversity/9d0330e23bfd) %} with the following parameters: +> - *"Abundance file"*: `Dataset Collection`: uploaded Bracken output file +> - *"Specify alpha diversity type"*: `Shannon's alpha diversity` +> +{: .hands_on} + + + +> +> +> 1. Calculate the 5 different alpha indexes available in Krakentools and compare the results. What do these numbers tell you? +> 2. Are the results consistent among the different indexes? +> +> > +> > +> > +> > 1.
The five alpha indexes available in Krakentools are: Shannon's alpha diversity, Berger Parker's alpha, Simpson's diversity, Inverse Simpson's diversity, Fisher's index. Below you can find a table comparing the different results for both the JC1A and JP4D sample as well as an explanation of the meaning of these values. +> > +> > | | JC1A | JP4D | +> > | --------------- | --------- | --------- | +> > | Shannon | 5,3441 | 6,4429 | +> > | Berger-Parker | 0,2299 | 0,0581 | +> > | Simpson | 0,9401 | 0,9926 | +> > | Inverse Simpson | 16,6941 | 136,0287 | +> > | Fisher | 3240,0957 | 9163,5027 | +> > +> > +> > When the **Shannon index** is given as a value of 5, it indicates a **relatively high level of diversity** within the community. The index ranges from 0 to a maximum value that depends on the number of species and their relative abundances. The higher the Shannon index value, the greater the diversity within the community. +> > +> > When the **Berger-Parker index** is given as a value of 0.23, it suggests that **a single species dominates the community**, as it represents **23 %** of the total individuals in the community. This indicates a relatively low level of species evenness, meaning that the abundance of individuals is heavily skewed towards one dominant species. In contrast to the Shannon index, which considers both species richness and evenness, the Berger-Parker index emphasizes the dominance of a particular species. A value of 0.23 indicates that the community is heavily influenced by one species, while the other species in the community are less abundant. In the case of JP4D, the dominant species accounts for only **5 %** of the total individuals, which implies a **more balanced distribution of individuals** among different species compared to a higher Berger-Parker index value. +> > +> > When the **Simpson's index** is given as a value of 0.94, it indicates a **high level of species diversity and evenness** within the community. 
The index ranges from 0 to 1, with 1 representing maximum diversity. Therefore, a Simpson's index of 0.94 suggests that the community is highly diverse, with a relatively even distribution of individuals among different species. In other words, the value of 0.94 indicates that if you were to randomly select two individuals from the community, there is a 94% probability that they would belong to different species. This implies a rich and balanced community where multiple species coexist in relatively equal abundance. +> > +> > When the **Inverse Simpson's index** is given as a value of **16.69**, it suggests a **relatively low level of species diversity** within the community. The index ranges from 1 to the total number of species in the community, with higher values indicating higher diversity. Therefore, a value of 16.69 indicates a lower diversity compared to a higher index value. An Inverse Simpson's index of 136 suggests a relatively high level of species diversity within the community. The index ranges from 1 to the total number of species in the community, with higher values indicating greater diversity. Therefore, a value of **136 indicates a higher diversity compared to a lower index value**. The Inverse Simpson's index is the reciprocal of the Simpson's index, which quantifies species diversity and evenness within a community. A higher Inverse Simpson's index value signifies a community with a greater number of species and a more even distribution of individuals among those species. +> > +> > +> > 2. The results are consistent as all indexes show JP4D to be the more diverse sample compared to JC1A. +> {: .solution} +> +{: .question} + + +> +> +> Apart from Krakentools, there are two more tools available in Galaxy that can be used to calculate diversity indexes, QIIME2 ({% cite Bolyen.2019 %}) and [Vegan](https://github.com/vegandevs/vegan). 
+> +> +> QIIME 2 (Quantitative Insights Into Microbial Ecology 2) is a powerful open-source bioinformatics software package that provides a comprehensive suite of tools and methods for processing, analyzing, and visualizing microbiome data. It offers a modular approach to microbiome analysis, allowing researchers to build flexible analysis pipelines tailored to their specific research goals. The software supports a wide range of data types, including 16S rRNA gene sequencing, metagenomics, metatranscriptomics, and others. +> +> Some of the key features and functionalities of QIIME 2 include: +> 1. Diversity Analysis: QIIME 2 allows users to explore and quantify microbial diversity within and between samples. It provides metrics for alpha diversity (within-sample diversity) and beta diversity (between-sample diversity). +> 2. Data Import and Preprocessing +> 3. Taxonomic Assignments +> 4. Community Analysis +> 5. Phylogenetic Analysis +> 6. Statistical Analysis +> 7. Visualization +> +> The vegan package is a community ecology package in the R programming language. It provides a wide range of tools and methods for analyzing and interpreting ecological data, particularly in the context of community ecology. The package is designed to handle multivariate data and offers various statistical techniques for studying species composition, diversity, and community dynamics. +> +> The vegan package encompasses several functionalities, including: +> +> 1. Diversity Analysis: vegan offers numerous diversity indices, such as species richness, Shannon diversity index, Simpson index, and many others. These indices allow researchers to quantify the diversity of species within a community and compare diversity between different samples or groups. +> 2. Ordination Techniques +> 3. Community Classification +> 4. Ecological Network Analysis +> 5. Ecological Indices +> 6.
Plotting and Visualization +> +{: .comment} + +# Calculating β diversity + +**β diversity** measures the distance between two or more separate entities. It therefore describes the difference between two communities or ecosystems. + +There are **multiple indexes** used to calculate β diversity because different indexes emphasize different aspects of compositional dissimilarity between communities or sites. + +These indexes have been developed to address specific research questions, accommodate different data types, or provide insights into different dimensions of β diversity. Below, you can find a list of commonly used indexes to calculate β diversity and their description. + +- **Jaccard Index**, which measures the proportion of shared species between two samples ({% cite Jaccard.1912 %}). + + $$ J(X, Y) = \| X ∩ Y\| / \| X ∪ Y\| $$ + +With: +- $$X ∩ Y$$ the intersection of sets X and Y (elements common to both sets) +- $$X ∪ Y$$ the union of sets X and Y (all unique elements from both sets combined) + +- **Sørensen Index**, which is similar to Jaccard Index, but gives double weight to the species shared by both samples ({% cite Srensen.1948 %}). + + $$ DSC = 2\| X ∩ Y\| / (\| X\| + \| Y\|) $$ + +With: +- $$X ∩ Y$$ the intersection of sets X and Y (elements common to both sets) +- $$\| X\|$$ and $$\| Y\|$$ the cardinalities of the two sets (i.e. the number of elements in each set) + +- **Bray-Curtis Dissimilarity**, which measures the dissimilarity of species abundances between two samples ({% cite Bray.1957 %}). + + $$ BC_{ij} = 1 - \frac{2C_{ij}}{S_i + S_j} $$ + +With: +- $$C_{ij}$$ the sum of the lesser abundance values of each species found in both samples i and j +- $$S_i$$ the total abundance or sum of species abundances in sample i +- $$S_j$$ the total abundance or sum of species abundances in sample j + +- **Kulczynski Dissimilarity**, which measures the dissimilarity in the proportional abundances of shared species.
+ + $$ D = 1 - (S_{AB} / (S_A + S_B - 2S_{AB})) $$ + +With: +- $$S_{AB}$$ the number of shared OTUs between communities A and B +- $$S_A$$ the number of OTUs in community A +- $$S_B$$ the number of OTUs in community B + +- **UniFrac**, which incorporates information on phylogenetic distances between observed species in the computation. Can be calculated either weighted (accounts for abundances) or unweighted (accounts only for richness). + + ![UniFrac](./images/unifrac.png) + + + +> Calculate β diversity with Krakentools +> 1. {% tool [Krakentools: Calculate beta diversity (Bray-Curtis dissimilarity)](https://toolshed.g2.bx.psu.edu/view/iuc/krakentools_beta_diversity/b33f117e9b67) %} with the following parameters: +> - *"Taxonomy file"*: `Dataset Collection`: uploaded Bracken output file +> - *"Specify type of input file"*: `Bracken species abundance file` +> +{: .hands_on} + + +> +> +> 1. What is the Bray-Curtis dissimilarity calculated for the two samples? +> 2. What does this number tell you? +> +> > +> > +> > 1. The output file gives you a table comparing sample 0 and 1, respectively. Consequently, comparing 0 to 0 and 1 to 1 results in a dissimilarity of 0, as those are exactly the same. Comparing sample 0 to sample 1 shows a Bray-Curtis dissimilarity of 0.701. +> > 2. The Bray-Curtis dissimilarity measures the dissimilarity of two samples. Consequently, an output of 0 represents two samples that are exactly the same, while an output of 1 means they are maximally divergent. In our case, a Bray-Curtis dissimilarity of 0.7 suggests that there is a substantial difference in the species composition or abundances between the two communities being compared. The higher the dissimilarity value, the greater the difference in species composition or abundances. +> {: .solution} +> +{: .question} + +# Evaluation of different diversity metrics + +Bonilla-Rosso et al. carried out a **performance evaluation** of different diversity metrics using simulated data sets ({% cite BonillaRosso.2012 %}).
For details on the individual metrics, please check their [publication](https://pubmed.ncbi.nlm.nih.gov/22554028/). However, it is important to note that none of the estimated metrics showed statistical similarity to their corresponding parameters in the source communities. Moreover, the results obtained were inconsistent across the samples. + +This **inconsistency** can be attributed to the fact that individual metrics only provide a **specific** perspective on diversity and are prone to bias in their estimation, leading to incorrect ranking of the samples. + +In summary, **relying solely on single-diversity metrics may not be enough** to accurately compare the diversity between two communities. Instead, we recommend utilizing multi-dimensional metrics to capture diverse rankings across different scales of diversity, which can be affected differently in manipulative studies. + +**Multidimensional diversity metrics**, also known as multivariate diversity metrics, are quantitative measures that capture multiple dimensions or aspects of diversity simultaneously. These metrics go beyond single-diversity metrics, such as species richness or Shannon entropy, which provide a one-dimensional representation of diversity. + +In contrast, multidimensional diversity metrics **take into account various attributes or characteristics** of species or communities to provide a more comprehensive understanding of diversity. These attributes can include species abundances, functional traits, phylogenetic relationships, or spatial distributions. + +The choice and composition of dimensions in multidimensional diversity metrics depend on the research context and the specific objectives of the study. Some aspects, multidimensional diversity metrics take into account, include: +1. **Functional Diversity**: This metric considers the range and variation of functional traits among species within a community. 
It assesses the diversity of ecological roles and functional strategies present, contributing to ecosystem functioning and resilience. +2. **Phylogenetic Diversity**: This metric incorporates the evolutionary relationships among species within a community. It quantifies the diversity based on the length and topology of the phylogenetic tree, highlighting the evolutionary history and relatedness of species. +3. **Spatial Diversity**: This metric incorporates spatial patterns and distributions of species within a landscape or ecosystem. It considers the heterogeneity of habitats, connectivity, and the arrangement of species populations across space. + +Multidimensional diversity metrics offer a **more nuanced and holistic perspective** on biodiversity, capturing different facets and dimensions of ecological variation. They provide insights into the ecological processes shaping communities and ecosystems and can be valuable in conservation planning, ecosystem management, and understanding the functional implications of biodiversity patterns. + +> More details on Multidimensional diversity metrics and Parameter q +> +> ## Multidimensional diversity metrics and Parameter q +> +> Expressing the compositional complexity of an assemblage cannot be accomplished with a single numerical value. Traditional measures like diversities (Hill numbers) and entropies (Rényi entropies) vary in their **order q**, which determines **the extent to which rare or common species are emphasized**. The ranking and comparison of assemblages rely on the chosen value of q. +> +> Instead of selecting a few measures to describe an assemblage, it is preferable to **present a continuous profile** that depicts diversity or entropy as a function of q (where q ≥ 0). This approach enables a visual comparison of the compositional complexities among multiple assemblages and facilitates the assessment of the evenness in the relative abundance distributions of the assemblages. 
In practice, the profile is typically plotted for values of q ranging from q = 0 to q = 3 or 4, beyond which there is usually little change. +> +> ![Parameter q](./images/hill_numbers.png) +> Source: <https://www.redalyc.org/journal/5117/511766773011/html/> +> +> # Hill numbers +> +> Hill numbers, also known as diversity indices or diversity measures, are mathematical metrics used to quantify the diversity or richness of a biological community. They were introduced by the ecologist Mark O. Hill, after whom they are named, and are widely used in ecology and biodiversity studies. +> +> Hill numbers provide a way to summarize and compare the diversity of different communities based on the abundance or occurrence of different species within those communities. These numbers take into account both the number of species present and their relative abundances. The higher the Hill number, the greater the diversity or richness of the community. +> +> Hill numbers are often represented by the symbol "D", followed by a subscript that indicates the order of diversity. The order of diversity determines the weight given to rare versus common species. Commonly used Hill numbers include: +> +> 1. Species richness (D₀): This is the simplest Hill number and represents the total number of species in a community, without considering their abundances. It provides a basic measure of biodiversity based on species count. +> 2. Shannon diversity (D₁): This Hill number is the exponential of the Shannon index. It incorporates both species richness and evenness. It takes into account both the number of species and their relative abundances, providing a more comprehensive measure of diversity. +> 3. Simpson diversity (D₂): This Hill number is the inverse of the Simpson index. It focuses on the dominance or concentration of species within a community. It considers both species richness and the probability that two individuals randomly selected from the community belong to the same species.
+> +> # Rényi entropy +> +> Rényi entropy is a concept in information theory and statistical physics introduced by Alfréd Rényi, a Hungarian mathematician. It is a generalization of the Shannon entropy, which measures the uncertainty or information content of a random variable or probability distribution. +> The Rényi entropy of a discrete probability distribution is defined by the parameter α, which determines the order of the entropy. The formula for calculating Rényi entropy is: +> +> $$ H_\alpha(P) = \frac{1}{1 - \alpha} \log_2 \left( \sum_{i=1}^{N} p_i^\alpha \right) $$ +> +> where $$P = \{p_1, p_2, ..., p_N\}$$ is the probability distribution of N discrete events or states, and $$p_i$$ represents the probability of the ith event. +> +> The value of α determines the properties of Rényi entropy. In the limit where α approaches 1, Rényi entropy reduces to Shannon entropy, providing a measure of average uncertainty or information content. Rényi entropy never increases with α. As α approaches 0, it converges to its maximum value, the logarithm of the number of events with non-zero probability, so that all species present count equally regardless of their abundance. Conversely, as α approaches infinity, it approaches its minimum value, which depends only on the largest probability, i.e. on the most abundant species. +> +> Rényi entropy has applications in various fields, including information theory, statistical physics, and data analysis. It offers a way to quantify the diversity or randomness of a system beyond the traditional Shannon entropy, allowing for a more nuanced understanding of information content and structure. +> +{: .details} + + +For further information on how to choose the best diversity metric also check + +1. [Measuring the diversity of the human microbiota with targeted next-generation sequencing](https://academic.oup.com/bib/article/19/4/679/2871295?utm_source=pocket_reader) {% cite Finotello.2018 %} +2.
[Understanding microbial community diversity metrics derived from metagenomes: performance evaluation using simulated data sets](https://academic.oup.com/femsec/article/82/1/37/567182?utm_source=pocket_reader) {% cite BonillaRosso.2012 %} + +# Conclusion + +In this tutorial, we looked at how to calculate α and β diversity from microbiome data. We applied **Krakentools** to calculate the α and β diversity of two microbiome sample datasets. + + + + diff --git a/topics/microbiome/tutorials/diversity/workflow/test b/topics/microbiome/tutorials/diversity/workflow/test new file mode 100644 index 00000000000000..8b137891791fe9 --- /dev/null +++ b/topics/microbiome/tutorials/diversity/workflow/test @@ -0,0 +1 @@ +