-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsampling_results.Rmd
81 lines (64 loc) · 2.38 KB
/
sampling_results.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Sampling evaluation
```{r}
library(data.table)
ground_truth <- fread("simulated/data/manifest.csv")[sample_id != "negative"]
setnames(ground_truth, "relative_abundance", "real_relative")
ground_truth[, "expected_reads" := real_relative * depth]
ground_truth
```
```{r}
inferred <- fread("simulated/data/food_abundance.csv") |> unique(by=c("sample_id", "matched_taxid"))
inferred
```
```{r}
merged <- merge(ground_truth[type=="food"], inferred, by.x=c("sample_id", "id"), by.y=c("sample_id", "matched_taxid"), all=T)
merged[is.na(real_relative), real_relative:=0]
merged[is.na(expected_reads), expected_reads:=0]
merged[, within_genus := genus %in% genus[real_relative > 0], by="sample_id"]
merged[, within_family := family %in% family[real_relative > 0], by="sample_id"]
merged[is.na(reads), reads:=0]
merged
```
```{r, fig.width=6, fig.height=4}
library(ggplot2)
theme_minimal() |> theme_set()
corrs <- merged[, .(
r=cor(log10(expected_reads + 1), log10(reads + 1))
), by="sample_id"]
corrs[, "label" := sprintf("r=%.2f", r), by="sample_id"]
options(scipen=3)
merged[, "type" := tstrsplit(sample_id, "_")[[1]]]
ggplot(merged) +
aes(x=expected_reads+1.0, y=reads+1.0) +
stat_smooth(data=merged, method="lm", color="black", linewidth=1, aes(group=1)) +
#geom_boxplot(width=0.4, aes(group=factor(expected_reads)), fill="white", color="black", outlier.color=NA) +
geom_jitter(stroke=0.5, shape=21, size=2, aes(fill=type), width=0.05) +
#geom_line(aes(group=genus), color="gray") +
scale_x_log10() + scale_y_log10() +
guides(fill=FALSE) + #facet_wrap(~ sample_id) +
labs(x="expected [reads + 1]", y="observed [reads + 1]")
ggsave("figures/simulated_comparison.pdf", width=6, height=4)
```
```{r}
cor.test(~ log10(expected_reads + 1) + log10(reads + 1), data=merged[reads>0])
corrs[, summary(r)]
```
```{r, fig.width=3, fig.height=2}
probs <- merged[expected_reads > 0, .(prob=sum(reads > 0)/16), by="real_relative"]
ggplot(probs) +
aes(x=real_relative, y=prob) +
geom_point() +
geom_line() +
scale_x_log10(limits=c(NA, 0.11)) +
labs(x="relative abundance [reads/total]", y="sensitivity [P(detection)]")
ggsave("figures/detection_probability.pdf", width=3, height=2)
```
```{r}
summary(merged[expected_reads>0 & reads==0, expected_reads])
```
## Figure data
```{r}
library(openxlsx)
fig2 <- list(`simulation results`=merged, proportions=probs)
write.xlsx(fig2, "figure_data/fig2.xlsx")
```