Skip to content

Commit

Permalink
Update vignettes with new makeSignatures function and bugphyzz import.
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Jan 10, 2024
1 parent 0032b6d commit 8cfd107
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 59 deletions.
46 changes: 14 additions & 32 deletions vignettes/articles/attributes.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,20 @@ knitr::opts_chunk$set(
library(DT)
library(bugphyzz)
library(dplyr)
library(purrr)
```

```{r}
bp <- importBugphyzz(version = 'devel', force_download = TRUE)
bp <- bp |>
filter(
!Evidence %in% c('asr', 'inh')
)
x <- bp |>
select(Attribute_source, Attribute_group, Attribute) |>
filter(!grepl(';', Attribute_source)) |>
count(Attribute_source, Attribute_group) |>
mutate(new_col = paste0(Attribute_group, ' (', n, ')'))
bp <- importBugphyzz(version = 'devel')
x <- map(bp, ~ {
.x |>
select(Attribute_group, Attribute, Attribute_source) |>
count(Attribute_group, Attribute, Attribute_source, name = "N_annotations") |>
distinct()
}) |>
bind_rows() |>
filter(!is.na(Attribute_source)) |> # Attribute source with NAs means it was obtained through ASR or TAX
relocate(Attribute_source, Attribute_group, Attribute, N_annotations)
```

## Attributes
Expand Down Expand Up @@ -80,11 +81,6 @@ datt
## Sources

```{r}
x <- x |>
select(-Attribute_group, -n) |>
group_by(Attribute_source) |>
summarise(Attribute_group = paste(sort(new_col), collapse = '; ')) |>
ungroup()
fname2 <- system.file(
'extdata/attribute_sources.tsv',
package = 'bugphyzz', mustWork = TRUE
Expand All @@ -93,23 +89,9 @@ src <- read.table(
fname2, header = TRUE, sep = '\t', quote = ""
)
src <- left_join(x, src, by = 'Attribute_source') |>
relocate(
Attribute_source, Confidence_in_curation, Evidence,
Attribute_group, full_source
) |>
mutate(
Confidence_in_curation = factor(
Confidence_in_curation, levels = c('high', 'medium', 'low')
)
) |>
arrange(Confidence_in_curation, Evidence, Attribute_source)
xsrc <- left_join(x, src, by = 'Attribute_source') |>
rename(Full_source = full_source)
colnames(src) <- c(
'Source (short)', 'Confidence in curation', 'Evidence*', 'Attribute group**',
'Full source'
)
caption2 <- paste0(
'Table 2. Sources of attribute annotations in bugphyzz. ',
'* Evidence codes: exp = experimental evidence, igc = inferred from genomic context, ',
Expand All @@ -120,7 +102,7 @@ caption2 <- paste0(
)
src_dt <- datatable(
data = src,
data = xsrc,
filter = "top",
extensions = c("Buttons","KeyTable"),
# caption = caption2,
Expand Down
62 changes: 35 additions & 27 deletions vignettes/bugphyzz.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ also be accessed by direct download at **>>> insert Zenodo link here <<<**.

# Import bugphyzz

```{r, message=FALSE, eval=FALSE}
```{r, eval=FALSE}
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
Expand All @@ -37,62 +37,70 @@ library(purrr)
```

```{r import data}
bp <- importBugphyzz(version = 'devel', force_download = TRUE)
head(bp)
bp <- importBugphyzz(version = 'devel')
head(map(bp, head))
```

## Explore which attribute groups are available

```{r}
unique(bp$Attribute_group)
names(bp)
```

## Explore which attribute signatures are available
# Create signatures

## Create signatures of taxids at the genus level for aerophilicity

```{r}
head(unique(bp$Attribute))
aer_sigs_g <- makeSignatures(
dat = bp[["aerophilicity"]], tax_id_type = "NCBI_ID", tax_level = "genus"
)
map(aer_sigs_g, head)
```

# Creating signatures

## Create signatures of taxids at the genus level for aerophilicity
## Create signatures of taxa names at the species level for growth temperature

```{r}
aer <- bp[which(bp$Attribute_group == 'aerophilicity'),]
sigs1 <- getBugphyzzSignatures(
df = aer, tax.id.type = 'NCBI_ID', tax.level = 'genus', min.size = 10
gt_sigs_sp <- makeSignatures(
dat = bp[["growth temperature"]], tax_id_type = "Taxon_name",
tax_level = 'species'
)
map(sigs1, head)
map(gt_sigs_sp, head)
```

## Create signatures of taxa names at the species level for optimal ph
## Create signatures with custom threshold for numeric attributes

```{r}
op <- bp[bp$Attribute_group == 'optimal ph', ]
sigs2 <- getBugphyzzSignatures(
df = op, tax.id.type = 'Taxon_name', tax.level = 'species', min.size = 10
gt_sigs_mix <- makeSignatures(
dat = bp[["growth temperature"]], tax_id_type = "Taxon_name",
tax_level = "mixed", min = 0, max = 25
)
map(sigs2, head)
map(gt_sigs_mix, head)
```


# Other examples
## Create signatures for a binary attribute

```{r}
mot <- bp[bp$Attribute_group == 'motility',]
sigs3 <- getBugphyzzSignatures(
df = mot, tax.id.type = 'Taxon_name', tax.level = 'mixed'
ap_sigs_mix <- makeSignatures(
dat = bp[["animal pathogen"]], tax_id_type = "NCBI_ID",
tax_level = "mixed", evidence = c("exp", "igc", "nas", "tas")
)
lapply(sigs3, head)
map(ap_sigs_mix, head)
```

# Merge examples above
## Make signatures for all datasets with a single function call

```{r}
sigs <- c(sigs1, sigs2, sigs3)
lapply(sigs, head)
sigs <- map(bp, makeSignatures) |>
list_flatten()
length(sigs)
```


```{r}
head(map(sigs, head))
```

## Session information:

```{r}
Expand Down

0 comments on commit 8cfd107

Please sign in to comment.