diff --git a/NAMESPACE b/NAMESPACE index f0dda5b..9e998d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ export(GeomToken) export(GeomTurn) export(add_lines) export(calculate_timing) -export(check_quality) export(geom_token) export(geom_turn) export(init) diff --git a/R/source_quality.R b/R/source_quality.R deleted file mode 100644 index 83320da..0000000 --- a/R/source_quality.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Check the quality of a specific source -#' -#' Quality measure that is considered: the total duration of utterances in ms -#' must not be equal to the number of utterances. -#' -#' @param data dataset including the columns source, begin, and end -#' @param source the source that is verified -#' -#' @return 0 if the quality is good, 1 if the quality is bad -#' @export -check_quality <- function(data, source){ - check_columns(data, c("source", "begin", "end")) - - # extract only the source we want to check - data <- data[data$source == source,] - if(nrow(data) == 0){ - stop(paste("Source", source, "not found in dataset.")) - } - - # number of utterances - nturns <- nrow(data) - - # duration of spoken utterances - data$duration <- data$end - data$begin - total_duration <- sum(data$duration, na.rm = TRUE) - - # if the total number of utterances is the same as the length of the spoken utterances - # that means that they all last one millisecond on average - # this is a sign of bad quality - quality <- ifelse(total_duration == nturns, 0, 1) - - if(quality == 0){ - cat(paste("WARNING: Bad data quality for source", source, "\n")) - } - - return(quality) -} - - - diff --git a/man/check_quality.Rd b/man/check_quality.Rd deleted file mode 100644 index bf2d173..0000000 --- a/man/check_quality.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/source_quality.R -\name{check_quality} -\alias{check_quality} -\title{Check the quality of a specific source} -\usage{ -check_quality(data, source) -} -\arguments{ -\item{data}{dataset including the columns source, begin, and end} - -\item{source}{the source that is verified} -} -\value{ -0 if the quality is good, 1 if the quality is bad -} -\description{ -Quality measure that is considered: the total duration of utterances in ms -must not be equal to the number of utterances. -} diff --git a/vignettes/original_demo.Rmd b/vignettes/original_demo.Rmd deleted file mode 100644 index 7175209..0000000 --- a/vignettes/original_demo.Rmd +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "original_demo" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{original_demo} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -library(convplot) - - -# Sample data for ElPaCo eScience project -# MD 202302 - - - -# Packages & functions ---------------------------------------------------- - -# Packages -list.of.packages <- c("tidyverse","ggthemes","ggrepel","knitr","data.table","viridis","ggridges","cowplot") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] -if(length(new.packages)) install.packages(new.packages) -lapply(list.of.packages, require, character.only=T) - -# Load functions -source("helper-functions.R") - - -# Load data --------------------------------------------------------------- - -# subset of six languages and 5 source files per language -# originates in: elpaco-lingdiv-branch-escience.R -d <- read_csv("data/d_sample.csv") - -# metadata of corpora included in sample -d.metadata <- read_csv("data/d_sample_metadata.csv") - -# tokenized data (generated using white space tokenizer without further assumptions) -d.tokens <- read_csv("data/d_sample_tokens.csv") - - -# Quick demo of existing code --------------------------------------------- - -# Warts and all! - -# inspect_corpus(): a quick quantitative + qualitative peek at a corpus -# Liesenfeld & Dingemanse LREC - -inspect_corpus(lang="dutch") -inspect_corpus(lang="siwu") - -# convplot(): function to display time-aligned 'piano roll' style plot of a conversation -# Dingemanse & Liesenfeld ACL - -convplot(n=10,content=T,dyads=T) -