Skip to content

Commit

Permalink
Fixed possible issue with sentence splitting when annotating texts
Browse files: browse the repository at this point in the history
  • Loading branch information
henriquesposito committed Aug 23, 2024
1 parent 5b1e078 commit e7ba84f
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
importFrom(stringr,str_extract_all)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_squish)
importFrom(textstem,lemmatize_strings)
importFrom(tidyr,unite)
3 changes: 2 additions & 1 deletion R/text_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ read_pdf <- function(path) {
#' Defaults to "words".
#' @import spacyr
#' @importFrom dplyr group_by summarise ungroup %>%
#' @importFrom stringr str_squish
#' @importFrom stringr str_squish str_replace_all
#' @return A data frame with syntax information by words or sentences in text.
#' @examples
#' #annotate_text(US_News_Conferences_1960_1980[1:2, 3])
Expand All @@ -328,6 +328,7 @@ read_pdf <- function(path) {
annotate_text <- function(v, level = "words") {
doc_id <- sentence_id <- token_id <- token <- pos <- tag <- lemma <- entity <- NULL
suppressWarnings(spacyr::spacy_initialize(model = "en_core_web_sm"))
v <- stringr::str_replace_all(v, "\\.\\,|\\. \\,|\\,\\.|\\, \\.", ".")
parse <- spacyr::spacy_parse(v, tag = TRUE)
suppressWarnings(spacyr::spacy_finalize())
if (level == "sentences" | level == "sentence") {
Expand Down

0 comments on commit e7ba84f

Please sign in to comment.