henriquesposito · henriquesposito · Jul 15, 2024 · Mar 9, 2023 · May 23, 2023 · Feb 20, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -5,3 +5,7 @@
 ^.github$
 ^.gitignore$
 ^inst$
+.DS_Store
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.github/workflows/prchecks.yml b/.github/workflows/prchecks.yml
@@ -2,7 +2,6 @@ on:
   pull_request:
     branches:
       - master
-      - main
 
 name: Binary checks
 
@@ -41,6 +40,7 @@ jobs:
             any::rcmdcheck
             any::lintr
             any::spelling
+            any::covr
           needs: check
 
       - uses: r-lib/actions/check-r-package@v2
@@ -63,6 +63,9 @@ jobs:
           name: ${{ matrix.config.asset_name }}
           path: build/
 
+      - name: Calculate code coverage
+        run: Rscript -e "covr::codecov()"
+
       - name: Lint
         run: lintr::lint_package()
         shell: Rscript {0}

diff --git a/.github/workflows/pushrelease.yml b/.github/workflows/pushrelease.yml
@@ -1,7 +1,6 @@
 on:
   push:
     branches:
-      - main
       - master
 
 name: Check and release
@@ -39,6 +38,7 @@ jobs:
           cache-version: 2
           extra-packages: |
             any::rcmdcheck
+            any::covr
             any::remotes
           needs: check
 
@@ -62,6 +62,9 @@ jobs:
           name: ${{ matrix.config.asset_name }}
           path: build/
 
+      - name: Calculate code coverage
+        run: Rscript -e "covr::codecov()"
+
   release:
     name: Bump version and release
     needs: build
@@ -78,7 +81,6 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           WITH_V: true
-          DEFAULT_BUMP: patch
 
       - name: Checkout two
         uses: actions/checkout@v2
@@ -101,7 +103,7 @@ jobs:
         run: ls -R
 
       - name: Rename Mac release
-        run: mv ./macOS/*.tgz pkg_macOS.tgz
+        run: mv ./macOS/*.tgz poldis_macOS.tgz
 
       - name: Upload Mac binary
         id: upload-mac
@@ -110,12 +112,12 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: pkg_macOS.tgz
-          asset_name: pkg_macOS.tgz
+          asset_path: poldis_macOS.tgz
+          asset_name: poldis_macOS.tgz
           asset_content_type: application/zip
 
       - name: Rename Linux release
-        run: mv ./linuxOS/*.tar.gz pkg_linuxOS.tar.gz
+        run: mv ./linuxOS/*.tar.gz poldis_linuxOS.tar.gz
 
       - name: Upload Linux binary
         id: upload-lin
@@ -124,12 +126,12 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: pkg_linuxOS.tar.gz
-          asset_name: pkg_linuxOS.tar.gz
+          asset_path: poldis_linuxOS.tar.gz
+          asset_name: poldis_linuxOS.tar.gz
           asset_content_type: application/zip
 
       - name: Rename Windows release
-        run: mv ./winOS/*.zip pkg_winOS.zip
+        run: mv ./winOS/*.zip poldis_winOS.zip
 
       - name: Upload Windows binary
         id: upload-win
@@ -138,8 +140,8 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: pkg_winOS.zip
-          asset_name: pkg_winOS.zip
+          asset_path: poldis_winOS.zip
+          asset_name: poldis_winOS.zip
           asset_content_type: application/zip
 
   render:
@@ -151,16 +153,17 @@ jobs:
 
       - uses: r-lib/actions/setup-r@v2
 
-      - uses: r-lib/actions/setup-pandoc@v1
+      - uses: r-lib/actions/setup-pandoc@v2
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
           cache-version: 2
+          extra-packages: |
+            any::rcmdcheck
+            any::covr
+            any::remotes
           needs: check
 
-      - name: Install package
-        run: R CMD INSTALL .
-
       # Render README.md using rmarkdown
       - name: render README
         run: Rscript -e 'rmarkdown::render("README.Rmd", output_format = "md_document")'
@@ -170,3 +173,33 @@ jobs:
           git add README.md
           git commit -m "Re-build README.md" || echo "No changes to commit"
           git push origin master || echo "No changes to commit"
+
+  pkgdown:
+    name: Build and deploy website
+    needs: render
+    runs-on: macOS-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-r@v2
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          cache-version: 2
+          extra-packages: |
+            any::rcmdcheck
+            any::pkgdown
+          needs: check
+
+      - name: Install package
+        run: R CMD INSTALL .
+
+      - name: Deploy package
+        run: |
+          git config --local user.email "[email protected]"
+          git config --local user.name "GitHub Actions"
+          Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 .Rhistory
 .RData
 .Ruserdata
+.DS_Store
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,13 +1,22 @@
 Package: poldis
 Type: Package
 Title: Tools for Analyzing Political Discourse
-Version: 0.0.3
-Date: 2022-09-25
-Author: person(given = "Henrique",
-           family = "Sposito", 
-           role = c("cre", "aut", "ctb"),
-           email = "[email protected]",
-           ORDCID = "0000-0003-3420-6085")
+Version: 0.1.0
+Date: 2024-06-14
+Author: c(person(given = "Henrique",
+                 family = "Sposito", 
+                 role = c("cre", "aut", "ctb"),
+                 email = "[email protected]",
+                 comment = c("IHEID", ORCID = "0000-0003-3420-6085")),
+          person(given = "James",
+                 family = "Hollway",
+                 role = c("ctb"),
+                 email = "[email protected]",
+                 comment = c("IHEID", ORCID = "0000-0002-8361-9647")),
+          person(given = "Jael",
+                 family = "Tan",
+                 role = "ctb",
+                 comment = c("IHEID", ORCID = "0000-0002-6234-9764")))
 Maintainer: Henrique Sposito <[email protected]>
 Description: Tools for analyzing political discourse beyond official speeches.
 License: MIT + file LICENSE
@@ -16,18 +25,24 @@ Imports:
     stringr,
     purrr,
     stringi,
-    messydates,
-    remotes,
-    stringdist,
-    entity
+    quanteda,
+    spacyr,
+    tm,
+    textstem,
+    tidyr,
+    stringdist
 Suggests:
     rmarkdown,
     testthat,
-    covr
-Remotes:
-    trinker/entity
-RoxygenNote: 7.2.0
+    covr,
+    tesseract,
+    pkgdown,
+    quanteda.textstats,
+    keyATM,
+    messydates,
+    pdftools
+RoxygenNote: 7.3.1
 Encoding: UTF-8
 LazyData: True
 Depends: 
-    R (>= 2.10)
+    R (>= 3.5.0)
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,16 +1,37 @@
 # Generated by roxygen2: do not edit by hand
 
+export(annotate_text)
 export(extract_context)
-export(extract_location)
+export(extract_date)
+export(extract_locations)
 export(extract_match)
-export(extract_speaker)
-export(extract_split)
+export(extract_names)
+export(extract_text_similarities)
 export(extract_title)
+export(gather_related_terms)
+export(gather_topics)
+export(get_urgency)
+export(read_pdf)
+export(select_promises)
+export(split_text)
+import(dplyr)
+import(quanteda)
+import(spacyr)
+importFrom(dplyr,"%>%")
 importFrom(dplyr,distinct)
-importFrom(entity,person_entity)
+importFrom(dplyr,filter)
+importFrom(dplyr,group_by)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(dplyr,summarise)
+importFrom(dplyr,summarize)
+importFrom(dplyr,ungroup)
 importFrom(purrr,map_chr)
 importFrom(stringdist,stringsimmatrix)
 importFrom(stringi,stri_trans_general)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_extract_all)
+importFrom(stringr,str_remove_all)
+importFrom(stringr,str_squish)
+importFrom(tidyr,unite)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,24 @@
+# poldis 0.1.0
+
+2024-06-14
+
+## Package
+
+- Closed #3 by adding code coverage and code factor (and badges) to package
+- Closed #7 by adding a getting started section in README
+- Closed #8 by adding a `{pkgdown}` website
+
+## Functions
+
+- Updated text tools
+  - Renamed old text functions to start with "extract_" (`extract_names()`, `extract_title()`, `extract_context()`, `extract_date()`, `extract_locations()`, `extract_match()`)
+  - Closed #11 by adding `extract_text_similarities()` to fuzzy match texts
+  - Added `annotate_text()` function to classify words or sentences using NLP
+  - Added `read_pdf()` function to help users load readable and non-readable text from PDF files
+- Closed #14 by adding `select_promises()` function to extract future promises in text  
+- Closed #15 by adding `gather_topics()` and `gather_related_terms()` for assigning topics to texts
+- Added `get_urgency()` function for coding urgency from text
+
 # poldis 0.0.3
 
 2022-09-25

diff --git a/R/promises.R b/R/promises.R
@@ -0,0 +1,71 @@
+#' Select future promises from political discourses
+#'
+#' Political promises are statements in which actors express their
+#' intent or commitment to take political action in the future.
+#'
+#' A sentence is first flagged as a promise when its tags contain
+#' `"PRP MD "` or its lower-cased lemmas contain a commitment expression
+#' such as "going to", "promise to" or "plan to".
+#' Flagged sentences are then set back to `NA` when they contain
+#' an exclusion expression (e.g. " not ", "yesterday", "thank"),
+#' or when their tags match one of the excluded tag sequences.
+#' @param .data A (annotated) data frame or text vector.
+#' For data frames that are not yet annotated, the function annotates
+#' the "text" variable at the sentence level.
+#' For annotated data frames, please declare an annotated data frame
+#' at the sentence level (that is, with "sentence", "lemmas" and "tags"
+#' variables and without a "token_id" variable).
+#' @importFrom stringr str_detect str_remove_all
+#' @importFrom dplyr mutate distinct %>%
+#' @examples
+#' #select_promises(US_News_Conferences_1960_1980[1:2,3])
+#' @return A data frame with syntax information by sentences and
+#' a variable (`promises`) holding the sentence text for promises
+#' and `NA` otherwise. The result has class "promises" prepended.
+#' @export
+select_promises <- function(.data) {
+  # Placeholders for the column names used inside mutate() below
+  tags <- sentence <- lemmas <- promises <- NULL
+  if (inherits(.data, "data.frame")) {
+    if ("token_id" %in% names(.data)) {
+      stop("Please declare a text vector or an annotated data frame at the sentence level.")
+    }
+    if (!all(c("sentence", "lemmas", "tags") %in% names(.data))) {
+      # Not annotated yet: annotate the "text" variable, as documented
+      if (!"text" %in% names(.data)) {
+        stop("Please declare a data frame with a \"text\" variable or an annotated data frame at the sentence level.")
+      }
+      .data <- suppressMessages(annotate_text(.data[["text"]],
+                                              level = "sentences"))
+    }
+  } else {
+    .data <- suppressMessages(annotate_text(.data, level = "sentences"))
+  }
+  # Patterns are assembled with paste() so that line breaks and indentation
+  # in the source never become part of the regular expressions
+  commitments <- paste(c("going to", "go to ", "need to", "ready to",
+                         "is time to", "commit to", "promise to", "have to",
+                         "plan to", "intend to", "let 's", "let us", "urge",
+                         "require", "want to"), collapse = "|")
+  exclusions <- paste(c(" not ", "yesterday", "last week", "last month",
+                        "last year", "thank", "honor", "honour", "applause",
+                        "greet", "laugh", "privilege to", "great to",
+                        "good to be", "good to see"), collapse = "|")
+  excluded_tags <- paste(c("MD VB( RB)? VBN", "VBD( RB)? VBN",
+                           "VBZ( RB)? VBN", "VBD( RB)? JJ",
+                           "PRP( RB)? VBD TO", "VBN( RB)? VBN"),
+                         collapse = "|")
+  out <- .data %>%
+    dplyr::mutate(lemmas = tolower(lemmas),
+                  # detect promises
+                  promises = ifelse(stringr::str_detect(tags, "PRP MD ") |
+                                      stringr::str_detect(lemmas, commitments),
+                                    paste(sentence), NA),
+                  # drop flagged sentences that match an exclusion
+                  promises = ifelse(stringr::str_detect(promises, exclusions) |
+                                      stringr::str_detect(tags, excluded_tags),
+                                    NA, promises)) %>%
+    dplyr::distinct()
+  class(out) <- c("promises", class(out))
+  out
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda