Nicolás Schmidt, Diego Luján, Juan Andrés Moraes
Converts the floor speeches of Uruguayan legislators, extracted from the parliamentary minutes, into a tidy data.frame in which each observation is the intervention of a single legislator.
# Install speech from CRAN
install.packages("speech")
# The development version from GitHub:
# requireNamespace() only checks whether remotes is installed; unlike
# require() it does not attach the package to the search path.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
remotes::install_github("Nicolas-Schmidt/speech")
You can see more complex examples in the following link.
# Load the package and point to one session diary on the parliament website.
library(speech)
url <- "https://parlamento.gub.uy/documentosyleyes/documentos/diarios-de-sesion/6084/IMG"
# Build the speech table from that document and print it.
text <- speech::speech_build(file = url)
text
#> # A tibble: 24 × 7
#> legislator speech chamber date legislature id sex
#> <chr> <chr> <chr> <date> <int> <chr> <dbl>
#> 1 BORDABERRY SEÑOR BORDABERRY. Pido… COMISI… 2019-09-17 48 2019… 1
#> 2 BORDABERRY SEÑOR BORDABERRY. Prop… COMISI… 2019-09-17 48 2019… 1
#> 3 AVIAGA SEÑORA AVIAGA. Pido la… COMISI… 2019-09-17 48 2019… 0
#> 4 AVIAGA SEÑORA AVIAGA. En el m… COMISI… 2019-09-17 48 2019… 0
#> 5 GOÑI SEÑOR GOÑI. Pido la pa… COMISI… 2019-09-17 48 2019… 1
#> 6 GOÑI SEÑOR GOÑI. El Frente … COMISI… 2019-09-17 48 2019… 1
#> 7 MAHIA SEÑOR MAHIA. Pido la p… COMISI… 2019-09-17 48 2019… 1
#> 8 MAHIA SEÑOR MAHIA. Gracias, … COMISI… 2019-09-17 48 2019… 1
#> 9 ABDALA SEÑOR ABDALA. Voto por… COMISI… 2019-09-17 48 2019… 1
#> 10 ASTI SEÑOR ASTI. Obviamente… COMISI… 2019-09-17 48 2019… 1
#> # ℹ 14 more rows
# List the legislator names in `text` grouped by the requested initials
# (here "A" and "M"), as the printed result below shows.
speech_check(text, initial = c("A", "M"))
#> $A
#> legislator
#> 1 ABDALA
#> 2 ASTI
#> 3 AVIAGA
#>
#> $M
#> legislator
#> 1 MAHIA
#> 2 MERONI
# Rebuild the table with compiler = TRUE. The printed result has one row per
# legislator (11 rows instead of 24), so each legislator's interventions
# appear to be merged into a single speech -- confirm in ?speech_build.
text <- speech::speech_build(file = url, compiler = TRUE)
text
#> # A tibble: 11 × 7
#> legislator legislature chamber date id speech sex
#> <chr> <int> <chr> <date> <chr> <chr> <dbl>
#> 1 ABDALA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 2 ASTI 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 3 AVIAGA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 4 BORDABERRY 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 5 GOÑI 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 6 LAZO 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 7 MAHIA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 8 MERONI 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
#> 9 PEREYRA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 10 TOURNE 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 0
#> 11 VIERA 48 COMISION PERMANENTE 2019-09-17 20190917c… SEÑOR… 1
# Add a `word` column with the result of speech_word_count() for each speech,
# then inspect the structure of the table.
text$word <- speech_word_count(text$speech)
dplyr::glimpse(text)
#> Rows: 11
#> Columns: 8
#> $ legislator <chr> "ABDALA", "ASTI", "AVIAGA", "BORDABERRY", "GOÑI", "LAZO", …
#> $ legislature <int> 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
#> $ chamber <chr> "COMISION PERMANENTE", "COMISION PERMANENTE", "COMISION PE…
#> $ date <date> 2019-09-17, 2019-09-17, 2019-09-17, 2019-09-17, 2019-09-17…
#> $ id <chr> "20190917c0001", "20190917c0001", "20190917c0001", "20190…
#> $ speech <chr> "SEÑOR ABDALA. Voto por la señora legisladora Daisy Tourné…
#> $ sex <dbl> 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1
#> $ word <int> 398, 46, 105, 951, 98, 103, 126, 12, 12, 111, 8
library(magrittr)
# Split text into words and keep only the long ones.
#
# string: character vector of text.
# min:    length threshold; only words with MORE than `min` characters are
#         kept (the default 3 keeps words of four or more characters).
#
# Returns a character vector with the kept words from all elements of
# `string`, or character(0) when nothing qualifies.
minchar <- function(string, min = 3) {
  string <- stringr::str_remove_all(string, "[[:punct:]]")
  # Split on any run of whitespace so that tabs, line breaks and repeated
  # spaces also separate words instead of gluing them into one token.
  # as.character() turns the NULL that unlist() yields for empty input into
  # character(0).
  words <- as.character(unlist(strsplit(string, "[[:space:]]+")))
  # Drop missing values explicitly: nchar(NA_character_) is NA, which would
  # otherwise leak NA entries into the result.
  words[!is.na(words) & nchar(words) > min]
}
# Word cloud of the words longer than four characters, leaving out the forms
# of address "señor" / "señora".
# Building a dfm straight from a corpus and the `remove` argument of dfm()
# were deprecated in quanteda 3 and removed in quanteda 4, so tokenise
# explicitly and drop the unwanted features with dfm_remove().
text$speech %>%
  minchar(min = 4) %>%
  quanteda::corpus() %>%
  quanteda::tokens() %>%
  quanteda::dfm() %>%
  quanteda::dfm_remove(pattern = c("señor", "señora")) %>%
  quanteda.textplots::textplot_wordcloud(
    color = rev(RColorBrewer::brewer.pal(10, "RdBu"))
  )
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.2.3
# Horizontal bar chart of the 40 most frequent words longer than four
# characters, leaving out words that start with "señor".
text$speech %>%
  minchar(min = 4) %>%
  tibble::enframe() %>%
  tidytext::unnest_tokens(word, value) %>%
  dplyr::count(word, sort = TRUE) %>%
  dplyr::filter(!stringr::str_detect(word, "^señor")) %>%
  # slice_head() returns at most 40 rows; indexing with 1:40 would reach
  # past the end of the table when fewer than 40 distinct words remain.
  dplyr::slice_head(n = 40) %>%
  # Order the factor levels by frequency so the bars are drawn sorted.
  dplyr::mutate(word = stats::reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col(col = "black", fill = "#00A08A", width = .7) +
  labs(x = "", y = "") +
  coord_flip() +
  theme_minimal()
# Look up the session documents for the two given days in chamber "D"
# (presumably the Chamber of Deputies -- confirm in ?speech_url).
urls <- speech_url(chamber = "D", days = c("2002-06-12", "2004-04-14"))
# Extract the roll-call votes from those documents; the printed table below
# has one row per legislator and roll call, with `vote` and `argument` columns.
rollcall <- speech_rollcall(file = urls)
rollcall
#> # A tibble: 165 × 10
#> legislator vote argument speech chamber date legislature rollcall
#> <chr> <dbl> <dbl> <chr> <chr> <date> <int> <int>
#> 1 ABDALA 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 2 AGAZZI 1 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 3 AMEN VAGHETTI 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 4 AMORIN BATLLE 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 5 ARAUJO 0 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 6 ARGENZIO 0 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 7 ARGIMON 0 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 8 ARRARTE FERNAN… 1 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> 9 ARREGUI 1 1 SEÑOR… CAMARA… 2002-06-12 45 1
#> 10 BARAIBAR 1 0 SEÑOR… CAMARA… 2002-06-12 45 1
#> # ℹ 155 more rows
#> # ℹ 2 more variables: id <chr>, sex <dbl>
# Summarise the roll calls: the printed result has one row per session with
# vote counts and proportions.
summary(rollcall)
#> # A tibble: 2 × 10
#> Chamber Date Legislators Affirmative Negative prop_AF prop_NG prop_women
#> <chr> <date> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 CRR 2002-06-12 92 45 47 48.9 51.1 13.0
#> 2 CRR 2004-04-14 73 32 41 43.8 56.2 15.1
#> # ℹ 2 more variables: prop_arg <dbl>, rc <int>
To cite the package `speech` in publications, please use:
# Print the recommended citation for the package, including a BibTeX entry.
citation(package = 'speech')
#>
#> To cite speech in publications use:
#>
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Article{,
#> title = {Estimating Parties’ Policy Positions in Uruguay: Comparing Scaling Methods Based on Legislative Speeches and Roll-Call Votes},
#> author = {Diego Luján and Nicolás Schmidt and Juan A. Moraes},
#> journal = {Latin American Politics and Society},
#> year = {2023},
#> volume = {0},
#> number = {0},
#> pages = {1-17},
#> url = {doi:10.1017/lap.2023.12},
#> }
Nicolas Schmidt ([email protected])