-
Notifications
You must be signed in to change notification settings - Fork 0
/
journals openalex.R
110 lines (91 loc) · 3.14 KB
/
journals openalex.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# The package ggtext needs to be installed to run this chunk
# library(ggtext)
library(openalexR)
library(tidyverse)
library(questionr)
library(openxlsx2)
library(readxl)
library(openxlsx)
# Download OpenAlex venues in two requests split on works_count, then stack
# both results into a single table.
# NOTE(review): OpenAlex has since moved from the "venues" entity to
# "sources" -- confirm the installed openalexR still accepts "venues".

# Venues with more than 200 works.
jours_all <- oa_fetch(
  entity = "venues",
  works_count = ">200",
  verbose = TRUE
)

# Venues with 200 works or fewer. The bound is "<201" rather than "<200":
# with two strict inequalities around 200, venues holding exactly 200 works
# would be returned by neither request and silently go missing.
jours_all_inf200 <- oa_fetch(
  entity = "venues",
  works_count = "<201",
  verbose = TRUE
)

# save.image("mon_espace_de_travail.RData")

# Union of the two data frames jours_all and jours_all_inf200.
openalex_venues <- rbind(jours_all, jours_all_inf200)
# Keep journals only, restricted to the columns used further down, and
# expand the issn column so that every ISSN sits on its own row.
journals_openalex <- openalex_venues |>
  filter(type == "journal") |>
  select(
    id, display_name, host_organization_name, issn,
    is_oa, is_in_doaj, works_count, cited_by_count, type
  ) |>
  unnest(issn)

# Export the expanded table before rows with missing values are removed.
write.xlsx(journals_openalex, file = "D:/bdd pubpeer/journals_openalex.xlsx")

# Drop rows lacking an ISSN or a host organization name.
journals_openalex <- filter(
  journals_openalex,
  !is.na(issn),
  !is.na(host_organization_name)
)
# NOTE(review): data_jnal and data_openalex are not created anywhere in this
# script; presumably they already exist in the session -- confirm.

# Discard records whose issn is the literal string "None" (rows where issn is
# NA are dropped too, because the comparison evaluates to NA).
data_jnal <- filter(data_jnal, issn != "None")

# Match with OpenAlex on issn, keeping every row of data_jnal.
data_jnal2 <- data_jnal |>
  left_join(data_openalex, by = "issn")

# Distinct (publication, journal name, host organization) combinations.
match <- data_jnal2 |>
  select(publication, display_name, host_organization_name) |>
  unique()

# Number of those combinations per host organization.
count_editeurs_pubpper <- match |>
  group_by(host_organization_name) |>
  count()

# Sum of works_count per host organization over data_openalex.
count_editeurs_tot <- data_openalex |>
  group_by(host_organization_name) |>
  summarise(nb = sum(works_count))
## Analysis: see https://docs.ropensci.org/openalexR/
# Build the table behind the "Journal clocks" figure: one row per
# (journal, level-0 concept abbreviation) pair with its score and an HTML
# label, for the most-cited venues in jours_all.
jours <- jours_all |>
  # Venues that carry concept data and are not ebook platforms.
  filter(!is.na(x_concepts), type != "ebook platform") |>
  # Top 9 by citation count, then one row per display name.
  slice_max(cited_by_count, n = 9) |>
  distinct(display_name, .keep_all = TRUE) |>
  select(jour = display_name, x_concepts) |>
  # One row per (journal, concept); keep level-0 concepts only.
  tidyr::unnest(x_concepts) |>
  filter(level == 0) |>
  left_join(concept_abbrev, by = join_by(id, display_name)) |>
  mutate(
    # Turn spaces into HTML line breaks inside the abbreviation.
    abbreviation = gsub(" ", "<br>", abbreviation),
    # Strip parenthesised text from the journal name, then shorten the
    # "Journal of" prefix.
    jour = gsub("\\(.*?\\)", "", jour),
    jour = gsub("Journal of|Journal of the", "J.", jour)
  ) |>
  # Add the missing journal/abbreviation combinations with a score of 0.
  tidyr::complete(jour, abbreviation, fill = list(score = 0)) |>
  mutate(
    # Dark label text when the score exceeds 10, light grey otherwise.
    color = if_else(score > 10, "#1A1A1A", "#D9D9D9"),
    label = paste0("<span style='color:", color, "'>", abbreviation, "</span>")
  )
# "Journal clocks": one polar bar chart per journal, with one bar per concept
# abbreviation whose height is the score. Requires the ggtext package to be
# installed (it is called through its namespace, not attached).
jours |>
  ggplot(aes(fill = jour, y = score, x = abbreviation, group = jour)) +
  # Layers, in drawing order: reference rings, spokes, bars, rich-text labels.
  geom_hline(yintercept = c(45, 90), colour = "grey90", linewidth = 0.2) +
  geom_segment(
    aes(x = abbreviation, xend = abbreviation, y = 0, yend = 100),
    color = "grey95"
  ) +
  geom_col(color = "grey20") +
  ggtext::geom_richtext(
    # Labels are placed at y = 120, beyond the end of the spokes.
    aes(y = 120, label = label),
    fill = NA, label.color = NA, size = 3
  ) +
  # One panel per journal, drawn in polar coordinates without clipping.
  facet_wrap(~jour) +
  coord_polar(clip = "off") +
  scale_fill_brewer(palette = "Set1", guide = "none") +
  labs(y = NULL, x = NULL, title = "Journal clocks") +
  # Start from theme_bw() and remove backgrounds, grid, border and axis text.
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "transparent", colour = NA),
    panel.background = element_rect(fill = "transparent", colour = NA),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.text = element_blank(),
    axis.ticks.y = element_blank()
  )