adds visualization scripts

sib-swiss · Jan 10, 2025 · 5506714 · 5506714
1 parent c47fcb6
commit 5506714
Show file tree

Hide file tree

Showing 2 changed files with 136 additions and 0 deletions.
diff --git a/scripts/generate_wordcloud.R b/scripts/generate_wordcloud.R
@@ -0,0 +1,30 @@
+library(httr2)
+library(dplyr)
+library(wordcloud)
+
+# Fetch every tag from the glittr.org API. The loop below reads a `category`
+# string and a `tags` list (each tag with `name` and `repositories`) from each
+# element of the parsed response.
+parsed <- request("https://glittr.org/api/tags") |>
+  req_perform() |>
+  resp_body_json()
+
+# Build one data frame per category. vapply() pins the column types, and
+# categories without tags are skipped so they cannot produce a malformed
+# frame that breaks the rbind() below.
+tag_dfs <- list()
+for (i in seq_along(parsed)) {
+  category <- parsed[[i]]$category
+  tags <- parsed[[i]]$tags
+  if (length(tags) == 0) {
+    next
+  }
+  name <- vapply(tags, function(x) x$name, character(1))
+  repositories <- vapply(
+    tags,
+    function(x) as.numeric(x$repositories),
+    numeric(1)
+  )
+  tag_dfs[[category]] <- data.frame(name, category, repositories)
+}
+
+# Stack the per-category frames and sort by ascending repository count.
+tag_df <- do.call(rbind, tag_dfs) |> arrange(repositories)
+
+# Palette: one hex colour for each glittr.org tag category.
+glittr_cols <- c(
+  "Scripting and languages" = "#3a86ff",
+  "Computational methods and pipelines" = "#fb5607",
+  "Omics analysis" = "#ff006e",
+  "Reproducibility and data management" = "#ffbe0b",
+  "Statistics and machine learning" = "#8338ec",
+  "Others" = "#000000"
+)
+
+# Draw at most 100 tags, sized by repository count, into a PDF in the working
+# directory. With random.color = TRUE, wordcloud() picks each word's colour at
+# random from the palette, so the category names above do not drive the
+# colouring.
+pdf("wordcloud_tags.pdf")
+wordcloud(
+  words = tag_df$name,
+  freq = tag_df$repositories,
+  max.words = 100,
+  colors = glittr_cols,
+  random.color = TRUE
+)
+dev.off()
diff --git a/scripts/matomo_api.r b/scripts/matomo_api.r
@@ -0,0 +1,106 @@
+library(httr2)
+library(dplyr)
+
+# Matomo reporting API endpoint.
+url <- "https://matomo.sib.swiss/?module=API"
+# Read the API token from the MATOMO_TOKEN environment variable so that a real
+# token never has to be written into this file; the placeholder is kept as the
+# fallback for backward compatibility.
+token <- Sys.getenv("MATOMO_TOKEN", unset = "YOUR_TOKEN")
+
+# Example query string for a different report:
+# &method=Actions.getPageTitles&idSite=217&period=day&date=yesterday&format=json
+
+# Alternative endpoints on the public Matomo demo instance:
+# url <- "https://demo.matomo.cloud/?module=API&method=API.getMatomoVersion&format=xml"
+# token <- "YOUR_TOKEN"
+
+# url <- 'https://demo.matomo.cloud/?module=API&method=Actions.getPageTitles&idSite=1&date=yesterday'
+
+# Create and send the request. The parameters travel in the form body, so the
+# token is not part of the URL.
+response <- request(url) |>
+  req_body_form(
+    method = "Actions.getOutlinks",
+    idSite = 217,
+    format = "json",
+    date = "today",
+    period = "year",
+    # expanded = 1 asks for the sub-tables that are read further down.
+    expanded = 1,
+    # -1 lifts the row limit so every row is returned.
+    filter_limit = -1,
+    token_auth = token
+  ) |>
+  req_perform() |>
+  resp_body_json()
+
+# Flatten the nested response into one row per outlink URL. The loop reads a
+# `label` from each top-level element and `url` / `nb_visits` from each entry
+# of its `subtable`.
+outlinks_list <- list()
+for (domain in response) {
+  label <- domain$label
+  url_info <- lapply(domain$subtable, function(x) {
+    # Plain if/else instead of scalar ifelse(); typed NAs keep the column
+    # types stable when the rows are bound together.
+    data.frame(
+      url = if (is.null(x$url)) NA_character_ else x$url,
+      nb_visits = if (is.null(x$nb_visits)) NA_integer_ else x$nb_visits,
+      domain = label
+    )
+  })
+  outlinks_list[[label]] <- do.call(rbind, url_info)
+}
+
+# Combine all domains and drop the "<label>.<n>" row names left by rbind().
+outlinks_df <- do.call(rbind, outlinks_list)
+row.names(outlinks_df) <- NULL
+
+# Fetch all glittr.org repository entries as a nested list.
+parsed <- request("https://glittr.org/api/repositories") |>
+  req_perform() |>
+  resp_body_json()
+
+# Extract the entry name and its URL fields into one row per entry.
+# data.frame() silently drops a column whose value is NULL, which would give
+# rows with different columns and make rbind() fail, so absent fields are
+# stored as NA instead.
+url_repo_list <- lapply(parsed$data, function(x) {
+  or_na <- function(value) if (is.null(value)) NA_character_ else value
+  data.frame(
+    name = or_na(x$name),
+    repo_url = or_na(x$url),
+    website = or_na(x$website),
+    author_profile = or_na(x$author$profile),
+    author_website = or_na(x$author$website)
+  )
+})
+
+url_repo <- do.call(rbind, url_repo_list)
+
+
+
+# Exact (uncleaned) comparison of each outlink URL with the entry websites:
+# position of the first matching entry, or NA when there is none.
+website_idx <- match(outlinks_df$url, url_repo$website)
+outlinks_df$is_website <- !is.na(website_idx)
+outlinks_df$associated_by_repo_url <- url_repo$name[website_idx]
+
+# Normalise URLs for comparison: strip surrounding whitespace, remove one
+# trailing slash and convert to lower case. Vectorised over `url`.
+clean_url <- function(url) {
+  trimmed <- trimws(url)
+  without_slash <- sub("/$", "", trimmed)
+  tolower(without_slash)
+}
+
+# Flag outlinks whose cleaned URL equals one of the glittr URL fields.
+#
+# outlinks_df: data frame with a `url` column.
+# url_repo:    data frame with a `name` column and the column named by `column`.
+# column:      name of the url_repo column to compare against.
+#
+# Returns outlinks_df with two added columns: is_<column> (logical) and
+# ass_repo_<column> (name of the first matching url_repo row, otherwise NA).
+match_url <- function(outlinks_df, url_repo, column = "repo_url") {
+  outlink_urls <- clean_url(outlinks_df$url)
+  repo_urls <- clean_url(url_repo[[column]])
+
+  idx <- match(outlink_urls, repo_urls)
+  # match() treats NA as equal to NA (and "" as equal to ""), which would link
+  # an outlink without a URL to the first entry lacking this field.
+  idx[is.na(outlink_urls) | !nzchar(outlink_urls)] <- NA_integer_
+
+  outlinks_df[[paste0("is_", column)]] <- !is.na(idx)
+  outlinks_df[[paste0("ass_repo_", column)]] <- url_repo$name[idx]
+  outlinks_df
+}
+
+# Add the is_<column> / ass_repo_<column> pair for every glittr URL field.
+url_columns <- c("repo_url", "website", "author_profile", "author_website")
+for (column in url_columns) {
+  outlinks_df <- match_url(outlinks_df, url_repo, column = column)
+}
+
+# TRUE for outlinks that matched no entry through any of the URL fields.
+ass_repo_cols <- select(outlinks_df, starts_with("ass_repo"))
+no_ass <- apply(ass_repo_cols, 1, function(x) all(is.na(x)))
+
+# Show the URLs that could not be linked to any entry.
+outlinks_df$url[no_ass]
+
+# Resolve each outlink to a single entry from its repository-URL match and its
+# website match:
+#   - neither matched      -> NA
+#   - one matched, or both
+#     name the same entry  -> that entry's name
+#   - both matched but
+#     name different ones  -> "do not correspond"
+# Written as an explicit vectorised rule; the earlier row-wise version relied
+# on length(x == 0) / length(x == 2), which only worked by accident.
+repo_match <- outlinks_df$ass_repo_repo_url
+site_match <- outlinks_df$ass_repo_website
+
+# Never NA: the comparison is only reached when both names are present.
+conflict <- !is.na(repo_match) & !is.na(site_match) & repo_match != site_match
+
+outlinks_df$associated_entry <- ifelse(
+  conflict,
+  "do not correspond",
+  ifelse(is.na(repo_match), site_match, repo_match)
+)
+
+# Show the outlinks whose repository and website matches disagree. filter()
+# drops rows where the condition is NA, so no separate is.na() step is needed.
+outlinks_df |>
+  filter(associated_entry == "do not correspond")
+
+# Total visits per associated entry, most visited first. na.rm = TRUE because
+# nb_visits is NA for outlinks that came without a visit count, and a single NA
+# would otherwise turn the entry's whole total into NA.
+# NOTE(review): rows flagged "do not correspond" are still summed as if that
+# were an entry name -- confirm whether they should be excluded.
+visits_by_entry <- outlinks_df |>
+  filter(!is.na(associated_entry)) |>
+  select(url, nb_visits, associated_entry) |>
+  group_by(associated_entry) |>
+  summarise(total_visits = sum(nb_visits, na.rm = TRUE)) |>
+  arrange(desc(total_visits))