-
Notifications
You must be signed in to change notification settings - Fork 0
/
sharepoint-mapper.R
executable file
·125 lines (108 loc) · 3.5 KB
/
sharepoint-mapper.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#! /usr/bin/Rscript
# SharePoint Mapper
# https://github.com/andrewclausen/sharepoint-mapper
# Copyright © 2023, by Andrew Clausen <[email protected]>
# This program builds a site map of Microsoft Sharepoint sites out of web
# browser logs. Only parts of the site visited by the browser are included.
# Install any packages from `pkgs` that are not yet installed, then attach
# every one of them with library().
#
# pkgs: character vector of package names.
# Side effects: may call install.packages(); attaches packages to the
# search path. Returns invisibly (the value of the last library() call).
use_library <- function(pkgs)
{
	installed <- installed.packages()[, "Package"]
	missing_pkgs <- pkgs[!pkgs %in% installed]
	if (length(missing_pkgs) > 0)
		# Bug fix: only install the packages that are actually missing
		# (the original reinstalled the whole of `pkgs`).
		install.packages(missing_pkgs)
	for (pkg in pkgs)
		library(pkg, character.only=TRUE)
}
use_library(c("jsonlite", "magrittr"))
# Canonicalise a (vector of) hyperlink(s) by stripping any run of
# trailing slashes, so equal folders compare equal.
normalise_link <- function(links) sub("/+$", "", links)
# Extract Sharepoint folder-listing payloads from a parsed HAR log.
# Each listing lives in the body of an XHR request whose URL contains
# "RenderListDataAsStream"; every matching response body is parsed
# from JSON.  Returns a list of parsed listing objects.
extract_raw_listings <- function(har)
{
	entries <- har$log$entries
	is_listing <- grepl("RenderListDataAsStream", entries$request$url)
	raw_texts <- entries$response$content$text[is_listing]
	lapply(raw_texts, fromJSON)
}
# Convert one raw Sharepoint folder listing (parsed JSON) into a data
# frame with columns "name", "link", "domain" and "parent".
# Listings that carry no file rows yield an empty list, which the
# downstream rbind() ignores.
parse_raw_listings <- function(x)
{
	row_data <- x$ListData$Row
	if (!"FileLeafRef" %in% names(row_data))
		return(list())
	frame <- as.data.frame(row_data)
	listing <- data.frame(
		name = frame$FileLeafRef,
		link = normalise_link(frame$FileRef))
	# HttpRoot looks like "https://host/sites/site"; keep only the host part.
	listing$domain <- sub("/sites/.*$", "", x$HttpRoot)
	# The parent folder is everything before the last "/"; top-level
	# entries (no "/" at all) get NA.
	listing$parent <- ifelse(
		grepl("/", listing$link),
		sub("(.*)/[^/]*", "\\1", listing$link),
		NA)
	listing
}
# Read every *.har browser log in the working directory and combine all
# the Sharepoint listings they contain into one data frame
# (name/link/domain/parent, one row per listed item).
ingest_sessions <- function()
{
	har_files <- as.list(list.files(pattern="[.]har$"))
	hars <- lapply(har_files, fromJSON)
	raw_listings <- do.call(c, lapply(hars, extract_raw_listings))
	parsed <- lapply(raw_listings, parse_raw_listings)
	do.call(rbind, parsed)
}
# Add synthetic rows for parents that are referenced but never listed
# themselves, e.g. when the top-level folder was never visited.
# Only one level is filled in: a synthetic node's own parent may still be
# absent, in which case construct_trees() treats it as a root.
#
# Fixes over the original:
#  - items with an NA parent (links containing no "/") no longer spawn a
#    bogus all-NA row, which previously leaked into the site map as a
#    "[NA](NANA)" heading;
#  - each missing parent is added once, not once per child.
add_missing_nodes <- function(items)
{
	is_missing <- !is.na(items$parent) & !items$parent %in% items$link
	missing_link <- items$parent[is_missing]
	missing_domain <- items$domain[is_missing]
	keep <- !duplicated(missing_link)
	missing_link <- missing_link[keep]
	missing_domain <- missing_domain[keep]
	if (length(missing_link) == 0)
		return(items)
	missing <- data.frame(
		name=sub("^.*/([^/]*)$", "\\1", missing_link),
		link=missing_link,
		domain=missing_domain,
		parent=sub("^(.*)/[^/]*$", "\\1", missing_link))
	rbind(items, missing)
}
# Build a forest of nested lists from the flat folder-listing data frame.
# A row is a root when its parent does not appear among the links (this
# includes NA parents, since %in% never matches NA against non-NA links).
# Each node is the row's columns as a list, plus a "children" list built
# recursively.
construct_trees <- function(df)
{
	nodes <- lapply(seq_len(nrow(df)), function(i) as.list(df[i, ]))
	root_idx <- which(!df$parent %in% df$link)
	attach_children <- function(i)
	{
		node <- nodes[[i]]
		kids <- which(df$parent == df$link[i])
		node$children <- lapply(kids, attach_children)
		node
	}
	lapply(root_idx, attach_children)
}
# Render a Markdown hyperlink: format_link("a", "b") gives "[a](b)".
# Vectorised over both arguments via paste0 recycling.
format_link <- function(name, link)
	paste0("[", name, "](", link, ")")
# Render a forest of folder listings as nested Markdown lists of links.
# The top level (indent == -1) becomes "# " headings; deeper levels
# alternate "*" and "-" bullets, indented by two spaces per level
# (plus one leading space).  Returns a single string.
format_trees <- function(roots, indent=-1)
{
	pad <- strrep(" ", max(0, indent * 2 + 1))
	bullet <- if (indent == -1) "#" else c("*", "-")[indent %% 2 + 1]
	pieces <- lapply(roots, function(node)
	{
		target <- paste0(node$domain, node$link)
		entry <- paste0(pad, bullet, " ", format_link(node$name, target), "\n\n")
		paste0(entry, format_trees(node$children, indent + 1))
	})
	paste(unlist(pieces), collapse="")
}
# Main script: read every *.har log in the working directory, add any
# folders that were referenced but never listed, drop duplicate links
# (keeping the first occurrence, so each folder appears once), then
# write the resulting forest as nested Markdown lists.
items <- ingest_sessions()
items <- add_missing_nodes(items)
items <- items[!duplicated(items$link), ]
trees <- construct_trees(items)
text <- format_trees(trees)
cat(text, file="site-map.md")