-
Notifications
You must be signed in to change notification settings - Fork 0
/
NanopublicationToVis.R
141 lines (115 loc) · 4.89 KB
/
NanopublicationToVis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Many thanks to this amazing tutorial: https://kateto.net/network-visualization
# Load required libraries
library(xml2)
library(igraph)
library(plotly)
library(XML)
library(dplyr)
library(stringr)
library(tidyr)
library(visNetwork)
# Download and read the RDF/XML file
url <- "http://np.knowledgepixels.com/RAzquSkwsTAZm61nReG6MOjXEXUx8fNVfdWnAzyn6sOhU.xml" # Replace with your actual URL
name <- "RAzquSkwsT"
xml_data <- read_xml(url)
# Function to get a human-readable label
get_label <- function(uri) {
# There's no labels in nanopubs so we just process the URI here.
#Basically this string checks if it has a nice label (indicated by #)
#If not it returns the tail of the URL which tends to be the ontology URI
label_tails <- str_extract(uri, "/([^/]+)$") %>% str_remove("^/")
label_neat <- str_extract(label_tails, "#([^#]+)$") %>% str_remove("^#")
label <- coalesce(label_neat, label_tails)
return(label)
}
# Extract nodes and edges
# The XML namespace needs to be referenced in the function:
# Find all graph elements
graphs <- xml_find_all(xml_data, ".//d1:graph", xml_ns(xml_data))
# Function to process a single graph into dataframe
process_graph <- function(graph) {
graph_uri <- xml_text(xml_find_first(graph, "./d1:uri"))
triples <- xml_find_all(graph, ".//d1:triple")
triples %>%
lapply(function(triple) {
uris <- xml_find_all(triple, ".//d1:uri") %>% xml_text()
data.frame(
graph = graph_uri,
subject = uris[1],
predicate = uris[2],
object = uris[3],
stringsAsFactors = FALSE
)
}) %>%
bind_rows()
}
# Process all graphs and combine results
df <- graphs %>%
lapply(process_graph) %>%
bind_rows()
# Print the resulting dataframe for debugging
print(df)
#Here we're going to strip the messy URI from graph
#This regex just pulls the end of the URI which is usually the label
df <- df %>%
mutate(graph_labels = get_label(df$graph)) %>%
mutate(object_labels = get_label(df$object)) %>%
mutate(predicate_labels = get_label(df$predicate)) %>%
mutate(subject_labels = get_label(df$subject))
#todo - split up nodes including graph label info
# - create edges table
# - stylize graph
#We need to reorder the df to be subject & object, then predicate.
#We also need to create a separate vertex metadata df for labels
#This is because igraph processes the first two columns as edgelist
edges <- df[, c("subject", "object", "predicate", "predicate_labels")]
#We drop any edges with NA in them.
edges <- drop_na(edges, c("subject", "object"))
#We then need to pull out all unique vertices from subject and object and their
#associated metadata
# Create two dataframes with unique values from column1 and column2
temp1 <- df %>%
select(subject, graph_labels, subject_labels) %>%
distinct(subject, .keep_all = TRUE)
temp2 <- df %>%
select(object, graph_labels, object_labels) %>%
distinct(object, .keep_all = TRUE)
# Combine the two dataframes
merged_vertices <- bind_rows(
temp1 %>% rename(uri = subject),
temp2 %>% rename(uri = object)
) %>%
distinct(uri, .keep_all = TRUE)
merged_vertices <- drop_na(merged_vertices, "uri")
# We need to merge the labels into one label column.
merged_vertices <- merged_vertices %>% unite("label", c("subject_labels", "object_labels"), na.rm = TRUE, remove = FALSE)
#Let's change each graph into a colour so we can split things up.
merged_vertices <- merged_vertices %>% mutate(colour = recode(graph_labels, Head = "blue", Assertion = "green", Provenance = "orange", Pubinfo = "grey"))
clean_vertices <- merged_vertices[,c("uri", "label", "graph_labels")]
clean_vertices <- rename(clean_vertices, clean_label = label)
#NOW we add our edges and vertices and make an igraph object
graph <- graph_from_data_frame(edges, directed = TRUE, vertices = clean_vertices)
#Let's convert for visNetwork which is a nicer visualization
g_vis <- toVisNetworkData(graph)
vis.nodes <- g_vis$nodes
vis.edges <- g_vis$edges
vis.nodes$shape <- "dot"
vis.nodes$shadow <- TRUE # Nodes will drop shadow
vis.nodes$title <- vis.nodes$uri # Text on click
vis.nodes$label <- vis.nodes$clean_label
vis.nodes$borderWidth <- 2 # Node border width
vis.nodes$color <- vis.nodes$colour
vis.nodes$group <- vis.nodes$graph_labels
vis.edges$title <- vis.edges$predicate_labels
vis.edges$label <- vis.edges$predicate_labels
#TODO: Make the below code turn nodes into links when clicked
visNetwork(vis.nodes, vis.edges) %>%
visGroups(groupname = "Head", color = "grey") %>%
visGroups(groupname = "Assertion", color = "green") %>%
visGroups(groupname = "Pubinfo", color = "purple") %>%
visGroups(groupname = "Provenance", color = "orange") %>%
visEvents(doubleClick = "function(properties) {
window.open(properties.nodes)
}") %>%
visLegend() %>%
visSave("vis.html")