# NOTE: web-viewer navigation text and the line-number gutter (1-96) that preceded the script were extraction residue and have been removed.
#===============================================================================
# analyse_co-genre_network.R
# Purpose: Analyse the co-genre network data
# Author: Hiroki Oda
#===============================================================================
library(ggplot2)
library(igraph)
library(dplyr)
# Build the co-genre graph and report its global metrics ----

# Load the edge data. The node list below relies on the file providing
# `Source` and `Target` columns.
edges <- read.csv("../data/cogenre_network/edges.csv")

# Create the node list from every id that appears at either end of an edge
nodes <- data.frame(Id = unique(c(edges$Source, edges$Target)))

# Create an undirected graph object from the edge list and the node list
g <- graph_from_data_frame(edges, directed = FALSE, vertices = nodes)

# Number of nodes and edges
num_nodes <- vcount(g)
num_edges <- ecount(g)

# Degree distribution
degree_dist <- degree_distribution(g)

# Clustering coefficient (global transitivity of the whole graph)
clustering_coef <- transitivity(g, type = "global")

# Assortativity (by vertex degree)
assortativity <- assortativity_degree(g)

# Centrality measures (currently disabled)
#centrality <- centrality(g, measures = c("degree", "betweenness", "closeness", "eigen"))

# Output the results.
# print() is called explicitly for every value: a bare expression at top level
# is only auto-printed interactively or under Rscript, not when the script is
# run through source() with its default settings.
print("Number of nodes")
print(num_nodes)
print("Number of edges")
print(num_edges)
#degree_dist
print("Clustering coefficient")
print(clustering_coef)
print("Assortativity")
print(assortativity)
# Visualise the degree distribution ----

# degree_distribution() returns relative frequencies starting at degree 0, so
# element i of `degree_dist` belongs to degree i - 1 (not degree i).
degree_dist_df <- data.frame(
  degree = seq_along(degree_dist) - 1,
  frequency = degree_dist
)

# Degree 0 has no position on a logarithmic x axis, so it is left out of the
# plotted data.
degree_dist_df <- degree_dist_df[degree_dist_df$degree > 0, , drop = FALSE]

# Log-log plot of frequency against degree
degree_dist_plot <- ggplot(degree_dist_df, aes(x = degree, y = frequency)) +
  geom_point() +
  geom_line() +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Degree") +
  ylab("Frequency") +
  ggtitle("Degree distribution")

# Render explicitly so the figure is also drawn when the script is run
# through source(), where top-level values are not auto-printed.
print(degree_dist_plot)
# Category and sub-genre tables ----

# Load the category data
categories <- read.csv("../data/cogenre_network/category_stats.csv")
subgenres <- read.csv("../data/cogenre_network/category_sub-genre.csv")

# Number of categories and sub-genres (row counts of the two tables)
num_categories <- nrow(categories)
num_subgenres <- nrow(subgenres)

# Statistics of categories
# The columns are named as strings through all_of(): `num_nodes` and
# `num_edges` are also variables created earlier in this script, and a bare
# name inside select() can fall back to such a variable when the column is
# absent. all_of() fails with a clear error if a column is missing instead.
category_stats <- categories %>%
  select(all_of(c(
    "category",
    "num_nodes",
    "num_edges",
    "num_inter_category_edges",
    "average_atypicality"
  )))

# Output the results.
# print() is explicit so the counts are also shown when the script is run
# through source(), which does not auto-print top-level values by default.
print("Number of categories")
print(num_categories)
print("Number of sub-genres")
print(num_subgenres)
# Atypicality ranking and summary ----

# Load the atypicality data
atypicality <- read.csv("../data/cogenre_network/genre_atypicality.csv")

# The data frame and its `atypicality` column share a name; the `.data`
# pronoun makes explicit that the column is meant inside the dplyr verbs.

# The five rows with the highest atypicality
high_atypicality <- atypicality %>%
  arrange(desc(.data$atypicality)) %>%
  head(5)

# The five rows with the lowest atypicality
low_atypicality <- atypicality %>%
  arrange(.data$atypicality) %>%
  head(5)

# Description of the atypicality distribution (one-row summary table)
atypicality_dist <- atypicality %>%
  summarise(
    mean = mean(.data$atypicality),
    sd = sd(.data$atypicality),
    median = median(.data$atypicality),
    min = min(.data$atypicality),
    q25 = quantile(.data$atypicality, 0.25),
    q75 = quantile(.data$atypicality, 0.75),
    max = max(.data$atypicality)
  )

# Output the results.
# print() is explicit so the tables are also shown when the script is run
# through source(), which does not auto-print top-level values by default.
print(high_atypicality)
print(low_atypicality)
print(atypicality_dist)