-
Notifications
You must be signed in to change notification settings - Fork 0
/
MovieRecommender.R
134 lines (93 loc) · 3.59 KB
/
MovieRecommender.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
###### Recommender system algorithm implementation on the MovieLens 100k data ###
## load libraries ####
library(recommenderlab)
library(reshape2)
######Function Definitions ##
#### Read Data ####
## data downloaded from http://grouplens.org/datasets/movielens/
## read the rating data for all users
# Read the MovieLens rating file (u.data) and movie file (u.item).
#
# Args:
#   dataDir: directory that contains "u.data" and "u.item". Defaults to
#            "./data", the location the script originally hard-coded.
#
# Returns:
#   A list with two data frames:
#     ratingDF - columns userID, movieID, rating, timestamp
#     movieDF  - first column renamed to movieID, second to name; any other
#                columns keep read.delim's default V<n> names.
readData <- function(dataDir = "./data") {
  # u.data is tab-separated (read.delim's default separator) with no header.
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  ratingDF <- read.delim(file.path(dataDir, "u.data"), header = FALSE)
  colnames(ratingDF) <- c("userID", "movieID", "rating", "timestamp")

  # u.item is pipe-separated; keep titles as character, not factor.
  moviesDF <- read.delim(
    file.path(dataDir, "u.item"),
    sep = "|",
    header = FALSE,
    stringsAsFactors = FALSE
  )
  colnames(moviesDF)[colnames(moviesDF) == "V1"] <- "movieID"
  colnames(moviesDF)[colnames(moviesDF) == "V2"] <- "name"

  list(ratingDF = ratingDF, movieDF = moviesDF)
}
#### data Cleansing and processing ####
# Replace movie IDs in the rating data with movie names and drop duplicate
# user/movie rows.
#
# Args:
#   ratingDF: rating data frame; column 1 is the user ID and column 2 the
#             movie ID (as produced by readData()).
#   moviesDF: movie data frame with columns `movieID` and `name`.
#
# Returns:
#   ratingDF with column 2 holding the movie name instead of the ID, keeping
#   only the first row for every (column 1, column 2) combination.
preProcess <- function(ratingDF, moviesDF) {
  stopifnot(all(c("movieID", "name") %in% colnames(moviesDF)))

  # Look the title up by ID in the moviesDF argument. The previous version
  # read the global `dataList` and used the ID as a row position, which only
  # works when that global exists and IDs happen to equal row numbers.
  movieRow <- match(as.numeric(ratingDF[, 2]), moviesDF$movieID)
  ratingDF[, 2] <- moviesDF$name[movieRow]

  # Remove duplicate entries for any user-movie combination. This runs after
  # the name substitution, so two IDs sharing one title count as duplicates.
  ratingDF <- ratingDF[!duplicated(ratingDF[, 1:2]), ]

  ratingDF
}
## Create the user x movie rating matrix from the (pre-processed) rating data ####
# Build a recommenderlab realRatingMatrix (users in rows, movies in columns)
# from long-format rating data.
#
# Args:
#   ratingDF: data frame with columns userID, movieID and rating, holding at
#             most one row per userID/movieID pair.
#
# Returns:
#   A realRatingMatrix whose row names are the user IDs and whose column
#   names are the distinct movieID values.
createRatingMatrix <- function(ratingDF) {
  # Long -> wide: one row per user, one column per movie, NA where unrated.
  # The former `index = "userID"` argument is not a dcast() parameter; it was
  # only forwarded to the aggregation function, so it is dropped.
  wideDF <- dcast(ratingDF, userID ~ movieID, value.var = "rating")

  # Remember the real user IDs before removing the ID column. Taking
  # row.names() of the subset frame would give positions 1..n instead.
  userIDs <- as.character(wideDF[[1]])

  # drop = FALSE keeps a data frame even when there is only one movie column.
  ratingMat <- as.matrix(wideDF[, -1, drop = FALSE])
  movieRatingMat <- as(ratingMat, "realRatingMatrix")

  # Label rows with the user IDs.
  dimnames(movieRatingMat)[[1]] <- userIDs
  movieRatingMat
}
##### Evaluate candidate recommender algorithms ####
# Evaluate the RANDOM, POPULAR and UBCF recommenders on a single train/test
# split of the rating matrix.
#
# Args:
#   movieRatingMat: realRatingMatrix of user x movie ratings.
#   train:      proportion of users placed in the training set.
#   given:      number of ratings per test user passed to evaluationScheme().
#   goodRating: rating threshold passed to evaluationScheme().
#   n:          top-N list lengths to evaluate.
#
# Returns:
#   The result of recommenderlab::evaluate(), with one entry per algorithm
#   name ("RANDOM", "POPULAR", "UBCF").
#
# NOTE(review): the split (and the RANDOM recommender) presumably draw from
# R's RNG, so call set.seed() beforehand if reproducible numbers are needed.
evaluateModels <- function(movieRatingMat,
                           train = 0.9,
                           given = 10,
                           goodRating = 4,
                           n = c(1, 3, 5, 10, 15, 20)) {
  # To list the algorithms available for this data type, run
  #   recommenderRegistry$get_entries(dataType = "realRatingMatrix")
  # interactively. Inside a function its result was simply discarded.
  scheme <- evaluationScheme(
    movieRatingMat,
    method = "split",
    train = train,
    k = 1,
    given = given,
    goodRating = goodRating
  )

  algorithms <- list(
    RANDOM = list(name = "RANDOM", param = NULL),
    POPULAR = list(name = "POPULAR", param = NULL),
    UBCF = list(name = "UBCF", param = NULL)
  )

  # Run every algorithm and score its top-n lists for each requested n.
  evaluate(scheme, algorithms, n = n)
}
# Plot the evaluation results twice: first with the default plot type, then
# as a precision/recall plot.
#
# Args:
#   results: evaluation results as returned by evaluateModels().
visualise <- function(results) {
  legendPos <- "topright"

  # Default plot (ROC curve), annotating the first three algorithms.
  plot(results, annotate = 1:3, legend = legendPos)

  # Precision / recall, annotating only the third algorithm; the x axis is
  # limited to [0, 0.22].
  plot(
    results,
    "prec/rec",
    annotate = 3,
    legend = legendPos,
    xlim = c(0, 0.22)
  )
}
#### Create prediction model ####
# Train a recommenderlab model on the full rating matrix.
#
# Args:
#   movieRatingMat: realRatingMatrix used as training data.
#   method:         name of the recommender algorithm, e.g. "UBCF".
#   parameter:      optional list of algorithm parameters forwarded to
#                   Recommender(); NULL (the default) uses the algorithm's
#                   defaults, which is what this function always did.
#
# Returns:
#   The fitted Recommender object.
createModel <- function(movieRatingMat, method, parameter = NULL) {
  # The former bare getModel() inspection lines were removed: inside a
  # function their values are neither printed nor used. Inspect the returned
  # model with getModel(model) at the console instead.
  Recommender(movieRatingMat, method = method, parameter = parameter)
}
### Get top-N movie recommendations for a user from a trained recommender model ####
# Produce top-n recommendations for the selected user(s).
#
# Args:
#   movieRatingMat: rating matrix the users are taken from.
#   model:          trained recommender passed to predict().
#   userID:         index used to subset movieRatingMat.
#   n:              number of recommendations per user.
#
# Returns:
#   The top-n prediction converted to a list.
recommendations <- function(movieRatingMat, model, userID, n) {
  # Ratings of the requested user(s) only.
  userRatings <- movieRatingMat[userID]

  topItems <- predict(model, userRatings, n = n)
  as(topItems, "list")
}
# ---- Driver script: load data, evaluate algorithms, recommend for one user ----
# Load the MovieLens rating and movie data frames (returned as a list)
dataList<- readData()
# Data cleansing: swap movie IDs for names and drop duplicate user/movie rows
ratingDF<- preProcess(dataList$ratingDF, dataList$movieDF)
# Create the user x movie rating matrix
movieRatingMat<- createRatingMatrix(ratingDF)
# Evaluate the candidate algorithms (RANDOM, POPULAR, UBCF)
evalList <- evaluateModels(movieRatingMat)
# Plot the evaluation results
visualise(evalList)
## Author's observation from the plots: UBCF appeared to have the highest
## precision. NOTE(review): nothing here fixes a random seed, so re-check the
## plots rather than assuming this holds on every run.
# Print the first four columns of the first confusion matrix for "UBCF"
getConfusionMatrix(evalList[["UBCF"]])[[1]][,1:4]
## Train the "UBCF" recommender on the full rating matrix
rec_model <- createModel(movieRatingMat, "UBCF")
# userID is used as an index into movieRatingMat; topN is the list length
userID <- 1
topN <- 5
# Top-level call without assignment, so the resulting list is auto-printed
recommendations(movieRatingMat, rec_model, userID, topN)