-
Notifications
You must be signed in to change notification settings - Fork 30
/
killrvideo-schema.cql
221 lines (195 loc) · 6.15 KB
/
killrvideo-schema.cql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
// Sample schema for Apache Cassandra
// IF EXISTS added for Cassandra 2.1
// CQL 3.1.0
DROP KEYSPACE IF EXISTS killrvideo;
CREATE KEYSPACE killrvideo WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 };
use killrvideo;
// User credentials, keyed by email address so we can authenticate
// Seperated from user in case auth is external (Google, Facebook, etc...)
CREATE TABLE user_credentials (
email text,
password text,
userid uuid,
PRIMARY KEY (email)
);
// Basic entity table for a user
// UUID for userid to link to auth system
CREATE TABLE users (
userid uuid,
firstname varchar,
lastname varchar,
email text,
created_date timestamp,
PRIMARY KEY (userid)
);
// CQL
// Create an index on first name in users table.
// NonTokenizingAnalyzer is used for only match equality
// making it case_sensitive false eliminates any normalization problems.
CREATE CUSTOM INDEX ON users (firstname)
USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = {
'analyzer_class':
'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',
'case_sensitive': 'false'
};
// SASI Index on lastname supports a LIKE '%term%' query
// with the mode: CONTAINS
CREATE CUSTOM INDEX ON users (lastname)
USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = {'mode': 'CONTAINS'};
// SASI Index on email is for the same type of query as lastname
CREATE CUSTOM INDEX ON users (email)
USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = {'mode': 'CONTAINS'};
// SASI Index using mode SPARSE is good for unique, dense number ranges
// and is best for range queries on timestamps
CREATE CUSTOM INDEX ON users (created_date)
USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = {'mode': 'SPARSE'};
// User Defined Type introduced in CQL 3.2 spec
// Meta data - Height, Width, Bit rate, Encoding
CREATE TYPE video_metadata (
height int,
width int,
video_bit_rate set<text>,
encoding text
);
// Entity table that will store many videos for a unique user
CREATE TABLE videos (
videoid uuid,
userid uuid,
name varchar,
description varchar,
location text,
location_type int,
preview_thumbnails map<text,text>, // <position in video, url of thumbnail>
tags set<varchar>,
metadata set <frozen<video_metadata>>,
added_date timestamp,
PRIMARY KEY (videoid)
);
// CQL 3.2 spec included secondary indexes of collections
// These are not for speed but convenience
CREATE INDEX tags_idx ON videos(ENTRIES(tags));
// CQL 3.4 spec introduced MATERIALIZED VIEWS
CREATE MATERIALIZED VIEW videos_by_location
AS SELECT userid, added_date, videoid, location
FROM videos
WHERE videoId IS NOT NULL AND location IS NOT NULL
PRIMARY KEY(location, videoid);
// One-to-many from the user point of view
// Also know as a lookup table
CREATE TABLE user_videos (
userid uuid,
added_date timestamp,
videoid uuid,
name text,
preview_image_location text,
PRIMARY KEY (userid, added_date, videoid)
) WITH CLUSTERING ORDER BY (added_date DESC, videoid ASC);
// Track latest videos, grouped by day (if we ever develop a bad hotspot from the daily grouping here, we could mitigate by
// splitting the row using an arbitrary group number, making the partition key (yyyymmdd, group_number))
CREATE TABLE latest_videos (
yyyymmdd text,
added_date timestamp,
videoid uuid,
name text,
preview_image_location text,
PRIMARY KEY (yyyymmdd, added_date, videoid)
) WITH CLUSTERING ORDER BY (added_date DESC, videoid ASC);
// Counter table
CREATE TABLE video_rating (
videoid uuid,
rating_counter counter,
rating_total counter,
PRIMARY KEY (videoid)
);
// CQL 3.3 adds user defined functions
// This function takes the counters in the video rating table and averages them
// by dividing rating total by rating counter.
CREATE OR REPLACE FUNCTION avg_rating (rating_counter counter, rating_total counter)
CALLED ON NULL INPUT
RETURNS double
LANGUAGE java AS 'return Double.valueOf(rating_total.doubleValue()/rating_counter.doubleValue());';
// Video ratings by user (to try and mitigate voting multiple times)
CREATE TABLE video_ratings_by_user (
videoid uuid,
userid uuid,
rating int,
PRIMARY KEY (videoid, userid)
);
// Index for tag keywords
CREATE TABLE videos_by_tag (
tag text,
videoid uuid,
added_date timestamp,
name text,
preview_image_location text,
tagged_date timestamp,
PRIMARY KEY (tag, videoid)
);
// Inverted index for tags by first letter in the tag
CREATE TABLE tags_by_letter (
first_letter text,
tag text,
PRIMARY KEY (first_letter, tag)
);
// Comments as a many-to-many
// Looking from the video side to many users
CREATE TABLE comments_by_video (
videoid uuid,
commentid timeuuid,
userid uuid,
comment text,
PRIMARY KEY (videoid, commentid)
) WITH CLUSTERING ORDER BY (commentid DESC);
// looking from the user side to many videos
CREATE TABLE comments_by_user (
userid uuid,
commentid timeuuid,
videoid uuid,
comment text,
PRIMARY KEY (userid, commentid)
) WITH CLUSTERING ORDER BY (commentid DESC);
// Time series wide row with reverse comparator
CREATE TABLE video_event (
videoid uuid,
userid uuid,
preview_image_location text static,
event varchar,
event_timestamp timeuuid,
video_timestamp bigint,
PRIMARY KEY ((videoid,userid),event_timestamp,event)
) WITH CLUSTERING ORDER BY (event_timestamp DESC,event ASC);
// Pending uploaded videos by id
CREATE TABLE uploaded_videos (
videoid uuid,
userid uuid,
name text,
description text,
tags set<text>,
added_date timestamp,
jobid text,
PRIMARY KEY (videoid)
);
// Same as uploaded_videos just keyed by the encoding job's id
CREATE TABLE uploaded_videos_by_jobid (
jobid text,
videoid uuid,
userid uuid,
name text,
description text,
tags set<text>,
added_date timestamp,
PRIMARY KEY (jobid)
);
// Log of notifications from Azure Media Services encoding jobs (latest updates first)
CREATE TABLE encoding_job_notifications (
jobid text,
status_date timestamp,
etag text,
newstate text,
oldstate text,
PRIMARY KEY (jobid, status_date, etag)
) WITH CLUSTERING ORDER BY (status_date DESC, etag ASC);