This repository has been archived by the owner on Aug 8, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhistory-to-gource.mjs
373 lines (320 loc) · 11.5 KB
/
history-to-gource.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
// SPDX-License-Identifier: CC0-1.0
import * as fs from "fs/promises";
// Bail out of a code path that has not been written yet. Always throws an
// Error whose message is `Not implemented: <desc>.`
function todo(desc) {
  const message = `Not implemented: ${desc}.`;
  throw new Error(message);
}
// Check an invariant: do nothing when `bool` is truthy, otherwise throw an
// Error whose message is `Assertion failed: <desc>.`
function assert(bool, desc) {
  if (bool) {
    return;
  }
  throw new Error(`Assertion failed: ${desc}.`);
}
// Command-line arguments: the ID of the root directory, the path to the
// activity log, and (optionally) the display name to use for the current user.
const [, , rootId, dataPath, currentUserName] = process.argv;
if (!dataPath) {
  // usage goes to stderr: stdout is reserved for the generated event lines,
  // so it must not receive anything else
  console.error(
    'Usage: node history-to-gource.mjs RootDirectoryId path/to/activity/log ["Your Name"]'
  );
  process.exit(1);
}
// item IDs in the log have the form `items/<id>`
const root = `items/${rootId}`;
const data = await fs.readFile(dataPath, "utf-8");
// The log holds one JSON-encoded activity per line. Blank lines (such as the
// one a trailing newline produces) are skipped, since JSON.parse("") throws.
const activities = data
  .split("\n")
  .filter((line) => line.trim() !== "")
  .map((line) => JSON.parse(line));
// item ID -> current path of that item, as an array of path segments
const paths = {};
// item ID -> colour string used for that item in the output
const colors = {};
// Return the ID (the `name` field) of the single item an activity targets.
// When the target is a file comment, the ID of the comment's parent item is
// returned instead. Fails if the activity does not have exactly one target or
// the target is of neither kind.
function getTargetId(activity) {
  assert(activity.targets.length == 1, "activity with multiple targets");
  const { driveItem, fileComment } = activity.targets[0];
  if (driveItem) {
    return driveItem.name;
  }
  if (fileComment) {
    return fileComment.parent.name;
  }
  todo("unknown target type");
}
// Convert a date string that Date.parse understands into Unix time, i.e.
// seconds since the epoch, rounded to the nearest whole second.
function dateToUnix(date) {
  const milliseconds = Date.parse(date);
  return Math.round(milliseconds / 1000);
}
// Colour (six hex digits, RRGGBB) for each MIME type that gets its own colour.
// A Map is used rather than an object literal so that a lookup can never hit
// an inherited Object.prototype member such as "toString" or "constructor".
const MIME_TYPE_COLORS = new Map([
  ["application/msword", "0000FF"],
  ["application/pdf", "FF0000"],
  ["application/vnd.google-apps.document", "0000FF"],
  ["application/vnd.google-apps.folder", "FFFFFF"],
  ["application/vnd.google-apps.shortcut", "FFFFFF"],
  ["application/vnd.google-apps.spreadsheet", "00FF00"],
  [
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "FFA500",
  ],
  [
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "00FF00",
  ],
  [
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "0000FF",
  ],
  ["image/png", "FF00FF"],
]);
// colour used for every MIME type that is not listed above
const DEFAULT_COLOR = "FFFFFF";
// Return the colour for a MIME type, or the default colour when the type is
// not listed (including when it is undefined).
function getColor(mimeType) {
  return MIME_TYPE_COLORS.get(mimeType) ?? DEFAULT_COLOR;
}
// Report whether the array `prefix` matches the leading elements of the array
// `path`. An empty prefix matches every path, and a path matches itself.
function pathHasPrefix(path, prefix) {
  if (prefix.length > path.length) {
    return false;
  }
  for (let i = 0; i < prefix.length; i++) {
    if (path[i] != prefix[i]) {
      return false;
    }
  }
  return true;
}
// Swap the leading `oldPrefix` of `path` for `newPrefix` and return the result
// as a new array. Returns null when `path` does not start with `oldPrefix`.
function replacePathPrefix(path, oldPrefix, newPrefix) {
  if (pathHasPrefix(path, oldPrefix)) {
    const remainder = path.slice(oldPrefix.length);
    return newPrefix.concat(remainder);
  }
  return null;
}
// Relocate every tracked item whose path lies under `oldFolder` so that it
// lies under `newFolder` instead. For each such item a delete ("D") is logged
// at its old path, followed by an add ("A") at its new one.
function moveFolder(activity, oldFolder, newFolder) {
  for (const itemId of Object.keys(paths)) {
    const relocated = replacePathPrefix(paths[itemId], oldFolder, newFolder);
    if (!relocated) {
      // not inside the moved folder
      continue;
    }
    logAction(activity, "D", itemId);
    paths[itemId] = relocated;
    logAction(activity, "A", itemId);
  }
}
// Log a delete ("D") for every tracked item whose path lies under `folder`,
// and stop tracking that item.
function deleteFolder(activity, folder) {
  for (const itemId of Object.keys(paths)) {
    if (!pathHasPrefix(paths[itemId], folder)) {
      continue;
    }
    logAction(activity, "D", itemId);
    delete paths[itemId];
  }
}
// fallback called when a file starts showing up with no indication as to its
// path, and we need to make an inference as to its path at the time. On
// success the inferred path is stored in `paths` under the target's ID;
// otherwise an error is thrown.
function discoverPath(activity) {
  const targetId = getTargetId(activity);
  const target = activity.targets[0];
  // a comment activity targets a fileComment rather than a driveItem; the
  // commented-on item is then the comment's `parent` (getTargetId has already
  // rejected targets that are neither)
  const item = target.driveItem ?? target.fileComment.parent;

  // first, does the log contain a move of this file? if so, use the
  // removedParents from there: we want to find the first removedParent we
  // recognize. Only moves that carry removedParents are inspected, so an
  // unrelated entry with an unsupported target cannot break the lookup.
  const firstRemovedParent = activities
    .filter((act) => act.primaryActionDetail?.move?.removedParents)
    .filter((act) => getTargetId(act) === targetId)
    .flatMap((act) => act.primaryActionDetail.move.removedParents)
    .find((parent) => paths[parent.driveItem.name]);

  let parent;
  if (firstRemovedParent) {
    parent = firstRemovedParent.driveItem.name;
  } else {
    // failing that, use the current parent, which the Apps Script obtains in
    // case it's needed for such cases
    const currentParents = target._d2g_parents;
    assert(currentParents, "file has no recorded parents to infer a path from");
    assert(currentParents.length == 1, "file has multiple parents");
    parent = currentParents[0];
  }
  // without this check an untracked parent would surface as an opaque
  // "not iterable" TypeError from the spread below
  assert(paths[parent], "inferred parent folder has no known path");
  paths[targetId] = [...paths[parent], item.title];
}
// number of event lines written to stdout so far
let generatedEvents = 0;
// Write one event line of the form `timestamp|user|type|/path|colour` to
// stdout and count it in `generatedEvents`.
//
// activity: the log entry supplying the timestamp and the acting user
// type:     the event type letter; this file passes "A", "M" or "D"
// target:   ID of the item the event is about; defaults to the activity's own
//           target
//
// If the item's path is not known yet it is inferred via discoverPath, except
// for "D" events, which are silently dropped in that case. Throws when the
// path or colour cannot be determined, or when the activity was performed by
// the current user and no name was given on the command line.
function logAction(activity, type, target = getTargetId(activity)) {
  if (!paths[target]) {
    // create with no associated move, or file that just randomly starts
    // getting edits with neither a create nor a move first; both are known
    // to happen
    if (type === "D") {
      // just ignore double deletes - for example, we can sometimes get a
      // separate delete event for a file in a folder that was just deleted
      return;
    }
    discoverPath(activity);
  }
  if (!colors[target]) {
    assert(
      target == getTargetId(activity),
      "can't determine color for file first seen in a folder move(!)"
    );
    // a comment activity targets a fileComment rather than a driveItem; the
    // commented-on item is then the comment's `parent`
    const { driveItem, fileComment } = activity.targets[0];
    const item = driveItem ?? fileComment.parent;
    colors[target] = getColor(item.mimeType);
  }
  let name;
  if (activity.actors[0].user.knownUser.isCurrentUser) {
    if (currentUserName) {
      name = currentUserName;
    } else {
      throw new Error(
        "This log contains edits by you (or the user who downloaded the log). Google's API makes it difficult to automatically obtain the current user's name, so you'll need to manually provide this on the command line, after the path to the log."
      );
    }
  } else {
    name = activity.actors[0].user._d2g_info.names?.[0]?.displayName;
  }
  console.log(
    `${dateToUnix(activity.timestamp)}|${name}|${type}|/${paths[target].join(
      "/"
    )}|${colors[target]}`
  );
  generatedEvents++;
}
// Replay the activity log in order, keeping `paths` up to date and emitting
// the corresponding events through logAction. An activity that cannot be
// handled is reported on stderr and counted in `errors`; the rest are counted
// in `successfulActivities`.
let successfulActivities = 0;
let errors = 0;
activities.forEach((activity) => {
  try {
    assert(activity.targets.length == 1, "activity has multiple targets");
    assert(activity.timestamp, "activity has no timestamp");
    if (activity.primaryActionDetail.create) {
      assert(activity.actors.length == 1, "activity has multiple actors");
      if (getTargetId(activity) == root) {
        // the root directory itself: its path is the empty path
        paths[getTargetId(activity)] = [];
        logAction(activity, "A");
      } else {
        const moveAction = activity.actions.find((x) => x.detail.move)?.detail
          ?.move;
        // often, a document created in a directory is modelled as a create
        // with a bundled move, in which case we can get the parents this way
        if (moveAction) {
          assert(moveAction.addedParents, "create-move didn't add parents");
          assert(
            moveAction.addedParents.length == 1,
            "create-move added multiple parents"
          );
          assert(!moveAction.removedParents, "create-move removed parents");
          assert(
            !paths[getTargetId(activity)],
            "create-move for a file that already exists"
          );
          const parent = paths[moveAction.addedParents[0].driveItem.name];
          assert(parent, "create-move into an unknown parent");
          paths[getTargetId(activity)] = [
            ...parent,
            activity.targets[0].driveItem.title,
          ];
        }
        // without a bundled move, logAction infers the path itself
        logAction(activity, "A");
      }
    } else if (activity.primaryActionDetail.move) {
      const move = activity.primaryActionDetail.move;
      if (
        move.addedParents &&
        move.addedParents.length == 1 &&
        (!move.removedParents ||
          (move.removedParents.every((p) => !paths[p.driveItem.name]) &&
            !paths[getTargetId(activity)]))
      ) {
        // moved from outside; this is, for our purposes, a create
        const parent = paths[move.addedParents[0].driveItem.name];
        assert(parent, "move from outside into unknown parent");
        paths[getTargetId(activity)] = [
          ...parent,
          activity.targets[0].driveItem.title,
        ];
        logAction(activity, "A");
      } else if (
        (!move.addedParents ||
          move.addedParents.every((p) => !paths[p.driveItem.name])) &&
        move.removedParents &&
        move.removedParents.every((p) => paths[p.driveItem.name]) &&
        paths[getTargetId(activity)]
      ) {
        // moved to outside; for our purposes, a delete
        logAction(activity, "D");
        const oldPath = paths[getTargetId(activity)];
        delete paths[getTargetId(activity)];
        if (activity.targets[0].driveItem.driveFolder) {
          deleteFolder(activity, oldPath);
        }
      } else if (
        move.addedParents &&
        move.addedParents.length == 1 &&
        paths[move.addedParents[0].driveItem.name] &&
        move.removedParents &&
        move.removedParents.length == 1 &&
        paths[move.removedParents[0].driveItem.name]
      ) {
        // move within the folder
        logAction(activity, "D");
        const parent = paths[move.addedParents[0].driveItem.name];
        // copy the old path before it is replaced, for moving the contents
        const oldPath = [...paths[getTargetId(activity)]];
        paths[getTargetId(activity)] = [
          ...parent,
          activity.targets[0].driveItem.title,
        ];
        logAction(activity, "A");
        if (activity.targets[0].driveItem.driveFolder) {
          moveFolder(activity, oldPath, paths[getTargetId(activity)]);
        }
      } else if (
        // `paths` is keyed by item ID, so each parent reference has to be
        // resolved to its ID before the lookup; an absent parent list counts
        // as "no tracked parents"
        (move.addedParents ?? []).every((p) => !paths[p.driveItem.name]) &&
        (move.removedParents ?? []).every((p) => !paths[p.driveItem.name])
      ) {
        // moved from an external folder to another external folder. not
        // entirely sure why we get these - maybe things that end up in
        // scope later? in any case, ignore it
      } else {
        todo("unknown move type");
      }
    } else if (activity.primaryActionDetail.comment) {
      logAction(activity, "M");
    } else if (activity.primaryActionDetail.rename) {
      const path = paths[getTargetId(activity)];
      if (!path) {
        // annoying edge case: first we hear of a file is it getting renamed.
        // google is funny sometimes.
        logAction(activity, "A");
      } else {
        logAction(activity, "D");
        const oldPath = [...path];
        // rename in place: `path` is the array stored in `paths`
        path[path.length - 1] = activity.primaryActionDetail.rename.newTitle;
        logAction(activity, "A");
        if (activity.targets[0].driveItem.driveFolder) {
          moveFolder(activity, oldPath, path);
        }
      }
    } else if (activity.primaryActionDetail.edit) {
      logAction(activity, "M");
    } else if (activity.primaryActionDetail.permissionChange) {
      logAction(activity, "M");
    } else if (activity.primaryActionDetail.delete) {
      logAction(activity, "D");
      const path = paths[getTargetId(activity)];
      delete paths[getTargetId(activity)];
      if (path && activity.targets[0].driveItem.driveFolder) {
        deleteFolder(activity, path);
      }
    } else {
      todo("unknown activity type");
    }
    successfulActivities++;
  } catch (e) {
    // a failing entry is reported and skipped so the rest of the log is
    // still converted
    if (process.env.D2G_DEBUG) {
      console.error(
        `Error encountered while parsing this log entry: ${JSON.stringify(
          activity
        )}`
      );
      console.error(e);
    } else {
      console.error(e.toString());
    }
    errors++;
  }
});
// console.error is used here only because it writes to stderr; the conversion
// summary is not itself an error
const summary = `Converted ${successfulActivities} activities into ${generatedEvents} events.`;
console.error(summary);
if (errors > 0) {
  const warning = `Warning: ${errors} activit(ies) skipped due to errors. Set D2G_DEBUG=1 for more detail.`;
  console.error(warning);
}