-
Notifications
You must be signed in to change notification settings - Fork 1
/
zoompart.py
executable file
·616 lines (503 loc) · 26.2 KB
/
zoompart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
#! /usr/bin/python3
#! /home/robbie/software/anaconda3/bin/python
# Purpose : plot bar graph from Zoom video-meeting participant files
# Author : Robbie Morrison <[email protected]> / GitHub @robbiemorrison
# Commenced : 16-Apr-2020
# Status : beta
# Keywords : python zoom participant-list
# OPEN LICENSE
#
# Copyright (c) 2020 Robbie Morrison <[email protected]>
#
# SPDX-License-Identifier: ISC
# License-Text:
#
# ISC License
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# The software is provided "as is" and the author disclaims all warranties
# with regard to this software including all implied warranties of
# merchantability and fitness. In no event shall the author be liable for
# any special, direct, indirect, or consequential damages or any damages
# whatsoever resulting from loss of use, data or profits, whether in an
# action of contract, negligence or other tortious action, arising out of
# or in connection with the use or performance of this software.
# --------------------------------------
# notes
# --------------------------------------
# DateTimeRange
# https://pypi.org/project/DateTimeRange/
# import datetimerange
# --------------------------------------
# user defined variables
# --------------------------------------
# hardcoded here only
numberedTitleFmt = "dummy workshop {0:02d} participation" # format string for the indexed plot title under option '--numbered-title', filled with the given integer
# hardcoded defaults here but can be overwritten by command-line options
standinPlotTitle = "stand in title" # fallback plot title, used when the command-line yields no title
cutoffDefault = 20 # default for option '--cutoff' in minutes, durations below this threshold are left out of the "stayed" participant count
# --------------------------------------
# version information
# --------------------------------------
versionStr = "0.7" # script version string, shown by option '--version' and in the start-up report
# -------------------------------------
# modules
# -------------------------------------
import argparse # argument parsing
import enum # enumeration support
import os
import re # support for regular expressions
import stat # 'stat' (file status) results interpretation
import sys
import pandas
import datetime # calculate timedelta intervals
# -------------------------------------
# exit codes
# -------------------------------------
class ExitCode(enum.Enum):
    """Exit statuses this script hands back to the calling shell."""

    success = 0     # success
    failure = 1     # generic failure
    usage = 2       # command-line usage issue (the same as the argparse default)
    noFile = 50     # regular file not found
    datIssue = 51   # DAT file issue
exitCode = ExitCode.success.value # process exit status, presume success, handed to 'myexit' at the end of the script
# -------------------------------------
# argument parsing
# -------------------------------------
description = "plot Zoom participant duration information" # one-line summary passed to the argument parser
# CAUTION: the 'epilog' is stripped of leading and trailing whitespace, so the usage here is acceptable
# the placeholders {0}, {1}, {2} are filled from 'numberedTitleFmt', 'standinPlotTitle', and 'cutoffDefault' respectively
epilog = """
The utility plots duration information from the participant CSV file produced at
the end of a Zoom session.
Around May 2020 the CSV file no longer contained two lines of meeting data.
The script tries to import that data in a try block and fails gracefully if
it is absent.
Zoom is a proprietary video-conferencing application. CSV indicates a
comma-separated values file.
The resulting plot window can be either saved automatically using the
--save-plot option or saved manually as a SVG or PNG file from the plot
window. Automatic saving may not work on your system, in which case revert to
saving by hand with your mouse.
A hardcoded custom numbered title is activated under option --numbered-title:
"{0}"
The default plot title is:
"{1}"
You can and should override this default using option --title.
Experience shows some participants will run multiple sessions, often with gaps
and overlaps. This is handled by default by deduplicating participants on their
email addresses and accounting for these gaps and overlaps when calculating
durations. Two options override this default behavior. Option --dedup-name
deduplicates on name and --ignore-gaps identifies the first join and the last
leave timestamps and utilizes the simple difference instead.
The short sessions cutoff under option --cutoff excludes sessions shorter than
the given threshold when calculating the engaged participant count. The default
value is {2} minutes.
A DAT file contains the values used in the bar graph plot. It does not contain
personal information, nor will any plot export files.
The latest version of this file should be available on GitHub under:
https://github.com/robbiemorrison/zoomcsv
This script is open licensed under an ISC software license. See code for
details.
""".format(numberedTitleFmt, standinPlotTitle, cutoffDefault)
def argIsNatural(value):
    """argparse 'type' callable which accepts only non-negative integers

    * 'value' is the raw command-line string
    * returns the parsed integer, zero included
    * raises 'argparse.ArgumentTypeError' if 'value' is not an integer or is negative
    """
    complaint = "%s is not a non-negative integer" % value
    try:
        intvalue = int(value)
    except (TypeError, ValueError):
        # without this, argparse falls back to the opaque "invalid argIsNatural value" message
        raise argparse.ArgumentTypeError(complaint) from None
    if intvalue < 0:
        raise argparse.ArgumentTypeError(complaint)
    return intvalue
# build the command-line interface, 'RawDescriptionHelpFormatter' keeps the hand-wrapped layout of the epilog
parser = argparse.ArgumentParser(
    description=description,
    epilog=epilog,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + ' : ' + versionStr)
parser.add_argument('-t', '--title', dest="title", action='store', required=False, help='specify plot title')
parser.add_argument('-n', '--numbered-title', dest="number", action='store', required=False, type=argIsNatural, help='use custom numbered plot title')
parser.add_argument('-l', '--nominal-duration', dest="duration", action='store', required=False, type=argIsNatural, help='set nominal meeting duration in minutes')
parser.add_argument('-c', '--cutoff', dest="cutoff", action='store', required=False, type=argIsNatural, help='set short session threshold in minutes')
parser.add_argument('-I', '--ignore-gaps', dest="ignoregaps", action='store_true', required=False, help='consider only beginning and closing timestamps')
parser.add_argument('-N', '--dedup-name', dest="usename", action='store_true', required=False, help='deduplicate on name not email address')
parser.add_argument('-d', '--dat-file', dest="dat", action='store_true', required=False, help='create or overwrite existing DAT file')
parser.add_argument('-P', '--no-plot', dest="noplot", action='store_true', required=False, help='omit plot')
parser.add_argument('-S', '--save-plot', dest="saveplot", action='store_true', required=False, help='save plot automatically (system dependent)')
parser.add_argument('-T', '--truncate', dest="length", action='store', required=False, type=argIsNatural, help='truncate input data for testing purposes')
parser.add_argument('-v', '--verbose', dest="verbose", action='store_true', required=False, help='show additional information')
parser.add_argument('-D', '--show-df', dest="showdf", action='store_true', required=False, help='show loaded dataframes')
parser.add_argument('csv', type=str, action="store", help='participant CSV file from Zoom')
args = parser.parse_args()

# misuse of options
# 'parser.error' prints the usage text and exits with status 2, whereas raising
# 'argparse.ArgumentTypeError' at module level only produced an uncaught traceback
# the numbered title is tested against None because zero is a legitimate number
if args.title and args.number is not None:
    parser.error("cannot use options --title and --numbered-title simultaneously")
if args.noplot and args.saveplot:
    parser.error("cannot use options --no-plot and --save-plot simultaneously")

# for convenience
plotTitle = args.title
titleNumber = args.number
nominalDuration = args.duration
givenCutoff = args.cutoff
useName = args.usename
createDatFile = args.dat
omitPlot = args.noplot
savePlotAlso = args.saveplot
csvTarget = args.csv
ignoreGaps = args.ignoregaps
# -------------------------------------
# report()
# -------------------------------------
def report(funcname='', key='', value=''):
    """print one aligned status line, unconditionally (see 'deport' for the '--verbose' variant)

    * 'funcname' is a hardcoded label for the first column
    * 'key' is normally a string for the second column
    * 'value' is anything supported by the function 'str', for example a string, int, or float

    empty trailing arguments shorten the line and a call without arguments prints a blank line
    """
    label, name, text = str(funcname), str(key), str(value)
    if text:
        line = '{0:<22s} : {1:<16s} : {2:s}'.format(label, name, text)
    elif name:
        line = '{0:<22s} : {1:s}'.format(label, name)
    else:
        line = label                             # bare label, or the empty string for a blank line
    print(line)
def deport(funcname='', key='', value=''):
    """wrapper to 'report' which stays silent unless option '--verbose' is set"""
    if not args.verbose:
        return
    report(funcname, key, value)
# --------------------------------------
# helper functions
# --------------------------------------
def pythonVer():
    """report the first line of 'sys.version' (verbose only)"""
    firstline = sys.version.splitlines()[0]
    deport("python version", firstline)
def scriptVer():
    """report the script name, as invoked, together with the script version string"""
    invokedas = os.path.basename(sys.argv[0])
    label = invokedas + " version"
    report(label, versionStr)
def setPandasWide():
    """set the pandas display width to the current terminal width

    The size is taken from 'shutil.get_terminal_size', which returns a fallback size
    when no terminal can be queried.  Parsing the output of 'stty size' instead
    failed in that situation, because the empty output could not be unpacked into
    rows and columns.
    """
    import shutil                                # local import, only this helper needs it
    termsize = shutil.get_terminal_size()
    termheight = termsize.lines
    termwidth = termsize.columns
    deport("stty rows x cols", '{0:d} x {1:d}'.format(termheight, termwidth))
    pandas.set_option('display.width', termwidth)   # default width is 80
    pandaswidth = pandas.get_option('display.width')  # read back to confirm the setting
    deport("pandas reporting width", pandaswidth)
def sayDf(df, stub, rows=5):
    """short report on the given dataframe

    * 'df' is the dataframe to describe
    * 'stub' labels the dataframe in the output
    * 'rows' caps the listing at that many leading rows, zero means list everything

    the shape and the column names are always reported, the content only under option '--show-df'
    """
    nrows, ncols = df.shape[0], df.shape[1]
    report()
    report("{0} = {1:d} x {2:d}".format(stub, nrows, ncols))
    names = list(df.columns)
    report()
    report("| " + ' | '.join(names) + " |")
    if args.showdf:
        print()
        print(df if rows == 0 else df.head(rows))
def checkFile(regular):
    """return True if 'regular' is a regular file with at least read permission, else False"""
    if not os.path.isfile(regular):
        return False
    return os.access(regular, os.R_OK)
# read in CSV file meeting data
# Meeting ID | Topic | Start Time | End Time | User Email | Duration (Minutes) | Participants | Unnamed: 7
def readMeta(csvtarget):
    """read the meeting record at the head of the CSV file and return it as a one-row dataframe

    * 'csvtarget' is the CSV file to read
    * the 'Start Time' and 'End Time' columns are parsed as dates
    * the Zoom column headings are replaced by short names

    any exception from 'pandas.read_csv' is passed on to the caller
    """
    shortnames = {
        'Meeting ID': 'MeetingID',
        'Topic': 'Topic',
        'Start Time': 'Start',
        'End Time': 'Close',
        "User Email": "HostEmail",
        'Duration (Minutes)': 'Minutes',
        'Participants': 'Count',
    }
    meta = pandas.read_csv(csvtarget, nrows=1, parse_dates=['Start Time', 'End Time'])  # one data row only
    return meta.rename(columns=shortnames)
def sayMeta(df):
    """report the meeting record held in row 0 of the dataframe returned by 'readMeta'"""
    # collect every field first so a missing column fails before anything is printed
    fields = ('MeetingID', 'Topic', 'Start', 'Close', 'Minutes', 'Count')
    record = {name: df.at[0, name] for name in fields}
    report()
    report("meeting ID", record['MeetingID'])
    report("stated topic", record['Topic'])
    report("meeting start", record['Start'])
    report("meeting close", record['Close'])
    report("duration minutes", record['Minutes'])
    report("duration hours", "{0:0.1f}".format(record['Minutes']/60.0))
    report("individual sessions", record['Count'])   # no attempt by Zoom to deduplicate
# read in CSV session data
# CSV columns : | Name (Original Name) | User Email | Join Time | Leave Time | Duration (Minutes) | Attentiveness Score |
# date parsing example: 03/26/2020 02:23:34 PM -> 2020-03-26 14:23:34
def readCsv(csvtarget, csvskip=0):
    """read the participant sessions in 'csvtarget' into a dataframe and return same

    * 'csvtarget' is the CSV file to read
    * 'csvskip' is the zero-based row number holding the session column headings
    * the 'Join Time' and 'Leave Time' columns are parsed as dates
    * the Zoom column headings are replaced by short names
    """
    shortnames = {
        'Name (Original Name)': 'Name',
        'User Email': 'Email',
        'Join Time': 'Join',
        'Leave Time': 'Leaf',                    # usefully 4 chars like "Join"
        'Duration (Minutes)': 'Minutes',
        'Attentiveness Score': 'Score',
    }
    datecols = ['Join Time', 'Leave Time']
    sessions = pandas.read_csv(csvtarget, header=csvskip, parse_dates=datecols)
    return sessions.rename(columns=shortnames)
def truncateDf(df, rowslice):
    """return the leading 'rowslice' rows of 'df' and note the truncation when verbose"""
    slicestr = "[:{0:d}]".format(rowslice)
    deport()
    deport("truncation active")
    deport()
    deport("row slice", slicestr)
    return df[:rowslice]
def createSecondDf(df, mainkey):
    """create the recipient dataframe, one row per distinct 'mainkey' value, and return same

    * 'df' is the session dataframe and is left unmodified
    * 'mainkey' is the column to deduplicate on
    * returns a new dataframe sorted on 'mainkey' with a fresh zero-based index and
      the columns | mainkey | Join | Leaf | Delta |

    'Delta' is initialized as a float because the stocking functions store fractional
    minutes there.  Writing a float into an integer column is deprecated in
    recent pandas releases.
    """
    df2 = (df[[mainkey]]                         # slice dataframe using 'mainkey'
           .drop_duplicates(keep='first')        # remove duplicate rows
           .sort_values(by=[mainkey])            # sort
           .reset_index(drop=True))              # CAUTION: reindex essential, 'drop' means do not insert the old index as a column
    df2['Join'] = None                           # add new column and initialize to nothing
    df2['Leaf'] = None                           # add new column and initialize to nothing
    df2['Delta'] = 0.0                           # add new column and initialize to float zero
    return df2
# loop original dataframe and ratchet up cumulative minutes in recipient dataframe
# iterating over dataframes is not good practice but it will do for now
def stockSecondDfIgnoreGaps(df, df2, mainkey):
    """load recipient dataframe with first-join to last-leave durations and return same

    * 'df' is the session dataframe with columns 'Join' and 'Leaf'
    * 'df2' is the recipient dataframe from 'createSecondDf', it is updated in place
    * 'mainkey' is the deduplication column present in both dataframes
    * returns 'df2' sorted on 'Delta', the duration in minutes

    For each participant the earliest join and the latest leave are kept and the
    duration is their simple difference, gaps and overlaps are ignored.  A session
    whose key is absent from 'df2' raises 'KeyError'.
    """
    # stocking code
    deport()
    deport("stocking loop")
    # column positions do not change, so look them up once
    colindex2join = df2.columns.get_loc('Join')      # returns zero-based index
    colindex2leaf = df2.columns.get_loc('Leaf')
    colindex2delta = df2.columns.get_loc('Delta')
    # durations are fractional minutes, so make sure the column can hold them
    df2['Delta'] = df2['Delta'].astype(float)
    # key to row position, built once instead of searching 'df2' for every session
    rowof = {key: pos for pos, key in enumerate(df2[mainkey])}
    for _, row in df.iterrows():
        join = row['Join']
        leaf = row['Leaf']
        rowindex2 = rowof[row[mainkey]]
        currentjoin = df2.iat[rowindex2, colindex2join]   # get current join time
        currentleaf = df2.iat[rowindex2, colindex2leaf]   # get current leaf time
        # 'pandas.isna' covers both None and NaT as the "not yet set" marker
        if pandas.isna(currentjoin) or join < currentjoin:
            df2.iat[rowindex2, colindex2join] = join      # set or ratchet down join time
        if pandas.isna(currentleaf) or leaf > currentleaf:
            df2.iat[rowindex2, colindex2leaf] = leaf      # set or ratchet up leaf time
    # duration calculations
    deport()
    deport("duration loop")
    for rowindex2 in range(len(df2)):
        duration = df2.iat[rowindex2, colindex2leaf] - df2.iat[rowindex2, colindex2join]  # timedelta object
        df2.iat[rowindex2, colindex2delta] = duration.total_seconds() / 60.0              # floating point-valued
    return df2.sort_values(by=['Delta'])             # sort on final duration
def stockSecondDfConsiderGaps(df, df2, mainkey):
    """load recipient dataframe with gap and overlap aware durations and return same

    * 'df' is the session dataframe with columns 'Join', 'Leaf', and 'Minutes'
    * 'df2' is the recipient dataframe from 'createSecondDf', it is updated in place
      and gains the column 'Playhead'
    * 'mainkey' is the deduplication column present in both dataframes
    * returns 'df2' sorted on 'Delta', the accumulated minutes

    Sessions are visited in order of join time.  Each participant has a playhead
    marking the latest leave time accounted for so far and only the part of a
    session beyond that playhead adds to the total.  The playhead never moves
    backwards, otherwise a session lying wholly inside an earlier one would
    rewind it and later overlapping sessions would be counted twice.  A session
    whose key is absent from 'df2' raises 'KeyError'.
    """
    # column names : df  : | Email | Join | Leaf | Minutes |    # minutes from original Zoom data
    # column names : df2 : | Email | Join | Leaf | Delta |
    # stocking code
    deport()
    deport("stocking loop")
    # prepare df
    df3 = df.sort_values(by=['Join'])                # CAUTION: sort is essential
    # prepare df2
    epochstart = pandas.to_datetime(0, unit='s')     # 1970-01-01 CAUTION: TZ-naive necessary for later comparisons
    zeroduration = datetime.timedelta(0.0)           # duration of zero
    report("epochstart", epochstart)
    df2['Playhead'] = epochstart                     # df2 now with column name | Playhead |
    # durations are fractional minutes, so make sure the column can hold them
    df2['Delta'] = df2['Delta'].astype(float)
    colindex2delt = df2.columns.get_loc('Delta')     # returns zero-based index
    colindex2head = df2.columns.get_loc('Playhead')
    # key to row position, built once instead of searching 'df2' for every session
    rowof = {key: pos for pos, key in enumerate(df2[mainkey])}
    for _, row in df3.iterrows():                    # not deduplicated
        # get original data
        value = row[mainkey]
        join = row['Join']
        leaf = row['Leaf']
        # get current recipient data
        rowindex2 = rowof[value]
        currenthead = df2.iat[rowindex2, colindex2head]   # get current playhead
        # process this data / next three branches determine how current and previous durations interact
        if currenthead > leaf:
            duration = zeroduration                  # wholly covered by earlier sessions
        elif currenthead > join:
            duration = leaf - currenthead            # overlaps, count only the new tail
        else:
            duration = leaf - join                   # starts after a gap, count in full
        minutes = duration.total_seconds() / 60.0    # class 'float'
        # update recipient data
        if leaf > currenthead:
            df2.iat[rowindex2, colindex2head] = leaf # move playhead forward only
        df2.iat[rowindex2, colindex2delt] += minutes # bump existing
        # debug reporting / negative durations should never exist
        if minutes < 0.0:
            minstr = '{0:+0.2f}'.format(minutes)
            msg = 'minutes = {0:>7s} | zoom minutes ={1:4d} | join = {2} | leaf = {3} | head = {4} | key = {5}'.format(minstr, row['Minutes'], join, leaf, currenthead, value)
            report("negative duration", msg)
    return df2.sort_values(by=['Delta'])             # sort on final duration
def extractCol(df, fieldname, cutoff):
    """extract column 'fieldname' of 'df' and return it as an ascending list

    * reports the number of entries as the participant count
    * reports how many entries are at or above 'cutoff'
    """
    report()
    column = sorted(df[fieldname].tolist())          # redundant sort in this use case
    report("participants", len(column))
    stayed = len([entry for entry in column if entry >= cutoff])
    report ("stayed " + str(cutoff) + " or more", stayed)
    return column
def getStub():
    """generate and return the filename stub used when creating files

    the stub is the plot title, downcased, with each run of spaces replaced by one hyphen
    """
    deport()
    scriptname = os.path.basename(sys.argv[0])
    candidates = {
        'script': os.path.splitext(scriptname)[0],           # based on script name
        'title': re.sub(' +', '-', plotTitle).lower(),       # based on plot title, downcased
    }
    stub = candidates['title']                   # control which candidate to use here
    deport("filename stub", stub)
    return stub
def writeDatFile(filename, data, sep):
    """create or overwrite the DAT file 'filename' and leave it read-only

    * 'filename' is the file to write
    * 'data' is an iterable of values, written in one 'print' call
    * 'sep' is the separator placed between values

    On an operating system error the failure is reported and the module-level
    'exitCode' is set to the DAT file issue status.  Nothing is raised.
    """
    global exitCode                              # without this the failure status stayed in a local variable
    deport()
    deport("writing DAT file")
    report()
    report("DAT file", filename)
    if os.path.isfile(filename):
        report("action", "overwriting existing file")
    else:
        report("action", "creating new file")
    # active code
    userwritePerms = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP   # set file permission to 0640
    readonlyPerms = stat.S_IRUSR | stat.S_IRGRP                   # set file permission to 0440
    try:
        if os.path.isfile(filename):             # file may not exist at this juncture
            os.chmod(filename, userwritePerms)   # make any existing file writable again
        with open(filename, 'w') as fd:          # closed even if the write fails
            print(*data, sep=sep, file=fd)
        os.chmod(filename, readonlyPerms)
    except OSError:                              # 'IOError' is an alias of 'OSError' in Python 3
        report("file open error", filename)
        exitCode = ExitCode.datIssue.value       # update exit code
def mvSvgCall(localSvg):
    """show, when verbose, a shell command which moves a hand-saved plot to 'localSvg' (nothing is executed)"""
    savedas = "Figure_1.svg"                     # filename assumed for the hand-saved plot
    template = "mv ~/{0:s} {1:s} && chmod 0440 {1:s}"
    deport("convenient move call", template.format(savedas, localSvg))
def myexit(exitcode):
    """common point of exit: report completion when verbose, then exit with 'exitcode'"""
    outcome = " (success)" if exitcode == 0 else " (failure)"
    deport()
    deport("script", "complete")
    deport("exit code", str(exitcode) + outcome)
    report()
    sys.exit(exitcode)
# --------------------------------------
# plotting function
# --------------------------------------
def plotList(column, plottitle, stub):
    """plot 'column' as a bar graph, one bar per person

    * 'column' is the list of durations in minutes
    * 'plottitle' is the plot title
    * 'stub' is the filename stub for the SVG file written under option '--save-plot'

    A dotted horizontal line with annotation is added when a nominal duration was
    given on the command-line.  The figure is saved before the plot window is
    shown, because saving after 'plt.show()' produced an empty plot.
    """
    import matplotlib.pyplot as plt              # deferred so option '--no-plot' works without matplotlib
    persons = len(column)
    title = plottitle
    xlabel = "person number (count {0:d})".format(persons)
    ylabel = "duration [minutes]"
    annot = "nominal duration"
    report("plot title", title)
    deport("persons", persons)
    plt.figure(figsize=(8,6), dpi=200)           # 'figsize' in inches, default 'dpi' is 80
    plt.bar(range(persons), column)              # bar graph not histogram, plots the argument not a global
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if nominalDuration:                          # overplot horizontal line
        # overplot line
        nomLen = int(nominalDuration)
        report("nominal duration line", nomLen)
        plt.axhline(y=nomLen, color='black', linestyle='dotted')
        # add annotation
        offset = 0
        annot = "{0:s} {1:d}".format(annot, nomLen)
        deport("label offset", offset)
        plt.annotate(annot, [offset, nomLen + 5])
    if savePlotAlso:
        svgFilename = stub + ".svg"
        plt.savefig(svgFilename, format='svg')   # target is the first positional parameter 'fname'
    plt.show()
# --------------------------------------
# active code
# --------------------------------------
# outset reporting
report()                                         # add initial blank line
scriptVer()                                      # print script version
deport("verbose", "on")                          # report debug status
if args.showdf: deport("show dataframes", "on")
if args.length: deport("truncate", "active")
pythonVer()                                      # print python version
report("target", csvTarget)

# improve pandas terminal reporting
if sys.platform != 'win32':
    setPandasWide()                              # possibly contains OS-specific code?

# process title
# tested against None because zero is a legitimate title number
if titleNumber is not None:
    plotTitle = numberedTitleFmt.format(titleNumber)
plotTitle = plotTitle.strip() if plotTitle else ""
if not plotTitle:                                # absent or whitespace-only title
    plotTitle = standinPlotTitle
report("processed title", plotTitle)

# process deduplication key
if useName: participantKey = 'Name'              # deduplicate by name, less reliable as subsequent attempts may use different string
else:       participantKey = 'Email'             # deduplicate by email address
report("deduplication key", "'" + participantKey + "'")

# process cutoff
# tested against None so that an explicit cutoff of zero is honored
if givenCutoff is not None: cutoff = givenCutoff
else:                       cutoff = cutoffDefault   # revert to default
report("cutoff minutes", cutoff)

# report again
if ignoreGaps: report("gap treatment", "simple difference between first appearance and final departure")
else:          report("gap treatment", "consider gaps and overlaps in attendance")

# check CSV file exists and is readable
if not checkFile(csvTarget):
    report("absent or unreadable", csvTarget)
    myexit(ExitCode.noFile.value)

# read in CSV file and report
# the meeting record is optional, its absence shows up as a 'ValueError' from 'readMeta'
try:
    meta = readMeta(csvTarget)
except ValueError as e:
    report ("readMeta catch", e)
    report("caution", "CSV file does not contain meeting information")
    csvskip = 0                                  # nothing to skip in CSV file
except Exception as e:                           # not 'BaseException', so Ctrl-C and 'sys.exit' still propagate
    report("readMeta catch", e)
    report("caution", "unexpected error")
    csvskip = 0                                  # nothing to skip in CSV file
else:
    sayDf(meta, "meta dataframe")
    sayMeta(meta)
    csvskip = 2                                  # skip first 2 lines describing the meeting

df = readCsv(csvTarget, csvskip)
sayDf(df, "original dataframe")

# truncate for development purposes as required
if args.length:
    df = truncateDf(df, args.length)

# create recipient dataframe and report
df2 = createSecondDf(df, participantKey)         # 'participantKey' either 'Name' or 'Email'
sayDf(df2, "deduplicated dataframe")

# stock recipient dataframe and report
if ignoreGaps: df2 = stockSecondDfIgnoreGaps(df, df2, participantKey)
else:          df2 = stockSecondDfConsiderGaps(df, df2, participantKey)
sayDf(df2, "ratcheted dataframe", 0)             # zero is print entire dataframe

# extract column and report
cumulatives = extractCol(df2, 'Delta', cutoff)

# sum the deltas and report
cumminutes = sum(cumulatives)
cumhours = cumminutes/60.0
cumhourstr = "%0.1f" % (cumhours)
report("cumulative hours", cumhourstr)

# print column to file as required
if createDatFile:
    stub = getStub()
    mysep = "\n"                                 # one value per line
    writeDatFile(stub + ".dat", cumulatives, mysep)

# plot as required
report()
if omitPlot:
    deport("omitting plot")
else:
    deport("creating plot")
    stub = getStub()
    plotList(cumulatives, plotTitle, stub)
    if not savePlotAlso:
        mvSvgCall(stub + ".svg")                 # passive reporting only

# -------------------------------------
# housekeeping
# -------------------------------------
myexit(exitCode)
# end of file