-
Notifications
You must be signed in to change notification settings - Fork 1
/
4-butter.py
115 lines (92 loc) · 4.18 KB
/
4-butter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import csv
import os
import random
import string
import sys
import pandas as pd
theSearchWord = "but"
#time in seconds the length should be to make fun of myself
thePause = 0.01
# Set the path to the CSV file
#csv_path = 'transcript.csv'
theAlignmentFile = 'intermediate/lecture_alignments/ESSP520-02butter.csv'
# Set the path to the folder containing the image files
theImageFolder = 'lecture-daemon_data/img_library/but'
theWordImages = os.listdir(theImageFolder)
print(" Opening alignment file: '%s'" % theAlignmentFile)
theAlignmentRead = pd.read_csv(theAlignmentFile, header=None, names=['index','word','start','stop','token','slide', 'meta'], dtype={'slide':'object', 'meta':'object'})
print(theAlignmentRead)
wordList = list(theAlignmentRead['word'])
#print(wordList)
theWordOccuranceList=[]
theSlide = ""
print(theAlignmentRead['slide'].index.isnull())
for i, theWord in enumerate(wordList):
# if math.isnan(theAlignmentRead['slide'][i]==NaN:
# print("what")
###is there a previous slide?
# if theAlignmentRead['slide'][i]!="" or theAlignmentRead['slide'][i].isnull():
# theSlide=theAlignmentRead['slide'][i]
# print(theSlide)
if theWord == theSearchWord:
theStartTime = theAlignmentRead['start'][i]
theStopTime = theAlignmentRead['stop'][i]
if theStartTime!="" and theStopTime != "":
theStartTime = float(theStartTime)
theStopTime = float(theStopTime)
if theStopTime-theStartTime>=thePause:
aRandomFile = random.choice(theWordImages)
theNewEntry = {'start': theStartTime, 'stop': theStopTime, 'slide': aRandomFile}
theWordOccuranceList.append(theNewEntry)
df=pd.DataFrame(theWordOccuranceList, columns=['start','stop','slide'])
#df.loc[len(df)] = theWordOccuranceList
print(df)
print(" Inserting best calculated '%s' starts..." % (theSearchWord) )
alignmentWithWord = pd.concat((theAlignmentRead, df))
print(" Sorting slides by start time...")
alignmentWithWord = alignmentWithWord.sort_values(by='start')
print(" Writing edited alignment file...")
#alignmentWithTiming.to_csv('test.csv', header=False)
alignmentWithWord.to_csv("bla.csv", header=False, index=False)
# theColumnNames = ['index', 'word', 'start time', 'stop time', 'token', 'video meta', 'slide']
# # Open the CSV file and read the contents into a list of dictionaries
# with open(theCsvPath, 'r') as aCsvFile:
# theCsvData = csv.DictReader(aCsvFile, fieldnames = theColumnNames)
# theRows = [row for row in theCsvData]
# # Iterate through the list of lists
# for i, theRow in enumerate(theRows):
# theWord = theRow['word']
# theWord = theWord.rstrip(string.punctuation)
# theWord = theWord.lower()
# if theWord == "but":
# if theRow['start time'] !="":
# theStartTime = float(theRow['start time'])
# #don't use the word end. use the start of the next word
# #if theRows[i+1]['start time'] !="":
# if theRow['stop time'] !="":
# theEndTime = float(theRows[i+1]['start time'])
# # print(start_time)
# # print(end_time)
# theWordDelta = theEndTime - theStartTime
# #print(theWordDelta)
# if theWordDelta>=theButPause:
# aRandomFile = random.choice(theButtFiles)
# print(aRandomFile)
# theRows[i]['slide'] = "test"
# print(theRows)
# # if start_time !="" and end_time!="":
# # start_time = start_time = float(row['start_time'])
# # end_time = end_time = float(row['end_time'])
# # # Check if the word is “but” and the time difference is greater than 0.3
# # if word == 'but' and (end_time - start_time) > 0.2:
# #
# # # Choose a random audio file from the list
# # random_file = random.choice(butt_files)
# # # Add the file path to the new column entry
# # row[6]="test"
# # rows[i].append(os.path.join(image_folder, random_file))
# # Write the modified list of lists back to the CSV file
# sys.exit()
# with open(csv_path, 'w', newline='') as csv_file:
# csv_writer = csv.writer(csv_file)
# csv_writer.writerows(rows)