-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal.py
109 lines (51 loc) · 2.32 KB
/
final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pandas as pd
import numpy as np
import os
import re
from datetime import datetime
def getWhatsAppData(personName=None, csvPath='pulak2.csv'):
    """Build a {their message: my reply} dictionary from a chat export CSV.

    The CSV must have a 'name' column (who sent the row) and a 'text'
    column (what was sent). Consecutive rows from the same side are joined
    with spaces into a single message; each time the other person starts
    talking again after I have replied, the finished (prompt, reply) pair is
    cleaned with cleanMessage() and stored.

    Args:
        personName: Value of the 'name' column that identifies "my" rows.
            When omitted, a module-level variable called ``personName`` is
            used if one exists.
        csvPath: Path of the CSV file to read.

    Returns:
        dict mapping the other person's cleaned message to my cleaned reply.
        Identical prompts overwrite each other, so the last reply wins.

    Raises:
        ValueError: if no person name was given and no module-level
            ``personName`` is defined.
    """
    if personName is None:
        # Earlier versions read a global `personName` directly; keep honouring
        # it, but fail with an explicit message instead of a NameError.
        personName = globals().get('personName')
        if personName is None:
            raise ValueError(
                "personName is required: pass it as an argument or define a "
                "module-level 'personName' before calling getWhatsAppData()"
            )

    df = pd.read_csv(csvPath)
    responseDictionary = dict()
    otherParts, myParts = [], []

    def storePair():
        # Record the finished exchange and start collecting the next one.
        prompt = cleanMessage(" ".join(otherParts)).rstrip()
        reply = cleanMessage(" ".join(myParts)).rstrip()
        responseDictionary[prompt] = reply
        del otherParts[:]
        del myParts[:]

    # Walk the rows in file order. Splitting into sent/received frames and
    # concatenating them (as before) placed every sent message ahead of every
    # received one, which destroyed the prompt -> reply alternation.
    # NOTE(review): assumes the CSV rows are already chronological - confirm.
    for name, text in zip(df['name'], df['text']):
        if name != personName:
            if myParts and otherParts:
                storePair()
            otherParts.append(str(text))
        elif otherParts:
            myParts.append(str(text))
        # else: I am opening the conversation, so there is no prompt that
        # this message could be a reply to - skip it.

    # The loop only stores a pair when the other person speaks again, so the
    # final exchange has to be flushed explicitly.
    if myParts and otherParts:
        storePair()

    return responseDictionary
###############################################
def cleanMessage(message):
    """Normalise a chat message before it is stored in the dictionary.

    Steps, in order:
      1. replace the '\\x8f' control character with a space,
      2. drop the two-character sequence '\\xc2\\xa0' (a non-breaking space
         that was decoded with the wrong codec),
      3. lower-case the text,
      4. remove the punctuation characters . , ! ?
      5. replace the mis-decoded emoji prefix matched by the final regex
         with a space.

    Args:
        message: Raw message text (str).

    Returns:
        The cleaned, lower-cased message (str).
    """
    # Replace the stray \x8f control character with a space
    cleanedMessage = message.replace('\x8f', ' ')
    # Deal with some weird tokens. This has to run BEFORE lower(): lower()
    # maps "\xc2" (Â) to "\xe2" (â), after which the pattern can never match.
    cleanedMessage = cleanedMessage.replace("\xc2\xa0", "")
    cleanedMessage = cleanedMessage.lower()
    # Remove punctuation
    cleanedMessage = re.sub('([.,!?])','', cleanedMessage)
    # Replace the (already lower-cased) mis-decoded emoji prefix with a space
    cleanedMessage = re.sub('ðÿ˜',' ', cleanedMessage)
    return cleanedMessage
# getWhatsAppData() needs to know which value of the CSV 'name' column is
# "me"; nothing in this file defined it, so ask for it here. It is stored as
# the module-level `personName` that getWhatsAppData() looks up.
personName = input('Enter your name as it appears in the chat export: ').strip()

combinedDictionary = {}
combinedDictionary.update(getWhatsAppData())
print ('Total len of dictionary', len(combinedDictionary))

print ('Saving conversation data dictionary')
np.save('conversationDictionary.npy', combinedDictionary)
# Read the file back as a round-trip check. A dict is saved as a pickled
# object array, which np.load refuses unless allow_pickle=True; that is safe
# here because the file was written by this script a moment ago.
np.load("conversationDictionary.npy", allow_pickle=True)

# `with` guarantees the file is flushed and closed; utf-8 so that non-ASCII
# message text does not depend on the platform's default encoding.
with open('conversationData.txt', 'w', encoding='utf-8') as conversationFile:
    for key, value in combinedDictionary.items():
        prompt, reply = key.strip(), value.strip()
        if not prompt or not reply:
            # If there are empty strings
            continue
        # Separate prompt from reply and each pair from the next; without
        # whitespace the last word of one ran straight into the first word
        # of the following text.
        # NOTE(review): assumes consumers tokenise on whitespace - confirm.
        conversationFile.write(prompt + ' ' + reply + '\n')