-
Notifications
You must be signed in to change notification settings - Fork 0
/
bulk-transcript-downloader.py
163 lines (129 loc) · 6.69 KB
/
bulk-transcript-downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
##
# Cisco SocialMiner Bulk Chat Transcript Downloader
#
# This Python script does the following:
#
# 0. Invokes a `/search` REST API request on a Cisco SocialMiner server for all handled chat contacts
# 1. Processes the response from the server, and extracts chat transcript data
# 2. Exports transcript for each chat session into a separate text file (with additional metadata)
# 3. Archives all the exported transcripts into a ZIP file
#
# Requires Python 2.7+
#
# Licensed under the MIT License. For more details, see the LICENSE file.
#
# Cisco™ and SocialMiner™ are registered trademarks of Cisco Systems, Inc. (https://cisco.com)
#
import sys
import os
import errno
import shutil
import argparse
import requests
import time
import xml.etree.ElementTree as ElementTree
# CONSTANTS
# Search API URL template; the single `{}` is filled with the SocialMiner host.
# The query string selects contacts with sourceType "chat" and status "handled".
SEARCH_API_URL = "https://{}/ccp-webapp/ccp/search/contacts?q=sc.sourceType:chat%20AND%20sc.socialContactStatus:handled"
# strftime format for the timestamps shown in the transcript header
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %Z"
TIME_FORMAT = "%H:%M:%S" # individual chat messages only show the time of day
FILENAME_TIMESTAMP_FORMAT = "%Y_%m_%d_%H_%M_%S_%Z" # timestamp format to compose filenames
# Header written at the top of every exported transcript. Placeholders, in
# order: host, user, export time, transcript id, chat initiator, start time,
# end time (see compose_transcript_metadata).
TRANSCRIPT_METADATA = """+--------------- WEB CHAT TRANSCRIPT ---------------+
| Exported from Cisco SocialMiner [{}] by '{}' at {}
|
| ID: {}
| Customer: {}
| Started: {}
| Ended: {}
+---------------------------------------------------+
"""
# One rendered chat message. Placeholders, in order: sender name, time of day,
# message text (see extract_chat_messages). The literal ends with a newline.
TRANSCRIPT_MSG = """{} [{}]: {}
"""
# File name of one exported transcript: start timestamp, then chat initiator
TRANSCRIPT_FILENAME = "ChatTranscript_{}-{}.txt"
# Base name of the final archive (no extension): host, then current timestamp
TRANSCRIPT_ARCHIVENAME = "ChatTranscripts_{}-{}"
# Relative directory that holds the exported text files until they are
# archived; archive_transcripts removes it afterwards.
TRANSCRIPT_TEMP_DIRNAME = "exported_transcripts"
def usage():
    """Print a one-line command-line usage summary to standard output.

    The line starts with the file name of the running script, followed by
    the three options the program accepts.
    """
    # `__name__` is the string "__main__" when this file runs as a script, so
    # it is useless as a program name; use the invoked script's file name.
    if sys.argv and sys.argv[0]:
        script_name = os.path.basename(sys.argv[0])
    else:
        # No script path available (e.g. embedded interpreter).
        script_name = "bulk-transcript-downloader.py"
    # Single-argument parenthesised print works as both the Python 2 print
    # statement and the Python 3 print function.
    print(script_name + " --host=<HOSTNAME/IP OF SOCIALMINER> --user=<ADMIN_USERNAME> --password=<ADMIN_PASSWORD>")
def make_search_request(url, user, password, timeout=300):
    """Issue the search GET request and return the response body as text.

    Parameters:
        url:      fully composed search API URL.
        user:     username for HTTP basic authentication.
        password: password for HTTP basic authentication.
        timeout:  seconds to wait for the server, passed straight to
                  ``requests.get``. Without it a silent server would block
                  the program indefinitely.

    Returns:
        The response body (``response.text``) when the status code is 200.

    Exits the process with status 1 when the request cannot be completed
    (connection failure, timeout, ...) or the server answers with any
    status other than 200.
    """
    print("Making a GET request to the URL: %s\n" % url)
    # We are making a HTTPS (secure) request, but ignoring SSL certificate
    # verification intentionally; silence the warning that would otherwise
    # be emitted for the unverified request.
    requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
    try:
        response = requests.get(url, auth=(user, password), verify=False, timeout=timeout)
    except requests.exceptions.RequestException as error:
        # Report transport-level failures the same way as bad status codes
        # instead of letting a traceback escape.
        print("ERROR - API request to SocialMiner could not be completed: %s\n" % error)
        sys.exit(1)
    if response.status_code != 200:
        print("ERROR - API request to SocialMiner failed with status [%d]\n" % response.status_code)
        print("Error response: %s\n" % response.text)
        sys.exit(1)
    return response.text
def compose_transcript_metadata(transcript_node, host, user):
    """Build the header text placed at the top of an exported transcript.

    Parameters:
        transcript_node: element whose `id`, `chatInitiator`, `startDate`
                         and `endDate` children are read.
        host:            SocialMiner host shown in the header.
        user:            account name shown in the header.

    Returns:
        TRANSCRIPT_METADATA filled with the host, user, current local time,
        transcript id, chat initiator and the start/end times.
    """
    def local_stamp(epoch_seconds):
        # Render seconds-since-epoch as a local-time string.
        return time.strftime(TIMESTAMP_FORMAT, time.localtime(epoch_seconds))

    def child_seconds(tag):
        # The child's text is divided by 1000, i.e. read as milliseconds.
        return float(transcript_node.find(tag).text) / 1000

    exported_at = local_stamp(time.time())
    transcript_id = transcript_node.find('id').text
    customer = transcript_node.find('chatInitiator').text
    started_at = local_stamp(child_seconds('startDate'))
    ended_at = local_stamp(child_seconds('endDate'))
    return TRANSCRIPT_METADATA.format(
        host, user, exported_at, transcript_id, customer, started_at, ended_at)
def extract_chat_messages(transcript_node):
    """Render every `chat` element under *transcript_node* as transcript text.

    Each message is formatted with TRANSCRIPT_MSG (sender name, local time
    of day, message text) and followed by an extra newline. The `time`
    child is divided by 1000, i.e. read as milliseconds.

    Parameters:
        transcript_node: element searched (via ``iter('chat')``) for messages.

    Returns:
        The concatenated text of all messages; an empty string when there
        are none.
    """
    rendered = []
    for chat_message in transcript_node.iter('chat'):
        # ElementTree reports the text of an empty element as None; fall
        # back to "" so the transcript does not contain the word "None".
        sender = chat_message.find('name').text or ""
        sent_at = time.strftime(
            TIME_FORMAT,
            time.localtime(float(chat_message.find('time').text) / 1000))
        body = chat_message.find('msg').text or ""
        rendered.append(TRANSCRIPT_MSG.format(sender, sent_at, body) + "\n")
    # Join once instead of growing a string inside the loop.
    return "".join(rendered)
def extract_transcript(transcript_node, host, user):
    """Return the full text of one transcript: header, blank line, messages.

    Parameters:
        transcript_node: element describing a single chat transcript.
        host:            SocialMiner host, forwarded to the header builder.
        user:            account name, forwarded to the header builder.
    """
    header = compose_transcript_metadata(transcript_node, host, user)
    messages = extract_chat_messages(transcript_node)
    return header + "\n" + messages
def create_temp_dir():
    """Create the temporary export directory TRANSCRIPT_TEMP_DIRNAME.

    If the directory already exists, a message asking the user to delete it
    is printed and the process exits with status 2. Any other OSError is
    re-raised unchanged.
    """
    try:
        os.makedirs(TRANSCRIPT_TEMP_DIRNAME)
    except OSError as makedirs_error:
        if makedirs_error.errno != errno.EEXIST:
            raise
        message = ("Directory `%s` already exists in the current working directory.\n"
                   "Please delete this directory completely and run the program again.")
        print(message % TRANSCRIPT_TEMP_DIRNAME)
        sys.exit(2)
def _transcript_filename_part(text):
    """Return *text* with path separators, characters that are reserved in
    Windows file names, and control characters replaced by underscores."""
    unsafe = '/\\:*?"<>|'
    return "".join("_" if (ch in unsafe or ch < " ") else ch for ch in text)


def export_transcript(transcript_node, host, user):
    """Write one chat transcript to a text file in the temporary directory.

    The file name is built from TRANSCRIPT_FILENAME using the transcript's
    start time (the `startDate` child divided by 1000, rendered in local
    time) and the text of its `chatInitiator` child.

    Parameters:
        transcript_node: element describing a single chat transcript.
        host:            SocialMiner host, forwarded to extract_transcript.
        user:            account name, forwarded to extract_transcript.
    """
    transcript_text = extract_transcript(transcript_node, host, user)
    started_at = time.strftime(
        FILENAME_TIMESTAMP_FORMAT,
        time.localtime(float(transcript_node.find('startDate').text) / 1000))
    # The initiator text comes from the server response (presumably whatever
    # the customer entered - verify against the API). Neutralise characters
    # that would split the path or be rejected by the file system, so the
    # file always lands inside the temporary directory.
    initiator = _transcript_filename_part(
        "{}".format(transcript_node.find('chatInitiator').text))
    filename = TRANSCRIPT_FILENAME.format(started_at, initiator)
    print("Exporting transcript into file: %s" % filename)
    # write to text file
    with open(os.path.join(TRANSCRIPT_TEMP_DIRNAME, filename), 'w') as text_file:
        text_file.write(transcript_text)
def archive_transcripts(archive_name):
    """Zip the exported transcripts and remove the temporary directory.

    Parameters:
        archive_name: base name of the archive to create, without the
                      extension (``shutil.make_archive`` appends it).

    The contents of TRANSCRIPT_TEMP_DIRNAME are archived, the name of the
    created archive is printed, and the temporary directory is deleted.
    """
    print("\nArchiving ...\n")
    # make_archive returns the name of the file it actually created
    # (including the ".zip" extension); report that rather than the bare
    # base name, which does not exist on disk.
    archive_path = shutil.make_archive(archive_name, 'zip', TRANSCRIPT_TEMP_DIRNAME)
    print("\n Transcripts successfully exported into archive: %s\n" % archive_path)
    # also, delete the temporary directory holding exported transcripts
    shutil.rmtree(TRANSCRIPT_TEMP_DIRNAME)
def main():
    """Command-line entry point: search, export every transcript, archive.

    Parses the required --host, --user and --password options, requests the
    handled chat contacts from the server, and parses the response as XML.
    When `ChatTranscript` elements are found, each one is exported into the
    temporary directory and the directory is archived; otherwise a message
    is printed and nothing is written.
    """
    parser = argparse.ArgumentParser(description="Cisco SocialMiner Bulk Chat Transcript Downloader")
    parser.add_argument("--host", help="Hostname / IP Address of SocialMiner", required=True)
    parser.add_argument("--user", help="Username of application admin account in SocialMiner", required=True)
    parser.add_argument("--password", help="Password of application admin account in SocialMiner", required=True)
    options = parser.parse_args()
    host = options.host
    username = options.user
    password = options.password

    # Ask SocialMiner for all social contacts together with their transcripts.
    search_response = make_search_request(SEARCH_API_URL.format(host), username, password)
    root = ElementTree.fromstring(search_response)
    transcript_count = len(root.findall('.//ChatTranscript'))

    if not transcript_count > 0:
        print("No chat transcripts found on %s. Terminating program." % host)
        return

    # The temporary directory holds the text files until they are zipped.
    create_temp_dir()
    print("\nFound %d chat transcripts. Starting export ...\n" % transcript_count)
    for chat_transcript in root.iter('ChatTranscript'):
        export_transcript(chat_transcript, host, username)

    archive_stamp = time.strftime(FILENAME_TIMESTAMP_FORMAT, time.localtime(time.time()))
    archive_transcripts(TRANSCRIPT_ARCHIVENAME.format(host, archive_stamp))
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()