-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfull_integration.py
702 lines (583 loc) · 31.1 KB
/
full_integration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
import requests
from requests.auth import HTTPBasicAuth
import datetime as dt
import time
import pandas as pd
import os
import re
import ctypes
import xml.etree.ElementTree as ET
import math
import os
from tableauhyperapi import HyperProcess, Connection, TableDefinition, SqlType, Telemetry, Inserter, CreateMode
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
from tkinter import *
from sys import exit
def cleanResults(noteID,user,password):
'''
returns list of lists containing paragraph ID, Title, and Output [{id, title, timeFin, columns, data}, {id, title, timeFin, columns, data}, etc]
'''
print('Formatting Results')
### gets json with information about all paragraphs
noteInfoQ = 'https://ZEPPELIN_SERVER/zeppelin/api/notebook/' + noteID
results = requests.get(noteInfoQ, auth=HTTPBasicAuth(user, password)).json()['body']['paragraphs']
clean = []
for i in range(len(results)):
### first paragraph is to change output line limit settings and will not return output so it is skipped
if i == 0:
continue
### extracts necessary information from nested dictionaries
title = results[i]['title']
id = results[i]['id']
timeFinish = results[i]['dateFinished'].replace(' ','_').replace(',','').replace(':','.')
columns = list(results[i]['config']['results']['0']['graph']['setting']['table']['tableColumnTypeState']['names'].keys())
rawData = results[i]['results']['msg'][0]['data']
### splits data from one long string into 1 string per row
unsplitRows = rawData.split('\n')
### splits each data row into list of strings
listOfRows = []
for i in range(len(unsplitRows)):
if unsplitRows[i] == '':
del unsplitRows[i]
else:
listOfRows.append(unsplitRows[i].split('\t'))
### clean is the list of dictionaries to return
clean.append({'id':id, 'title':title, 'timeFin':timeFinish, 'columns':columns , 'data':listOfRows[1:]})
return clean
def getResults(noteID,user,password):
'''Asynchronously runs all paragraphs and returns pharagraph results. Will check for completion at set interval'''
### run all paragraphs, longer runtime notebooks will return 504, but notebook will still run
runNote = 'https://ZEPPELIN_SERVER/zeppelin/api/notebook/job/' + noteID
requests.post(runNote, auth=HTTPBasicAuth(user, password))
while True:
### wait time in seconds between status checks
time.sleep(10)
### get paragraph status
getStatus = 'https://ZEPPELIN_SERVER/zeppelin/api/notebook/job/' + noteID
allStatus = requests.get(getStatus, auth=HTTPBasicAuth(user, password)).json()['body']
### creates list of status
status = []
for i in range(len(allStatus)):
status.append(allStatus[i]['status'])
print(status)
### check paragraph status
if 'ERROR' in status: # if any paragraphs error, retry
print('Zeppelin error: Try Again')
main()
elif 'RUUNNING' in status or 'READY' in status or 'PENDING' in status or 'ABORT' in status: # if any paragraphs are not finished, continue loop and chekc again after sleep time
continue
else:
for i in range(len(status)):
if status[i] == 'FINISHED'and i != len(status)-1:
continue
elif status[i] == 'FINISHED' and i == len(status)-1:
return cleanResults(noteID,user,password)
else:
break
continue
def export(array, folderName):
'''takes list of dictionaries and exports each as its own csv'''
for i in range(len(array)):
dictionary = {}
#gets column headers to use as dictionary keys
keys = array[i]['columns']
#restructures data from list per row to list per column
for j in range(len(keys)):
info=[]
for k in range(len(array[i]['data'])):
info.append(array[i]['data'][k][j])
dictionary[keys[j]] = info
#converts dictionary to pandas dataframe
df = pd.DataFrame(dictionary)
#saves dataframe as csv. file name is paragraph title + server finish date and time. server time is taiwan - 15hr (pacific standard time)
path = folderName + '\\' + array[i]['title'] + '_' +array[i]['timeFin'] + '.csv'
df.to_csv(path, encoding='utf-8',header=True)
# The namespace for the REST API is 'http://tableau.com/api' for Tableau Server 9.1 or later
xmlns = {'t': 'http://tableau.com/api'}
# The maximum size of a file that can be published in a single request is 64MB
FILESIZE_LIMIT = 1024 * 1024 * 64 # 64MB
# For when a data source is over 64MB, break it into 5MB (standard chunk size) chunks
CHUNK_SIZE = 1024 * 1024 * 5 # 5MB
class ApiCallError(Exception):
pass
class UserDefinedFieldError(Exception):
pass
class WindowsInhibitor:
'''Prevent OS sleep/hibernate in windows; code from:
https://github.com/h3llrais3r/Deluge-PreventSuspendPlus/blob/master/preventsuspendplus/core.py
API documentation:
https://msdn.microsoft.com/en-us/library/windows/desktop/aa373208(v=vs.85).aspx'''
ES_CONTINUOUS = 0x80000000
ES_SYSTEM_REQUIRED = 0x00000001
def __init__(self):
pass
def inhibit(self):
import ctypes
print("Preventing Windows from going to sleep")
ctypes.windll.kernel32.SetThreadExecutionState(
WindowsInhibitor.ES_CONTINUOUS | \
WindowsInhibitor.ES_SYSTEM_REQUIRED)
def uninhibit(self):
import ctypes
print("Allowing Windows to go to sleep")
ctypes.windll.kernel32.SetThreadExecutionState(
WindowsInhibitor.ES_CONTINUOUS)
def _encode_for_display(text):
"""
Encodes strings so they can display as ASCII in a Windows terminal window.
This function also encodes strings for processing by xml.etree.ElementTree functions.
Returns an ASCII-encoded version of the text.
Unicode characters are converted to ASCII placeholders (for example, "?").
"""
return text.encode('ascii', errors="backslashreplace").decode('utf-8')
def _make_multipart(parts):
"""
Creates one "chunk" for a multi-part upload
'parts' is a dictionary that provides key-value pairs of the format name: (filename, body, content_type).
Returns the post body and the content type string.
For more information, see this post:
http://stackoverflow.com/questions/26299889/how-to-post-multipart-list-of-json-xml-files-using-python-requests
"""
mime_multipart_parts = []
for name, (filename, blob, content_type) in parts.items():
multipart_part = RequestField(name=name, data=blob, filename=filename)
multipart_part.make_multipart(content_type=content_type)
mime_multipart_parts.append(multipart_part)
post_body, content_type = encode_multipart_formdata(mime_multipart_parts)
content_type = ''.join(('multipart/mixed',) + content_type.partition(';')[1:])
return post_body, content_type
def _check_status(server_response, success_code):
"""
Checks the server response for possible errors.
"""
if server_response.status_code != success_code:
parsed_response = ET.fromstring(server_response.text)
# Obtain the 3 xml tags from the response: error, summary, and detail tags
error_element = parsed_response.find('t:error', namespaces=xmlns)
summary_element = parsed_response.find('.//t:summary', namespaces=xmlns)
detail_element = parsed_response.find('.//t:detail', namespaces=xmlns)
# Retrieve the error code, summary, and detail if the response contains them
code = error_element.get('code', 'unknown') if error_element is not None else 'unknown code'
summary = summary_element.text if summary_element is not None else 'unknown summary'
detail = detail_element.text if detail_element is not None else 'unknown detail'
error_message = '{0}: {1} - {2}'.format(code, summary, detail)
raise ApiCallError(error_message)
return
def sign_in(server, username, password, site=""):
"""
Signs in to the server specified with the given credentials
"""
url = server + "/api/{0}/auth/signin".format('3.2')
# Builds the request
xml_request = ET.Element('tsRequest')
credentials_element = ET.SubElement(xml_request, 'credentials', name=username, password=password)
ET.SubElement(credentials_element, 'site', contentUrl=site)
xml_request = ET.tostring(xml_request)
# Make the request to server
server_response = requests.post(url, data=xml_request)
_check_status(server_response, 200)
# ASCII encode server response to enable displaying to console
server_response = _encode_for_display(server_response.text)
# Reads and parses the response
parsed_response = ET.fromstring(server_response)
# Gets the auth token and site ID
token = parsed_response.find('t:credentials', namespaces=xmlns).get('token')
site_id = parsed_response.find('.//t:site', namespaces=xmlns).get('id')
user_id = parsed_response.find('.//t:user', namespaces=xmlns).get('id')
return token, site_id, user_id
def sign_out(server, auth_token, version):
"""
Destroys the active session and invalidates authentication token.
"""
url = server + "/api/{0}/auth/signout".format(version)
server_response = requests.post(url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 204)
return
def start_upload_session(server, auth_token, site_id, version):
"""
Creates a POST request that initiates a file upload session.
"""
print(auth_token)
url = server + "/api/{0}/sites/{1}/fileUploads".format(version, site_id)
server_response = requests.post(url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 201)
xml_response = ET.fromstring(_encode_for_display(server_response.text))
return xml_response.find('t:fileUpload', namespaces=xmlns).get('uploadSessionId')
def get_default_project_id(server, auth_token, site_id, version, projectName):
"""
Returns the project ID for the desired project on the Tableau server.
"""
### name of destination project
page_num, page_size = 1, 100 # Default paginating values
### builds the request
url = server + "/api/{0}/sites/{1}/projects".format(version, site_id)
paged_url = url + "?pageSize={0}&pageNumber={1}".format(page_size, page_num)
server_response = requests.get(paged_url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 200)
xml_response = ET.fromstring(_encode_for_display(server_response.text))
### used to determine if more requests are required to find all projects on server
total_projects = int(xml_response.find('t:pagination', namespaces=xmlns).get('totalAvailable'))
max_page = int(math.ceil(total_projects / page_size))
projects = xml_response.findall('.//t:project', namespaces=xmlns)
### continue querying if more projects exist on the server
for page in range(2, max_page + 1):
paged_url = url + "?pageSize={0}&pageNumber={1}".format(page_size, page)
server_response = requests.get(paged_url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 200)
xml_response = ET.fromstring(_encode_for_display(server_response.text))
projects.extend(xml_response.findall('.//t:project', namespaces=xmlns))
### look through all projects (EN and DE locales)
for project in projects:
if project.get('name') == projectName:
return project.get('id')
print("\tProject was not found in {0}".format(server))
def existing(server, auth_token, site_id, datasource_name, version):
'''
Checks through ALL data sources for given datasource name. In addition to determining if create datasource or append datasource is appropriate. This has the side effect of preventing creation of datasources with the same name in tableau site, by allowing the program to attempt append to a datasource that doesn't exist in the desired project. This will return an error.
'''
### get all datasources on current site
url = server + "/api/{0}/sites/{1}/datasources".format(version, site_id)
server_response = requests.get(url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 200)
xml_response = ET.fromstring(_encode_for_display(server_response.text))
datasources = xml_response.findall('.//t:datasource', namespaces=xmlns)
### check if the name of the datasource we want to add already exists in the current list if datasource names on the site
for datasource in datasources:
if datasource_name == datasource.get('name'):
print('existing')
return True
print('new')
return False
def publish_new_datasource(server, auth_token, site_id, datasource_filename, dest_project_id, version):
"""
Publishes the data source to the desired project.
"""
datasource_name, file_extension = datasource_filename.split('.', 1)
datasource_size = os.path.getsize(datasource_filename)
chunked = datasource_size >= FILESIZE_LIMIT
### build a general request for publishing
xml_request = ET.Element('tsRequest')
datasource_element = ET.SubElement(xml_request, 'datasource', name=datasource_name)
ET.SubElement(datasource_element, 'project', id=dest_project_id)
xml_request = ET.tostring(xml_request)
if chunked:
print("\tPublishing '{0}' in {1}MB chunks (data source over 64MB):".format(datasource_name, CHUNK_SIZE / 1024000))
### initiates an upload session
print(auth_token)
upload_id = start_upload_session(server, auth_token, site_id, version)
### URL for PUT request to append chunks for publishing
put_url = server + "/api/{0}/sites/{1}/fileUploads/{2}".format(version, site_id, upload_id)
### reads and uploads chunks of the data source
with open(datasource_filename, 'rb') as f:
while True:
data = f.read(CHUNK_SIZE)
if not data:
break
payload, content_type = _make_multipart({'request_payload': ('', '', 'text/xml'),
'tableau_file': ('file', data, 'application/octet-stream')})
print("\tPublishing a chunk...")
server_response = requests.put(put_url, data=payload,
headers={'x-tableau-auth': auth_token, "content-type": content_type})
_check_status(server_response, 200)
### finish building request for chunking method
payload, content_type = _make_multipart({'request_payload': ('', xml_request, 'text/xml')})
publish_url = server + "/api/{0}/sites/{1}/datasources".format(version, site_id)
publish_url += "?uploadSessionId={0}".format(upload_id)
publish_url += "&datasourceType={0}&overwrite=false".format(file_extension)
else:
print("\tPublishing '{0}' using the all-in-one method (data source under 64MB)".format(datasource_name))
### read the contents of the file to publish
with open(datasource_filename, 'rb') as f:
datasource_bytes = f.read()
### finish building request for all-in-one method
parts = {'request_payload': ('', xml_request, 'text/xml'),
'tableau_datasource': (datasource_filename, datasource_bytes, 'application/octet-stream')}
payload, content_type = _make_multipart(parts)
publish_url = server + "/api/{0}/sites/{1}/datasources".format(version, site_id)
publish_url += "?datasourceType={0}&overwrite=false".format(file_extension)
### make the request to publish and check status code
print("\tUploading...")
server_response = requests.post(publish_url, data=payload,
headers={'x-tableau-auth': auth_token, 'content-type': content_type})
_check_status(server_response, 201)
def publish_datasource(server, auth_token, site_id, datasource_filename, dest_project_id, version, datasource_name):
"""
Publishes the data source to the desired project.
"""
### Check if the datasource already exists in tableau. If no, a new datasource will be published
if existing(server, auth_token, site_id, datasource_name, version) == False:
publish_new_datasource(server, auth_token, site_id, datasource_filename, dest_project_id, version)
return
### download backup file
datasource_id = get_datasource_id(server, auth_token, site_id, datasource_name, version)
download(server, auth_token, site_id, datasource_id, version)
datasource_name, file_extension = datasource_filename.split('.', 1)
datasource_size = os.path.getsize(datasource_filename)
chunked = datasource_size >= FILESIZE_LIMIT
### build a general request for publishing
xml_request = ET.Element('tsRequest')
datasource_element = ET.SubElement(xml_request, 'datasource', name=datasource_name)
ET.SubElement(datasource_element, 'project', id=dest_project_id)
xml_request = ET.tostring(xml_request)
if chunked:
print("\tAppending '{0}' in {1}MB chunks (data source over 64MB):".format(datasource_name, CHUNK_SIZE / 1024000))
### initiates an upload session
print(auth_token)
upload_id = start_upload_session(server, auth_token, site_id, version)
### URL for PUT request to append chunks for publishing
put_url = server + "/api/{0}/sites/{1}/fileUploads/{2}".format(version, site_id, upload_id)
### reads and uploads chunks of the data source
with open(datasource_filename, 'rb') as f:
while True:
data = f.read(CHUNK_SIZE)
if not data:
break
payload, content_type = _make_multipart({'request_payload': ('', '', 'text/xml'),
'tableau_file': ('file', data, 'application/octet-stream')})
print("\tPublishing a chunk...")
server_response = requests.put(put_url, data=payload,
headers={'x-tableau-auth': auth_token, "content-type": content_type})
_check_status(server_response, 200)
### finish building request for chunking method
payload, content_type = _make_multipart({'request_payload': ('', xml_request, 'text/xml')})
publish_url = server + "/api/{0}/sites/{1}/datasources".format(version, site_id)
publish_url += "?uploadSessionId={0}".format(upload_id)
publish_url += "&datasourceType={0}&append=true".format(file_extension)
else:
print("\tAppending '{0}' using the all-in-one method (data source under 64MB)".format(datasource_name))
### read the contents of the file to publish
with open(datasource_filename, 'rb') as f:
datasource_bytes = f.read()
### finish building request for all-in-one method
parts = {'request_payload': ('', xml_request, 'text/xml'),
'tableau_datasource': (datasource_filename, datasource_bytes, 'application/octet-stream')}
payload, content_type = _make_multipart(parts)
publish_url = server + "/api/{0}/sites/{1}/datasources".format(version, site_id)
publish_url += "?datasourceType={0}&append=true".format(file_extension)
### make the request to publish and check status code
print("\tUploading...")
server_response = requests.post(publish_url, data=payload,
headers={'x-tableau-auth': auth_token, 'content-type': content_type})
_check_status(server_response, 201)
def get_datasource_id(server, auth_token, site_id, datasource_name, version):
"""
Gets the id of the desired data source to relocate.
"""
url = server + "/api/{0}/sites/{1}/datasources".format(version, site_id)
server_response = requests.get(url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 200)
xml_response = ET.fromstring(_encode_for_display(server_response.text))
datasources = xml_response.findall('.//t:datasource', namespaces=xmlns)
for datasource in datasources:
if datasource.get('name') == datasource_name:
return datasource.get('id')
error = "Data source named '{0}' not found.".format(datasource_name)
raise LookupError(error)
def download(server, auth_token, site_id, datasource_id,version):
"""
Downloads the desired data source from the server (temp-file).
"""
print("\tDownloading data source to an archive file")
url = server + "/api/{0}/sites/{1}/datasources/{2}/content".format(version, site_id, datasource_id)
server_response = requests.get(url, headers={'x-tableau-auth': auth_token})
_check_status(server_response, 200)
### Header format: Content-Disposition: name="tableau_datasource"; filename="datasource-filename"
now = dt.datetime.now()
currentDir = 'CURRENT_DIRECTORY_PATH'
filename = re.findall(r'filename="(.*)"', server_response.headers['Content-Disposition'])[0][:-5]
folder = currentDir + filename
### check if folder already exists. if not, creat one.
if not os.path.exists(folder):
os.makedirs(folder)
path = currentDir + filename + '/' + filename + '-' + now.strftime("%Y-%m-%d_%H-%M-%S") + '.tdsx'
### writes server response to file at path
with open(path, 'wb') as f:
f.write(server_response.content)
def delete_datasource(datasource_filename):
os.remove(datasource_filename)
def isDate(string):
'''
try if the string is a date in the correct format as dictated by the server
'''
try:
t = dt.datetime.strptime(string, "%Y-%m-%d") #"%Y-%m-%d" Zeppelin format 2023-01-31, "%m/%d/%Y" excel format 1/31/2023
return True, t
except ValueError as err:
return False, 'not a date'
def isfloat(num):
try:
float(num)
return True
except ValueError:
return False
def convertToHyper(dictionary, filename):
with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, 'myapp') as hyper:
### create the extract, replace it if it already exists
### iso alpha2 country codes
alpha2 = ['AF', 'AX', 'AL', 'DZ', 'AS', 'AD', 'AO', 'AI', 'AQ', 'AG', 'AR', 'AM', 'AW', 'AU', 'AT', 'AZ', 'BH', 'BS', 'BD', 'BB', 'BY', 'BE', 'BZ', 'BJ', 'BM', 'BT', 'BO', 'BQ', 'BA', 'BW', 'BV', 'BR', 'IO', 'BN', 'BG', 'BF', 'BI', 'KH', 'CM', 'CA', 'CV', 'KY', 'CF', 'TD', 'CL', 'CN', 'CX', 'CC', 'CO', 'KM', 'CG', 'CD', 'CK', 'CR', 'CI', 'HR', 'CU', 'CW', 'CY', 'CZ', 'DK', 'DJ', 'DM', 'DO', 'EC', 'EG', 'SV', 'GQ', 'ER', 'EE', 'ET', 'FK', 'FO', 'FJ', 'FI', 'FR', 'GF', 'PF', 'TF', 'GA', 'GM', 'GE', 'DE', 'GH', 'GI', 'GR', 'GL', 'GD', 'GP', 'GU', 'GT', 'GG', 'GN', 'GW', 'GY', 'HT', 'HM', 'VA', 'HN', 'HK', 'HU', 'IS', 'IN', 'ID', 'IR', 'IQ', 'IE', 'IM', 'IL', 'IT', 'JM', 'JP', 'JE', 'JO', 'KZ', 'KE', 'KI', 'KP', 'KR', 'KW', 'KG', 'LA', 'LV', 'LB', 'LS', 'LR', 'LY', 'LI', 'LT', 'LU', 'MO', 'MK', 'MG', 'MW', 'MY', 'MV', 'ML', 'MT', 'MH', 'MQ', 'MR', 'MU', 'YT', 'MX', 'FM', 'MD', 'MC', 'MN', 'ME', 'MS', 'MA', 'MZ', 'MM', 'NA', 'NR', 'NP', 'NL', 'NC', 'NZ', 'NI', 'NE', 'NG', 'NU', 'NF', 'MP', 'NO', 'OM', 'PK', 'PW', 'PS', 'PA', 'PG', 'PY', 'PE', 'PH', 'PN', 'PL', 'PT', 'PR', 'QA', 'RE', 'RO', 'RU', 'RW', 'BL', 'SH', 'KN', 'LC', 'MF', 'PM', 'VC', 'WS', 'SM', 'ST', 'SA', 'SN', 'RS', 'SC', 'SL', 'SG', 'SX', 'SK', 'SI', 'SB', 'SO', 'ZA', 'GS', 'SS', 'ES', 'LK', 'SD', 'SR', 'SJ', 'SZ', 'SE', 'CH', 'SY', 'TW', 'TJ', 'TZ', 'TH', 'TL', 'TG', 'TK', 'TO', 'TT', 'TN', 'TR', 'TM', 'TC', 'TV', 'UG', 'UA', 'AE', 'GB', 'US', 'UM', 'UY', 'UZ', 'VU', 'VE', 'VN', 'VG', 'VI', 'WF', 'EH', 'YE', 'ZM', 'ZW']
### set column name and type and set data values to correct type
columns = []
for i in range(len(dictionary['columns'])):
columnName = dictionary['columns'][i]
sample = dictionary['data'][2][i].translate(str.maketrans('', '', '$,'))
#print(sample)
if isfloat(sample): #checks if value is an float
type = SqlType.double()
for j in range(len(dictionary['data'])):
dictionary['data'][j][i] = float(dictionary['data'][j][i].translate(str.maketrans('', '', '$,')))
elif True in isDate(sample): #checks if value is a date in the same format as the databases
type = SqlType.date()
for j in range(len(dictionary['data'])):
dictionary['data'][j][i] = isDate(dictionary['data'][j][i])[1]
elif sample == 2: #checks if value is shorter than 2
if sample in alpha2: #checks if value is in the list of alpha2 country codes
type = SqlType.geography()
for j in range(len(dictionary['data'])):
dictionary['data'][j][i] = bytes(dictionary['data'][j][i], 'utf-8')
else:
type = SqlType.text()
columns.append(TableDefinition.Column(columnName, type)) #add current column to list of columns
with Connection(hyper.endpoint, filename, CreateMode.CREATE_AND_REPLACE) as connection: #create
schema = TableDefinition(dictionary['title'], columns)
connection.catalog.create_table(schema)
with Inserter(connection, schema) as inserter:
inserter.add_rows(dictionary['data'])
inserter.execute()
def appendTableau(dictionary, dest_server, dest_auth_token, dest_site_id, dest_project_id, version):
'''
Takes one item from array {id, title, timeFin, columns, data} and publishes to tableau
'''
### initialization
datasource_filename = dictionary['title'] + '.hyper'
datasource_name = dictionary['title']
### convert data to .hyper
#print('\nConverting data to .hyper format')
convertToHyper(dictionary, datasource_filename)
### publish to new site
#print("\nPublishing data source to {0}".format(dest_server))
publish_datasource(dest_server, dest_auth_token, dest_site_id, datasource_filename, dest_project_id, version, datasource_name)
### Deleting data source from the source site #####
#print("\nDeleting temp file")
delete_datasource(datasource_filename)
def askInfo():
root = Tk()
# Field 1
l1 = Label(text='Zeppelin Username:')
l1.grid(row=0,column=0)
zeppelinUser = Entry(root)
zeppelinUser.insert(0, "") # fill string to prefill field
zeppelinUser.grid(row=0,column=1)
# Field 2
l2 = Label(text='Zeppelin Password:')
l2.grid(row=1,column=0)
zeppelinPass = Entry(root)
zeppelinPass.insert(0, "") # fill string to prefill field
zeppelinPass.grid(row=1,column=1)
# Field 3
l3 = Label(text='Zeppelin Note ID:')
l3.grid(row=2,column=0)
notebook = Entry(root)
notebook.insert(0, "") # fill string to prefill field
notebook.grid(row=2,column=1)
# Field 4
l4 = Label(text='Tableau Username:')
l4.grid(row=3,column=0)
tableauUser = Entry(root)
tableauUser.insert(0, "") # fill string to prefill field
tableauUser.grid(row=3,column=1)
# Field 5
l5 = Label(text='Tableau Password:')
l5.grid(row=4,column=0)
tableauPass = Entry(root)
tableauPass.insert(0, "") # fill string to prefill field
tableauPass.grid(row=4,column=1)
# Field 6
l6 = Label(text='Tableau Project name:')
l6.grid(row=5,column=0)
tableauProject = Entry(root)
tableauProject.insert(0, "") # fill string to prefill field
tableauProject.grid(row=5,column=1)
# Field 7
l7 = Label(text='Tableau API Version:')
l7.grid(row=6,column=0)
version = Entry(root)
version.insert(0, "") # fill string to prefill field
version.grid(row=6,column=1)
var = IntVar()
b1=Button(root, text='submit', command = lambda: var.set(1))
b1.grid(row=8,column=1)
b1.wait_variable(var)
zU = zeppelinUser.get()
zP = zeppelinPass.get()
noteID = notebook.get()
tU = tableauUser.get()
tP = tableauPass.get()
tProj = tableauProject.get()
ver = version.get()
root.destroy()
root.mainloop()
return zU, zP, noteID,tU, tP, tProj, ver
def main():
### in Windows, prevent the OS from sleeping while we run
osSleep = None
if os.name == 'nt':
osSleep = WindowsInhibitor()
osSleep.inhibit()
### CHANGE CREDENTIALS MODE BELOW
# zUser, zPass, noteID, tUser, tPass, tProj, ver = askInfo()
zUser = 'ZEPPELIN_USERNAME' # zeppelin username
zPass = 'ZEPPELIN_PASSWORD' # zeppelin password
noteID = 'NOTE_ID' # zeppelin note ID
tUser = 'TABLEAU_USERNAME' # tableau username
tPass = 'TABLEAU_PASSWORD' # tableau password
tProj = 'TABLEAU_PROJECT_NAME' # tableau destination project
ver = 'TABLEAU_ API_VERSION' #tableau API version
print('\nBeginning Zeppelin queries')
### clear old outputs
print('\nClearing old results')
clearOutput = 'https://ZEPPELIN_SERVER/zeppelin/api/notebook/' + noteID + '/clear'
requests.put(clearOutput, auth=HTTPBasicAuth(zUser, zPass))
### restart interpreter to allow higher setting for output limit
print('\nRestarting Interpreter')
requests.put('https://ZEPPELIN_SERVER/zeppelin/api/interpreter/setting/restart/spark', auth=HTTPBasicAuth(zUser, zPass))
### run all paragraphs
print('\nRunning all Paragraphs')
array = getResults(noteID,zUser, zPass)
### Data can be saved to a csv if desired. Set save to y to set this option. Un-comment the two lines below and comment out save = 'n' to ask the user each time.
#ctypes.windll.user32.MessageBoxW(0, "Zeppelin Queries Complete", "", 0x00001000)
#save = input('Do you want to save to CSV? y/n')
save = 'n'
if save in ['y','Y']:
# create folder with name current date and time
print('\nCreating new Folder')
now = dt.datetime.now()
folderName = now.strftime("%b_%d_%Y_%H.%M.%S")
os.mkdir(folderName)
# save data as csv
print('\nSaving data as CSV in new folder')
export(array, folderName)
### append to tableau data source
print('\033[1m' + '\n Beginning Tableau Append Process')
### initialization
print('\nInitializing')
dest_server = 'TABLEAU_SERVER'
### sign in
print("\nSigning in to obtain authentication tokens")
dest_auth_token, dest_site_id, dest_user_id = sign_in(dest_server, tPass, tUser)
### find project id for destination server
print("\nFinding project id for {0}".format(dest_server))
dest_project_id = get_default_project_id(dest_server, dest_auth_token, dest_site_id, ver, tProj)
for i in range(len(array)):
appendTableau(array[i], dest_server, dest_auth_token, dest_site_id, dest_project_id, ver)
### sign out
print("\nSigning out and invalidating the authentication token")
sign_out(dest_server, dest_auth_token, ver)
### allows windows to sleep again
if osSleep:
osSleep.uninhibit()
### popup window upon completion
ctypes.windll.user32.MessageBoxW(0, "Tableau Datasource Uploads Complete", "", 0x00001000)
exit()
if __name__ == "__main__":
main()