#!/usr/bin/python3
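"""Describe a Salesforce table and build its CSV field-mapping file.

Run as a script, this writes mapping/<table>.csv with one row per field, so
the Import and Indexed columns can be edited by hand before synchronization.
With --rawdump it prints the raw Salesforce describe() output as JSON
instead.
"""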
import argparse
import csv
import json
import logging
from collections import OrderedDict

import config
import pg
import query
from salesforce import get_Salesforce

logger = logging.getLogger(__name__)

class TableDesc:
    def __init__(self, name):
        self.name = name
        self.__sf_desc_cache = None
        self.__sf_field_definition_cache = None
        self.__fields_cache = None

    def get_sf_desc(self):
        '''
        Connects to Salesforce and returns the raw table description.
        Data is cached for reuse.
        '''
        if self.__sf_desc_cache is None:
            sf = get_Salesforce()
            accessor = getattr(sf, self.name)
            self.__sf_desc_cache = accessor.describe()
        return self.__sf_desc_cache

    def get_sf_field_definition(self):
        '''
        Runs a query against the Salesforce FieldDefinition table to get
        extra field information.
        Data is cached for reuse.
        Note that this table cannot be fetched entirely. Attempts to do so
        result in:
        > MALFORMED_QUERY: FieldDefinition: a filter on a reified column is
        > required [EntityDefinitionId,DurableId]
        '''
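        # Each returned record is expected to be a dict of the form
        # {'QualifiedApiName': <field name>, 'IsIndexed': <bool>}, which is
        # how get_sf_fields below consumes it.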
        if self.__sf_field_definition_cache is None:
            soql = """SELECT QualifiedApiName,IsIndexed
                      FROM FieldDefinition
                      WHERE EntityDefinitionId='{}'""".format(self.name)
            qry = query.query(soql)
            self.__sf_field_definition_cache = list(qry)
        return self.__sf_field_definition_cache

    def get_sf_fields(self):
        '''
        Returns the fields as an OrderedDict.
        Data is cached for reuse.
        '''
        if self.__fields_cache is None:
            self.__fields_cache = OrderedDict()
            # First get the info from sf_desc
            for sf_field_info in self.get_sf_desc()['fields']:
                self.__fields_cache[sf_field_info['name']] = sf_field_info
            # Then merge in IsIndexed from the FieldDefinition table
            sf_definition = self.get_sf_field_definition()
            for record in sf_definition:
                name = record['QualifiedApiName']
                if name in self.__fields_cache:
                    self.__fields_cache[name]['IsIndexed'] = \
                        record['IsIndexed']
                else:
                    logger.warning(
                        'Table %s, field %s '
                        'is not available from describe',
                        self.name, name)
        return self.__fields_cache

    def get_pg_fields(self):
        '''
        Debug helper: prints the PostgreSQL columns of the table, in
        ordinal position order.
        '''
        cursor = pg.cursor()
        cursor.execute('''
            select column_name, is_nullable
            from information_schema.columns
            where table_schema={esc_schema}
            and table_name={esc_name}
            order by table_name, ordinal_position'''.format(
                esc_schema=pg.escape_str(config.DB_SCHEMA),
                esc_name=pg.escape_str(self.name)),
            )
        while True:
            line = cursor.fetchone()
            if line is None:
                break
            print(line)

    def get_sync_field_names(self):
        '''
        Returns the list of field names to be synchronized.
        That is, the field names in the first column of the CSV mapping
        file whose second column is set to 1.
        See self.make_csv_fieldlist
        '''
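        # The mapping file is the one written by make_csv_fieldlist,
        # for example (hypothetical rows):
        #   "Id",1,1,""
        #   "Name",1,,""
        #   "BillingAddress",,,"compound"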
        filename = 'mapping/{}.csv'.format(self.name)
        result = []
        with open(filename) as f:
            for row in csv.reader(f):
                if row[1] == '1':
                    result.append(row[0])
        return result

    def get_indexed_sync_field_names(self):
        '''
        Returns the list of field names that should be indexed.
        That is, the field names in the first column of the CSV mapping
        file whose third column is set to 1.
        See self.make_csv_fieldlist
        '''
        filename = 'mapping/{}.csv'.format(self.name)
        result = []
        with open(filename) as f:
            for row in csv.reader(f):
                if row[2] == '1':
                    result.append(row[0])
        return result

    def get_sync_fields(self):
        '''
        Returns an OrderedDict of fields to be synchronized.
        '''
        sf_fields = self.get_sf_fields()
        sync_field_names = self.get_sync_field_names()
        result = OrderedDict()
        for fieldname in sync_field_names:
            result[fieldname] = sf_fields[fieldname]
        return result

    def is_field_compound(self, fieldname):
        '''
        Returns True if this field is a compound field, like an address;
        that is, if another field has that value as its compoundFieldName.
        '''
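        # For example, on the standard Account object, BillingStreet
        # reports compoundFieldName 'BillingAddress', which makes
        # 'BillingAddress' a compound field.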
        for fieldinfo in self.get_sf_fields().values():
            if fieldinfo.get('compoundFieldName') == fieldname:
                return True
        return False

    def make_csv_fieldlist(self, default=None):
        '''
        Creates the initial CSV file with the list of fields that can be
        replicated.
        This downloads the field description from Salesforce.
        default None means import all fields except formulas and compound
        fields.
        default 'minimal' means only import the fields listed in
        default_import_fields.
        '''
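        # The output has one row per field:
        #   "FieldName", "Import", "Indexed", "Note"
        # Import and Indexed are either empty or 1; Note carries flags such
        # as compound, calculated or nofielddefinition.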
        default_import_fields = (
            'Id', 'DurableId', 'CreatedDate', 'IsDeleted', 'SystemModstamp'
        )
        sf_fields = self.get_sf_fields()
        filename = 'mapping/{}.csv'.format(self.name)
        print('Writing', filename)
        with open(filename, 'x') as f:
            f.write('"FieldName", "Import", "Indexed", "Note"\n')  # header
            for fieldname, fieldinfo in sf_fields.items():
                logger.debug("Describing field %s : %s",
                             fieldname, fieldinfo)
                if default == 'minimal':
                    if fieldname in default_import_fields:
                        if (fieldname == 'Id'
                                and 'DurableId' in sf_fields):
                            isimport = ''
                        else:
                            isimport = '1'
                    else:
                        isimport = ''
                else:  # default default
                    if fieldinfo['calculated']:
                        isimport = ''
                    elif (fieldname == 'Id'
                            and 'DurableId' in sf_fields):
                        isimport = ''
                    else:
                        isimport = '1'
                notes = []
                if self.is_field_compound(fieldname):
                    notes.append('compound')
                    isimport = ''
                # if fieldinfo['type'] == 'encryptedstring':
                #     notes.append('encryptedstring')
                #     isimport = ''
                if fieldinfo['calculated']:
                    notes.append('calculated')
                    isimport = ''
                isindexed = fieldinfo.get('IsIndexed', None)
                if isindexed is None:
                    notes.append('nofielddefinition')
                if isindexed:
                    isindexed = '1'
                else:
                    isindexed = ''
                f.write('"{}",{},{},"{}"\n'.format(
                    fieldname, isimport, isindexed, ','.join(notes)))

    def get_pk_fieldname(self):
        '''
        Returns the name of the field to use as primary key.
        '''
        sf_fields = self.get_sf_fields()
        if 'DurableId' in sf_fields:
            return 'DurableId'
        if 'Id' in sf_fields:
            return 'Id'
        raise AssertionError(
            'Could not find primary key for table {}'.format(self.name))

    def get_timestamp_name(self):
        '''
        Returns the name of the synchronized field to use as modification
        timestamp, trying both capitalizations of SystemModstamp.
        '''
        fieldnames = self.get_sync_field_names()
        for name in ('SystemModStamp',
                     'SystemModstamp',
                     'LastModifiedDate',
                     'CreatedDate'):
            if name in fieldnames:
                return name
        raise AssertionError(
            'No field to synchronize from. Tried SystemModStamp,'
            ' SystemModstamp, LastModifiedDate and CreatedDate.')

if __name__ == '__main__':
    def main():
        parser = argparse.ArgumentParser(
            description='Create initial list of fields to synchronise')
        parser.add_argument(
            '--rawdump',
            action='store_true',
            help='dump the raw SF description')
        parser.add_argument(
            'table',
            help='table name')
        args = parser.parse_args()

        logging.basicConfig(
            filename=config.LOGFILE,
            format=config.LOGFORMAT.format('tabledesc ' + args.table),
            level=config.LOGLEVEL)

        main_td = TableDesc(args.table)
        # main_td.get_pg_fields()
        if args.rawdump:
            print(json.dumps(main_td.get_sf_desc(), indent=2))
        else:
            main_td.make_csv_fieldlist()

    main()
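    # Example invocations (assuming config.py and Salesforce credentials
    # are set up as the imports above expect):
    #   python3 tabledesc.py Account            # writes mapping/Account.csv
    #   python3 tabledesc.py --rawdump Account  # dumps raw describe() JSON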