-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathp4r_python_utils.py
74 lines (64 loc) · 2.72 KB
/
p4r_python_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# -*- coding: utf-8 -*-
## Import
import logging
import os
import pandas as pd
import sys
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
#handler.setFormatter(logging.Formatter('%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S'))
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
pipe_replace = '-' # to replace | when writing files
def log_and_exit(code, rep): # temporary code to store the return status in file since it cannot be retrieve directly in shell Launch* when using the current version of ../bin/p4r (as of 05/14/2024)
with open(os.path.join(rep, 'python_return_status'), 'w') as f:
f.write(str(code))
sys.exit(code)
def read_input_csv(cfg, file_name_key, **kwargs):
file = os.path.join(cfg['inputpath'], cfg['csvfiles'][file_name_key])
if not os.path.isfile(file):
logger.error('File '+file+' does not exist. Use key inputpath in configuration file to specify input directory.')
log_and_exit(2, cfg['path'])
if 'csv_delim' in cfg.keys():
kwargs.update({'sep' : cfg['csv_delim']})
logger.info('Read file '+file)
data = pd.read_csv(file, **kwargs)
if data.index.has_duplicates:
msg = 'Error: input csv '+file+' has some duplicated names '
if 'index_col' in kwargs.keys():
msg += 'in columns ['+', '.join(kwargs['index_col'])+']'
msg += '. Duplicates are '+'\n\t'.join(str(i) for i in data.index[data.index.duplicated()])
logger.error(msg)
log_and_exit(3, cfg['path'])
return data
def save_input_csv(cfg, file_name_key,data, index=True, **kwargs):
file = os.path.join(cfg['inputpath'], cfg['csvfiles'][file_name_key])
indexSave=0
while os.path.isfile(os.path.join(cfg['inputpath'], cfg['csvfiles'][file_name_key]+'.save_'+str(indexSave)+'.csv')):
indexSave=indexSave+1
fileSave = os.path.join(cfg['inputpath'], cfg['csvfiles'][file_name_key]+'.save_'+str(indexSave)+'.csv')
if 'csv_delim' in cfg.keys():
kwargs.update({'sep' : cfg['csv_delim']})
#data.to_csv(fileSave,**kwargs)
data.to_csv(fileSave,index=index, **kwargs)
def write_input_csv(cfg, file_name_key,data, index=True, **kwargs):
file = os.path.join(cfg['inputpath'], cfg['csvfiles'][file_name_key])
indexSave=0
if 'csv_delim' in cfg.keys():
kwargs.update({'sep' : cfg['csv_delim']})
#data.to_csv(file,**kwargs)
data.to_csv(file,index=index, **kwargs)
def check_and_read_csv(cfg, file, **kwargs):
if not os.path.isfile(file):
logger.error('File '+file+' does not exist.')
log_and_exit(2, cfg['path'])
return pd.read_csv(file, **kwargs)
def get_path():
p4r_root = os.path.normpath(os.environ.get("PLAN4RESROOT"))
pwd = os.path.normpath(os.environ.get("PWD"))
if "P4R_DIR_LOCAL" in os.environ:
path = os.path.normpath(os.environ.get("P4R_DIR_LOCAL"))
else:
path = p4r_root
return path