diff --git a/easyaccess/easyaccess.py b/easyaccess/easyaccess.py index 0569803..ae57cca 100755 --- a/easyaccess/easyaccess.py +++ b/easyaccess/easyaccess.py @@ -1757,25 +1757,7 @@ def do_show_index(self, arg): def complete_show_index(self, text, line, begidx, lastidx): return self._complete_tables(text) - def get_filename(self, line): - # Good to move some of this into eautils.fileio - line = line.replace(';', '') - if line == "": - print('\nMust include table filename!\n') - return - if line.find('.') == -1: - print(colored('\nError in filename\n', "red")) - return - - filename = "".join(line.split()) - basename = os.path.basename(filename) - alls = basename.split('.') - if len(alls) > 2: - # Oracle tables cannot contain a '.' - print("\nDo not use extra '.' in filename\n") - return - return filename def check_table_exists(self, table): # check table first @@ -1915,11 +1897,11 @@ def insert_data(self, table, columns, values, dtypes=None, niter = 0): niter+1, len(values), len(columns), table.upper(), t2 - t1), "green")) - def do_load_table(self, line, name=None, chunksize=None): + def do_load_table(self, line, name=None, chunksize=None, memsize=None): """ DB:Loads a table from a file (csv or fits) taking name from filename and columns from header - Usage: load_table [--tablename NAME] [--chunksize CHUNK] + Usage: load_table [--tablename NAME] [--chunksize CHUNK] [--memsize MEMCHUNK] Ex: example.csv has the following content RA,DEC,MAG 1.23,0.13,23 @@ -1932,6 +1914,8 @@ def do_load_table(self, line, name=None, chunksize=None): --tablename NAME given name for the table, default is taken from filename --chunksize CHUNK Number of rows to be inserted at a time. Useful for large files that do not fit in memory + --memsize MEMCHUNK The size in Mb to be read in chunks. 
If both specified, the lower + number of rows is selected (the lower memory limitations) Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format as data (using ',' or space) @@ -1943,7 +1927,9 @@ def do_load_table(self, line, name=None, chunksize=None): load_parser.add_argument('filename', help='name for the file', action='store', default=None) load_parser.add_argument('--tablename', help='name for the table', action='store', default=None) load_parser.add_argument('--chunksize', help='number of rows to read in blocks to avoid memory ' - 'issues', action='store', type=int, default=None) + 'issues', action='store', type=int, default=None) + load_parser.add_argument('--memsize', help='size of the chunks to be read in Mb ', + action='store', type=int, default=None) load_parser.add_argument('-h', '--help', help='print help', action='store_true') try: load_args = load_parser.parse_args(line.split()) @@ -1953,11 +1939,20 @@ def do_load_table(self, line, name=None, chunksize=None): if load_args.help: self.do_help('load_table') return - filename = self.get_filename(load_args.filename) + filename = eafile.get_filename(load_args.filename) table = load_args.tablename chunk = load_args.chunksize + memchunk = load_args.memsize if chunksize is not None: chunk = chunksize + if memsize is not None: + memchunk = memsize + if memchunk is not None: + memchunk_rows = eafile.get_chunksize(filename, memory=memchunk) + if chunk is not None: + chunk = min(chunk, memchunk_rows) + else: + chunk = memchunk_rows if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) @@ -1978,7 +1973,7 @@ def do_load_table(self, line, name=None, chunksize=None): return try: - data, iterator = self.load_data(filename) + data, iterator = eafile.read_file(filename) except: print_exception() return @@ -2070,11 +2065,11 @@ def complete_load_table(self, text, line, start_idx, end_idx): return _complete_path(line) - def 
do_append_table(self, line, name=None, chunksize=None): + def do_append_table(self, line, name=None, chunksize=None, memsize=None): """ DB:Appends a table from a file (csv or fits) taking name from filename and columns from header. - Usage: append_table [--tablename NAME] [--chunksize CHUNK] + Usage: append_table [--tablename NAME] [--chunksize CHUNK] [--memsize MEMCHUNK] Ex: example.csv has the following content RA,DEC,MAG 1.23,0.13,23 @@ -2087,7 +2082,9 @@ def do_append_table(self, line, name=None, chunksize=None): --tablename NAME given name for the table, default is taken from filename --chunksize CHUNK Number of rows to be inserted at a time. Useful for large files - that do not fit in memory + that do not fit in memory + --memsize MEMCHUNK The size in Mb to be read in chunks. If both specified, the lower + number of rows is selected (the lower memory limitations) Note: - For csv or tab files, first line must have the column names (without # or any other comment) and same format as data (using ',' or space) @@ -2100,6 +2097,8 @@ def do_append_table(self, line, name=None, chunksize=None): append_parser.add_argument('--tablename', help='name for the table to append to', action='store', default=None) append_parser.add_argument('--chunksize', help='number of rows to read in blocks to avoid memory ' 'issues', action='store', default=None, type=int) + append_parser.add_argument('--memsize', help='size of the chunks to be read in Mb ', action='store', + type=int, default=None) append_parser.add_argument('-h', '--help', help='print help', action='store_true') try: append_args = append_parser.parse_args(line.split()) @@ -2109,11 +2108,21 @@ def do_append_table(self, line, name=None, chunksize=None): if append_args.help: self.do_help('append_table') return - filename = self.get_filename(append_args.filename) + filename = eafile.get_filename(append_args.filename) table = append_args.tablename chunk = append_args.chunksize + memchunk = append_args.memsize if chunksize is 
not None: chunk = chunksize + if memsize is not None: + memchunk = memsize + if memchunk is not None: + memchunk_rows = eafile.get_chunksize(filename, memory=memchunk) + if chunk is not None: + chunk = min(chunk, memchunk_rows) + else: + chunk = memchunk_rows + if filename is None: return base, ext = os.path.splitext(os.path.basename(filename)) @@ -2134,7 +2143,7 @@ def do_append_table(self, line, name=None, chunksize=None): '\n DESDB ~> CREATE TABLE %s (COL1 TYPE1(SIZE), ..., COLN TYPEN(SIZE));\n' % table.upper()) return try: - data, iterator = self.load_data(filename) + data, iterator = eafile.read_file(filename) except: print_exception() return @@ -2508,7 +2517,7 @@ def myquota(self): """ self.do_myquota('') - def load_table(self, table_file, name=None, chunksize=None): + def load_table(self, table_file, name=None, chunksize=None, memsize=None): """ Loads and create a table in the DB. If name is not passed, is taken from the filename. Formats supported are 'fits', 'csv' and 'tab' files @@ -2518,6 +2527,7 @@ def load_table(self, table_file, name=None, chunksize=None): table_file : Filename to be uploaded as table (.csv, .fits, .tab) name : Name of the table to be created chunksize : Number of rows to upload at a time to avoid memory issues + memsize : Size of chunk to be read. In Mb. If both specified, the lower number of rows is selected Returns: -------- @@ -2525,14 +2535,14 @@ def load_table(self, table_file, name=None, chunksize=None): """ try: - self.do_load_table(table_file, name=name, chunksize=chunksize) + self.do_load_table(table_file, name=name, chunksize=chunksize, memsize=memsize) return True except: # exception return False - def append_table(self, table_file, name=None, chunksize=None): + def append_table(self, table_file, name=None, chunksize=None, memsize=None): """ Appends data to a table in the DB. If name is not passed, is taken from the filename. 
Formats supported are 'fits', 'csv' and 'tab' files @@ -2542,13 +2552,14 @@ def append_table(self, table_file, name=None, chunksize=None): table_file : Filename to be uploaded as table (.csv, .fits, .tab) name : Name of the table to be created chunksize : Number of rows to upload at a time to avoid memory issues + memsize : Size of chunk to be read. In Mb. If both specified, the lower number of rows is selected Returns: -------- True if success otherwise False """ try: - self.do_append_table(table_file, name=name, chunksize=chunksize) + self.do_append_table(table_file, name=name, chunksize=chunksize, memsize=memsize) return True except: return False @@ -2664,7 +2675,10 @@ def initial_message(quiet=False, clear=True): or --append_table") parser.add_argument("--chunksize", dest='chunksize', type=int, default = None, help="Number of rows to be inserted at a time. Useful for large files \ - that do not fit in memory. Use with --load_table") + that do not fit in memory. Use with --load_table or --append_table") + parser.add_argument("--memsize", dest='memsize', type=int, default = None, + help=" Size of chunk to be read at a time in Mb. Use with --load_table or " + "--append_table") parser.add_argument("-s", "--db",dest='db', #choices=[...]? 
help="Override database name [dessci,desoper,destest]") parser.add_argument("-q", "--quiet", action="store_true", dest='quiet', @@ -2785,6 +2799,8 @@ def colored(line, color): return line linein += ' --tablename ' + args.tablename if args.chunksize is not None: linein += ' --chunksize ' + str(args.chunksize) + if args.memsize is not None: + linein += ' --memsize ' + str(args.memsize) cmdinterp.onecmd(linein) os._exit(0) elif args.appendtable is not None: @@ -2795,6 +2811,8 @@ def colored(line, color): return line linein += ' --tablename ' + args.tablename if args.chunksize is not None: linein += ' --chunksize ' + str(args.chunksize) + if args.memsize is not None: + linein += ' --memsize ' + str(args.memsize) cmdinterp.onecmd(linein) os._exit(0) else: diff --git a/easyaccess/eautils/fileio.py b/easyaccess/eautils/fileio.py index 4960b14..0af126c 100644 --- a/easyaccess/eautils/fileio.py +++ b/easyaccess/eautils/fileio.py @@ -21,6 +21,11 @@ import eautils.dtypes as eatypes import version +try: + from termcolor import colored +except: + def colored(line, color): return line + PANDAS_DEFS = ('comma separated text', 'space separated tex', 'HDF5 format') PANDAS_EXTS = ('.csv', '.tab', '.h5') @@ -31,6 +36,63 @@ FILE_EXTS = PANDAS_EXTS + FITS_EXTS +def get_filename(line): + """ + Return filename after checking it has the right structure (no extra periods) + """ + line = line.replace(';', '') + if line == "": + print('\nMust include table filename!\n') + return + if line.find('.') == -1: + print(colored('\nError in filename\n', "red")) + return + + filename = "".join(line.split()) + basename = os.path.basename(filename) + alls = basename.split('.') + if len(alls) > 2: + # Oracle tables cannot contain a '.' + print("\nDo not use extra '.' 
in filename\n") + return + + return filename + + +def get_chunksize(filename, memory=500): + """ + Get the approximate number of lines to be read given memory constraints + + Parameters: + ----------- + filename : File name + memory : Memory in MB to compute the approximate number of rows + + Returns: + -------- + The number of rows to be read for each chunk of memory (raises IOError for HDF5 files) + """ + base, ext = os.path.splitext(filename) + check_filetype(ext, FILE_EXTS) + + if ext in PANDAS_EXTS: + if ext == '.csv': sepa = ',' + elif ext == '.tab' : sepa = None + elif ext == '.h5': + raise IOError('\nReading HDF5 files by chunks is not supported yet\n') + temp = pd.read_csv(filename, sep=sepa, nrows=100) + bytes_per_row = temp.memory_usage(index=True).sum()/100. + del temp + elif ext in FITS_EXTS: + temp = fitsio.FITS(filename) + temp_data = temp[1][0:100] + bytes_per_row = temp_data.nbytes/100. + temp.close() + del temp_data + + return int(memory*1024**2/bytes_per_row) + + def cutquery(query, length): """ Return query in a list of fixed sized character strings diff --git a/tests/test_api.py b/tests/test_api.py index cbd3870..814da25 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,3 +1,4 @@ +from __future__ import print_function import unittest import easyaccess as ea import numpy as np @@ -19,6 +20,7 @@ class TestApi(unittest.TestCase): nrows = 10000 prefetch = 4000 chunk = 1000 + memsize = 1 sqlfile = 'temp.sql' csvfile = 'temp.csv' fitsfile = 'temp.fits' @@ -26,6 +28,7 @@ class TestApi(unittest.TestCase): def test_ea_import(self): + print('\n*** test_ea_import ***\n') test1 = self.con.ea_import('wrapped') if test1 is not None: self.assertTrue(test1) @@ -34,6 +37,7 @@ def test_ea_import(self): self.assertTrue(test2) def test_pandas_to_db(self): + print('\n*** test_pandas_to_db ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -61,6 +65,7 @@ def test_pandas_to_db(self): cursor.close() def test_query_to_pandas(self): + 
print('\n*** test_query_to_pandas ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -81,6 +86,7 @@ def test_query_to_pandas(self): self.con.drop_table(self.tablename) def test_describe_table(self): + print('\n*** test_describe_table ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -93,6 +99,7 @@ def test_describe_table(self): self.con.drop_table(self.tablename) def test_loadsql(self): + print('\n*** test_loadsql ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -122,11 +129,13 @@ def test_loadsql(self): def test_mytables(self): + print('\n*** test_mytables ***\n') df = self.con.mytables() self.assertTrue('FGOTTENMETADATA' in df['TABLE_NAME'].values.tolist()) def test_load_table_csv(self): + print('\n*** test_load_table_csv ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -175,8 +184,61 @@ def test_load_table_csv(self): self.con.drop_table(self.tablename) os.remove(self.csvfile) - + + def test_load_append_table_memory_csv(self): + print('\n*** test_load_append_table_memory_csv ***\n') + data = create_test_data() + df = pd.DataFrame(data) + self.assertEqual( len(df), self.nrows) + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') + for i in range(9): + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',',mode='a',header=False) + self.assertTrue(os.path.exists(self.csvfile)) + # memsize + self.con.drop_table(self.tablename) + self.assertTrue(self.con.load_table(self.csvfile, name=self.tablename, memsize=self.memsize)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*10) + # appending + self.assertTrue(self.con.append_table(self.csvfile, name=self.tablename, memsize=self.memsize)) + cursor = self.con.cursor() + temp = 
cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*20) + ## end + os.remove(self.csvfile) + self.con.drop_table(self.tablename) + + def test_load_append_table_memory_chunk_csv(self): + print('\n*** test_load_append_table_memory_chunk_csv ***\n') + data = create_test_data() + df = pd.DataFrame(data) + self.assertEqual( len(df), self.nrows) + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') + for i in range(9): + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',',mode='a',header=False) + self.assertTrue(os.path.exists(self.csvfile)) + # memsize + self.con.drop_table(self.tablename) + self.assertTrue(self.con.load_table(self.csvfile, name=self.tablename, memsize=self.memsize, chunksize=self.chunk*10)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*10) + # appending + self.assertTrue(self.con.append_table(self.csvfile, name=self.tablename, memsize=self.memsize, chunksize=self.chunk*200)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*20) + ## end + os.remove(self.csvfile) + self.con.drop_table(self.tablename) + def test_load_table_fits(self): + print('\n*** test_load_table_fits ***\n') data = create_test_data() fitsio.write(self.fitsfile, data, clobber=True) self.assertTrue(os.path.exists(self.fitsfile)) @@ -223,7 +285,57 @@ def test_load_table_fits(self): self.con.drop_table(self.tablename) os.remove(self.fitsfile) + def test_load_append_table_memory_fits(self): + print('\n*** test_load_append_table_memory_fits ***\n') + data = create_test_data() + for i in range(4): + data = np.concatenate((data,data)) + fitsio.write(self.fitsfile, data, clobber=True) + self.assertTrue(os.path.exists(self.fitsfile)) + # memsize 
+ self.con.drop_table(self.tablename) + self.assertTrue(self.con.load_table(self.fitsfile, name=self.tablename, memsize=self.memsize)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*16) + # appending + self.assertTrue(self.con.append_table(self.fitsfile, name=self.tablename, memsize=self.memsize)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*2*16) + ## end + os.remove(self.fitsfile) + self.con.drop_table(self.tablename) + + def test_load_append_table_memory_chunk_fits(self): + print('\n*** test_load_append_table_memory_chunk_fits ***\n') + data = create_test_data() + for i in range(4): + data = np.concatenate((data,data)) + fitsio.write(self.fitsfile, data, clobber=True) + self.assertTrue(os.path.exists(self.fitsfile)) + # memsize + self.con.drop_table(self.tablename) + self.assertTrue(self.con.load_table(self.fitsfile, name=self.tablename, memsize=self.memsize, chunksize=self.chunk*10)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*16) + # appending + self.assertTrue(self.con.append_table(self.fitsfile, name=self.tablename, memsize=self.memsize, chunksize=self.chunk*200)) + cursor = self.con.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*2*16) + ## end + os.remove(self.fitsfile) + self.con.drop_table(self.tablename) + + def test_load_table_hdf5(self): + print('\n*** test_load_table_hdf5 ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -261,6 +373,7 @@ def test_load_table_hdf5(self): def test_query_and_save(self): + print('\n*** 
test_query_and_save ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -287,7 +400,7 @@ def test_query_and_save(self): self.assertEqual(len(fetch), self.nrows*35) self.con.outfile_max_mb = 1 self.con.query_and_save(query, self.csvfile, print_time=False) - for i in range(6): + for i in range(4): self.assertTrue(os.path.exists(os.path.splitext(self.csvfile)[0]+'_00000'+str(i+1)+'.csv')) os.remove(os.path.splitext(self.csvfile)[0]+'_00000'+str(i+1)+'.csv') self.con.query_and_save(query, self.fitsfile, print_time=False) @@ -299,6 +412,7 @@ def test_query_and_save(self): self.con.drop_table(self.tablename) def test_inline_functions(self): + print('\n*** test_inline_functions ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py index 50d4fe6..4a3df48 100644 --- a/tests/test_interpreter.py +++ b/tests/test_interpreter.py @@ -1,3 +1,4 @@ +from __future__ import print_function import unittest import easyaccess as ea import numpy as np @@ -24,12 +25,14 @@ class TestInterpreter(unittest.TestCase): nrows = 10000 prefetch = 4000 chunk = 1000 + memsize = 1 sqlfile = 'temp.sql' csvfile = 'temp.csv' fitsfile = 'temp.fits' h5file = 'temp.h5' def test_describe(self): + print('\n*** test_describe ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -45,6 +48,7 @@ def test_describe(self): def test_add_comment(self): + print('\n*** test_add_comment ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -62,6 +66,7 @@ def test_add_comment(self): os.remove(self.csvfile) def test_select(self): + print('\n*** test_select ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -75,6 +80,7 @@ def test_select(self): os.remove(self.csvfile) def test_select_csv(self): + print('\n*** test_select_csv 
***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -90,6 +96,7 @@ def test_select_csv(self): def test_select_fits(self): + print('\n*** test_select_fits ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -105,6 +112,7 @@ def test_select_fits(self): self.con.drop_table(self.tablename) def test_select_hdf5(self): + print('\n*** test_select_hdf5 ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -120,6 +128,7 @@ def test_select_hdf5(self): self.con.drop_table(self.tablename) def test_select_by_chunks(self): + print('\n*** test_select_by_chunks ***\n') global load_bar load_bar = False data = create_test_data() @@ -150,7 +159,8 @@ def test_select_by_chunks(self): if os.path.exists(self.csvfile): os.remove(self.csvfile) - def test_load_table_csv(self): + def test_load_append_table_csv(self): + print('\n*** test_load_append_table_csv ***\n') data = create_test_data() df = pd.DataFrame(data) self.assertEqual( len(df), self.nrows) @@ -173,7 +183,16 @@ def test_load_table_csv(self): fetch = temp.fetchall() self.assertEqual(len(fetch), self.nrows*2) self.con.drop_table(os.path.splitext(self.csvfile)[0].upper()) - + os.remove(self.csvfile) + + + def test_load_append_table_name_csv(self): + print('\n*** test_load_append_table_name_csv ***\n') + data = create_test_data() + df = pd.DataFrame(data) + self.assertEqual( len(df), self.nrows) + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') + self.assertTrue(os.path.exists(self.csvfile)) # name from tablename self.con.drop_table(self.tablename) command = "load_table %s --tablename %s" % (self.csvfile, self.tablename) @@ -189,6 +208,17 @@ def test_load_table_csv(self): temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) fetch = temp.fetchall() self.assertEqual(len(fetch), self.nrows*2) + self.con.drop_table(self.tablename) + os.remove(self.csvfile) + + 
+ def test_load_append_table_chunk_csv(self): + print('\n*** test_load_append_table_chunk_csv ***\n') + data = create_test_data() + df = pd.DataFrame(data) + self.assertEqual( len(df), self.nrows) + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') + self.assertTrue(os.path.exists(self.csvfile)) # chunksize self.con.drop_table(self.tablename) command = "load_table %s --tablename %s --chunksize %s" % (self.csvfile, self.tablename, self.chunk) @@ -204,11 +234,65 @@ def test_load_table_csv(self): temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) fetch = temp.fetchall() self.assertEqual(len(fetch), self.nrows*2) + + def test_load_append_table_memory_csv(self): + print('\n*** test_load_append_table_memory_csv ***\n') + data = create_test_data() + df = pd.DataFrame(data) + self.assertEqual( len(df), self.nrows) + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') + for i in range(9): + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',',mode='a',header=False) + self.assertTrue(os.path.exists(self.csvfile)) + # memsize + self.con.drop_table(self.tablename) + command = "load_table %s --tablename %s --memsize %s" % (self.csvfile, self.tablename, self.memsize) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*10) + # appending + command = "append_table %s --tablename %s --memsize %s" % (self.csvfile, self.tablename, self.memsize) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*20) + ## end os.remove(self.csvfile) self.con.drop_table(self.tablename) + def test_load_append_table_memory_chunk_csv(self): + print('\n*** test_load_append_table_memory_chunk_csv ***\n') + data = create_test_data() + df = 
pd.DataFrame(data) + self.assertEqual( len(df), self.nrows) + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',') + for i in range(9): + df.to_csv(self.csvfile, index=False, float_format='%.8f', sep=',',mode='a',header=False) + self.assertTrue(os.path.exists(self.csvfile)) + # memsize + self.con.drop_table(self.tablename) + command = "load_table %s --tablename %s --memsize %s --chunksize %s" % (self.csvfile, self.tablename, self.memsize, self.chunk*10) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*10) + # appending + command = "append_table %s --tablename %s --memsize %s --chunksize %s" % (self.csvfile, self.tablename, self.memsize, self.chunk*200) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*20) + ## end + os.remove(self.csvfile) + self.con.drop_table(self.tablename) - def test_load_table_fits(self): + def test_load_append_table_fits(self): + print('\n*** test_load_append_table_fits ***\n') data = create_test_data() fitsio.write(self.fitsfile, data, clobber=True) self.assertTrue(os.path.exists(self.fitsfile)) @@ -229,7 +313,13 @@ def test_load_table_fits(self): fetch = temp.fetchall() self.assertEqual(len(fetch), self.nrows*2) self.con.drop_table(os.path.splitext(self.fitsfile)[0].upper()) - + os.remove(self.fitsfile) + + def test_load_append_table_name_fits(self): + print('\n*** test_load_append_table_name_fits ***\n') + data = create_test_data() + fitsio.write(self.fitsfile, data, clobber=True) + self.assertTrue(os.path.exists(self.fitsfile)) # name from tablename self.con.drop_table(self.tablename) command = "load_table %s --tablename %s" % (self.fitsfile, self.tablename) @@ -245,6 +335,14 @@ def test_load_table_fits(self): temp = 
cursor.execute('select RA,DEC from %s' % self.tablename.upper()) fetch = temp.fetchall() self.assertEqual(len(fetch), self.nrows*2) + os.remove(self.fitsfile) + self.con.drop_table(self.tablename) + + def test_load_append_table_chunk_fits(self): + print('\n*** test_load_append_table_chunk_fits ***\n') + data = create_test_data() + fitsio.write(self.fitsfile, data, clobber=True) + self.assertTrue(os.path.exists(self.fitsfile)) # chunksize self.con.drop_table(self.tablename) command = "load_table %s --tablename %s --chunksize %s" % (self.fitsfile, self.tablename, self.chunk) @@ -263,7 +361,60 @@ def test_load_table_fits(self): os.remove(self.fitsfile) self.con.drop_table(self.tablename) + def test_load_append_table_memory_fits(self): + print('\n*** test_load_append_table_memory_fits ***\n') + data = create_test_data() + for i in range(4): + data = np.concatenate((data,data)) + fitsio.write(self.fitsfile, data, clobber=True) + self.assertTrue(os.path.exists(self.fitsfile)) + # memsize + self.con.drop_table(self.tablename) + command = "load_table %s --tablename %s --memsize %s" % (self.fitsfile, self.tablename, self.memsize) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*16) + # appending + command = "append_table %s --tablename %s --memsize %s" % (self.fitsfile, self.tablename, self.memsize) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*2*16) + ## end + os.remove(self.fitsfile) + self.con.drop_table(self.tablename) + + def test_load_append_table_memory_chunk_fits(self): + print('\n*** test_load_append_table_memory_chunk_fits ***\n') + data = create_test_data() + for i in range(4): + data = np.concatenate((data,data)) + fitsio.write(self.fitsfile, data, clobber=True) + 
self.assertTrue(os.path.exists(self.fitsfile)) + # memsize + self.con.drop_table(self.tablename) + command = "load_table %s --tablename %s --memsize %s --chunksize %s" % (self.fitsfile, self.tablename, self.memsize, self.chunk*10) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*16) + # appending + command = "append_table %s --tablename %s --memsize %s --chunksize %s" % (self.fitsfile, self.tablename, self.memsize, self.chunk*200) + self.con.onecmd(command) + cursor = self.con2.cursor() + temp = cursor.execute('select RA,DEC from %s' % self.tablename.upper()) + fetch = temp.fetchall() + self.assertEqual(len(fetch), self.nrows*2*16) + ## end + os.remove(self.fitsfile) + self.con.drop_table(self.tablename) + def test_loadsql(self): + print('\n*** test_loadsql ***\n') data = create_test_data() df = pd.DataFrame(data) df.to_csv(self.csvfile,index=False, float_format='%.8f', sep=',') @@ -290,6 +441,7 @@ def test_loadsql(self): def test_inline(self): + print('\n*** test_inline ***\n') data = create_test_data() df = pd.DataFrame(data) df.to_csv(self.csvfile,index=False, float_format='%.8f', sep=',')