diff --git a/orangecontrib/single_cell/tests/data/data.xls b/orangecontrib/single_cell/tests/data/data.xls
new file mode 100644
index 0000000..7d789b1
Binary files /dev/null and b/orangecontrib/single_cell/tests/data/data.xls differ
diff --git a/orangecontrib/single_cell/tests/test_load_data.py b/orangecontrib/single_cell/tests/test_load_data.py
index 8897c7a..31816c9 100644
--- a/orangecontrib/single_cell/tests/test_load_data.py
+++ b/orangecontrib/single_cell/tests/test_load_data.py
@@ -76,12 +76,15 @@ def test_file_summary_gz(self):
         self.assertEqual(round(loader.sparsity, 2), 0.86)
 
     def test_file_summary_xls(self):
-        file_name = os.path.join(os.path.dirname(__file__), "data/data.xlsx")
-        loader = ExcelLoader(file_name)
-        self.assertEqual(loader.file_size, 9160)
-        self.assertEqual(loader.n_rows, 11)
-        self.assertEqual(loader.n_cols, 15)
-        self.assertEqual(round(loader.sparsity, 2), 0.86)
+        for file, size in (("data/data.xlsx", 9160), ("data/data.xls", 27648)):
+            # one sub-test per workbook format, so a failure names the file
+            with self.subTest(file=file):
+                file_name = os.path.join(os.path.dirname(__file__), file)
+                loader = ExcelLoader(file_name)
+                self.assertEqual(loader.file_size, size)
+                self.assertEqual(loader.n_rows, 11)
+                self.assertEqual(loader.n_cols, 15)
+                self.assertEqual(round(loader.sparsity, 2), 0.86)
 
     def test_file_summary_loom(self):
         file_name = os.path.join(os.path.dirname(__file__), "data/data.loom")
@@ -121,17 +124,20 @@ def test_load_data_mtx(self):
         npt.assert_array_equal(X, array)
 
     def test_load_data_xls(self):
-        kwargs = {"header_rows": 0, "header_cols": 0}
-        xls_path = os.path.join(os.path.dirname(__file__), "data/data.xlsx")
-        xls_loader = ExcelLoader(xls_path)
-        xls_attrs, xls_X, xls_M, xls_M_index = xls_loader._load_data(**kwargs)
-        csv_loader = Loader(os.path.join(os.path.dirname(__file__),
-                                         "data/DATA_MATRIX_LOG_TPM.txt"))
-        csv_attrs, csv_X, csv_M, csv_M_index = csv_loader._load_data(**kwargs)
-        self.assertEqual(xls_attrs, csv_attrs)
-        npt.assert_array_almost_equal(xls_X, csv_X)
-        npt.assert_array_equal(xls_M, csv_M)
-        npt.assert_array_equal(xls_M_index, csv_M_index)
+        for file in ("data/data.xlsx", "data/data.xls"):
+            # one sub-test per workbook format, so a failure names the file
+            with self.subTest(file=file):
+                kwargs = {"header_rows": 0, "header_cols": 0}
+                xls_path = os.path.join(os.path.dirname(__file__), file)
+                xls_loader = ExcelLoader(xls_path)
+                xls_attrs, xls_X, xls_M, xls_M_index = xls_loader._load_data(**kwargs)
+                csv_loader = Loader(os.path.join(os.path.dirname(__file__),
+                                                 "data/DATA_MATRIX_LOG_TPM.txt"))
+                csv_attrs, csv_X, csv_M, csv_M_index = csv_loader._load_data(**kwargs)
+                self.assertEqual(xls_attrs, csv_attrs)
+                npt.assert_array_almost_equal(xls_X, csv_X)
+                npt.assert_array_equal(xls_M, csv_M)
+                npt.assert_array_equal(xls_M_index, csv_M_index)
 
     def test_n_genes_n_cells(self):
         file_name = os.path.join(os.path.dirname(__file__),
diff --git a/orangecontrib/single_cell/widgets/load_data.py b/orangecontrib/single_cell/widgets/load_data.py
index 12e61ce..6018382 100644
--- a/orangecontrib/single_cell/widgets/load_data.py
+++ b/orangecontrib/single_cell/widgets/load_data.py
@@ -18,6 +18,7 @@
     guess_data_type, sanitize_variable
 )
 from Orange.widgets.utils.filedialogs import RecentPath
+from openpyxl import load_workbook
 
 
 def separator_from_filename(file_name):
@@ -510,10 +511,9 @@ def _set_enable_annotations(self):
 
     def _set_file_parameters(self):
         try:
-            with open_compressed(self._file_name, "rb") as f:
-                self.n_rows, self.n_cols, non_zero_el = scipy.io.mminfo(f)[:3]
-                all_el = self.n_rows * self.n_cols
-                self.sparsity = (all_el - non_zero_el) / all_el
+            self.n_rows, self.n_cols, non_zero_el = scipy.io.mminfo(self._file_name)[:3]
+            all_el = self.n_rows * self.n_cols
+            self.sparsity = (all_el - non_zero_el) / all_el
         except OSError:
             pass
         except ValueError:
@@ -616,9 +616,20 @@ def __init__(self, file_name):
 
     def _set_file_parameters(self):
         try:
-            sheet = xlrd.open_workbook(self._file_name).sheet_by_index(0)
-            self.n_cols = sheet.ncols
-            self.n_rows = sheet.nrows
+            # compare the suffix case-insensitively ("DATA.XLSX" is valid too)
+            lowered_name = self._file_name.lower()
+            if lowered_name.endswith(".xls"):
+                # xlrd >= 2.0 supports only the legacy binary .xls format
+                sheet = xlrd.open_workbook(self._file_name).sheet_by_index(0)
+                self.n_cols, self.n_rows = sheet.ncols, sheet.nrows
+            elif lowered_name.endswith(".xlsx"):
+                # .xlsx is read with openpyxl; a read-only workbook keeps the
+                # file open until close() is called, so always release it
+                wb = load_workbook(self._file_name, read_only=True)
+                try:
+                    sheet = wb.worksheets[0]
+                    self.n_cols, self.n_rows = sheet.max_column, sheet.max_row
+                finally:
+                    wb.close()
             self._set_sparsity()
         except Exception:
             pass
diff --git a/setup.py b/setup.py
index 84706b4..2acc13a 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,8 @@
         'fastdtw==0.3.2',
         'pandas>=0.23',
         'loompy>=2.0.10',
-        'xlrd~=1.2.0',
+        'xlrd>=2.0.1',
+        'openpyxl',
         'anndata>=0.6.21',
         'numpy',
         'scikit-learn',
diff --git a/tox.ini b/tox.ini
index dfc88e5..2a29f16 100644
--- a/tox.ini
+++ b/tox.ini
@@ -25,6 +25,7 @@ deps =
     oldest: orange3==3.34.0
     oldest: orange-canvas-core==0.1.28
     oldest: orange-widget-base==4.19.0
+    oldest: pandas==1.4.0
     latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3
     latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core
     latest: https://github.com/biolab/orange-widget-base/archive/refs/heads/master.zip#egg=orange-widget-base