From fbd7a6da0a3292b4da47540443c460c8b255c11b Mon Sep 17 00:00:00 2001 From: Yao-Yuan Mao Date: Tue, 26 Oct 2021 13:59:03 -0400 Subject: [PATCH 1/2] add where and several QueryMaker; v0.2.0 --- easyquery.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++----- test_main.py | 40 +++++++++++++++++++++---------- 2 files changed, 89 insertions(+), 18 deletions(-) diff --git a/easyquery.py b/easyquery.py index 515b296..552258c 100644 --- a/easyquery.py +++ b/easyquery.py @@ -3,7 +3,7 @@ NumPy structured arrays, astropy Table, and Pandas DataFrame. Project website: https://github.com/yymao/easyquery The MIT License (MIT) -Copyright (c) 2017-2020 Yao-Yuan Mao (yymao) +Copyright (c) 2017-2021 Yao-Yuan Mao (yymao) http://opensource.org/licenses/MIT """ @@ -20,7 +20,7 @@ __all__ = ['Query', 'QueryMaker'] -__version__ = '0.1.6' +__version__ = '0.2.0' def _is_string_like(obj): @@ -42,7 +42,8 @@ class Query(object): All of them operate on NumPy structured array and astropy Table: - `filter` returns a new table that only has entries satisfying the query; - `count` returns the number of entries satisfying the query; - - `mask` returns a bool array for masking the table. + - `mask` returns a bool array for masking the table; + - `where` returns an int array for the indices that select satisfying entries. For most simple cases a Query object can be created with a numexpr string. 
A Query object can also be created with a tuple, where the first element of @@ -69,6 +70,8 @@ class Query(object): 1 >>> q.mask(t) array([False, False, False, True], dtype=bool) + >>> q.where(t) + array([3], dtype=int64) >>> q2 = (~q & Query('b > c')) >>> q2.count(t) @@ -216,7 +219,7 @@ def mask(self, table): """ if self._operator is None: if self._operands is None: - return np.ones(self._get_table_len(table), dtype=np.bool) + return np.ones(self._get_table_len(table), dtype=bool) else: return self._create_mask(table, self._operands) @@ -243,7 +246,7 @@ def filter(self, table, column_slice=None): If `column_slice` is provided, also select on columns. Equivalent to table[Query(...).mask(table)][column_slice] - but with more efficient implementaion. + but with more efficient implementation. Parameters ---------- @@ -289,6 +292,24 @@ def count(self, table): return np.count_nonzero(self.mask(table)) + def where(self, table): + """ + Return the indices of the rows in `table` that satisfy input queries. + Equivalent to calling `np.flatnonzero(Query(...).mask(table))`. + + Parameters + ---------- + table : NumPy structured array, astropy Table, etc. + + Returns + ------- + indices : numpy int array + """ + if self._operator is None and self._operands is None: + return np.arange(self._get_table_len(table)) + + return np.flatnonzero(self.mask(table)) + def copy(self): """ Create a copy of the current Query object. @@ -405,6 +426,24 @@ def mask(table, *queries): return _query_class(*queries).mask(table) +def where(table, *queries): + """ + A convenient function to get the indices of the rows in `table` that + satisfy input `queries`. + Equivalent to `Query(*queries).where(table)` + + Parameters + ---------- + table : NumPy structured array, astropy Table, etc. 
+ queries : string, tuple, callable + + Returns + ------- + indices : numpy int array + """ + return _query_class(*queries).where(table) + + class QueryMaker(): """ provides convenience functions to generate query objects @@ -419,7 +458,7 @@ def isin(col_name, test_elements, assume_unique=False, invert=False): @staticmethod def vectorize(row_function, *col_names): - return _query_class((lambda *args: np.fromiter(map(row_function, *args), np.bool),) + tuple(col_names)) + return _query_class((lambda *args: np.fromiter(map(row_function, *args), bool),) + tuple(col_names)) @staticmethod def contains(col_name, test_value): @@ -456,3 +495,19 @@ def startswith(col_name, prefix, start=0, end=None): @staticmethod def endswith(col_name, suffix, start=0, end=None): return _query_class((functools.partial(np.char.endswith, suffix=suffix, start=start, end=end), col_name)) + + @staticmethod + def isfinite(col_name): + return QueryMaker.vectorize(np.isfinite, col_name) + + @staticmethod + def isnan(col_name): + return QueryMaker.vectorize(np.isnan, col_name) + + @staticmethod + def isnotnan(col_name): + return ~QueryMaker.isnan(col_name) + + @staticmethod + def isclose(col1_name, col2_name): + return QueryMaker.vectorize(np.isclose, col1_name, col2_name) diff --git a/test_main.py b/test_main.py index 3a40707..5e6e0d4 100644 --- a/test_main.py +++ b/test_main.py @@ -6,19 +6,19 @@ def test_valid_init(): """ test valid Query object creation """ - q1 = Query() - q2 = Query(None) + q1 = Query() # noqa: F841 + q2 = Query(None) # noqa: F841 q3 = Query('x > 2') - q4 = Query(lambda t: t['x'] > 2) - q5 = Query((lambda c: c > 2, 'x')) - q6 = Query('x > 2', lambda t: t['x'] > 2, (lambda c: c > 2, 'x')) - q7 = Query(q3) - q8 = Query(q3, 'x > 2') + q4 = Query(lambda t: t['x'] > 2) # noqa: F841 + q5 = Query((lambda c: c > 2, 'x')) # noqa: F841 + q6 = Query('x > 2', lambda t: t['x'] > 2, (lambda c: c > 2, 'x')) # noqa: F841 + q7 = Query(q3) # noqa: F841 + q8 = Query(q3, 'x > 2') # noqa: F841 def 
check_invalid_init(*queries): try: - q = Query(*queries) + q = Query(*queries) # noqa: F841 except ValueError: pass else: @@ -34,22 +34,31 @@ def test_invalid_init(): def gen_test_table(): - return np.array([(1, 5, 4.5, "abcd"), (1, 1, 6.2, "pqrs"), (3, 2, 0.5, "asdf"), (5, 5, -3.5, "wxyz")], - dtype=np.dtype([('a', ' -1) check_query_on_table(t, QueryMaker.find("s", "a"), np.char.find(t["s"], "a") > -1) + check_query_on_table(t, QueryMaker.isfinite("c"), np.isfinite(t["c"])) + check_query_on_table(t, QueryMaker.isnan("c"), np.isnan(t["c"])) + check_query_on_table(t, QueryMaker.isnotnan("c"), ~np.isnan(t["c"])) + check_query_on_table(t, QueryMaker.isclose("a", "b"), np.isclose(t["a"], t["b"])) + assert QueryMaker.equal_columns("s", "s").mask(t).all() + if __name__ == '__main__': test_valid_init() test_invalid_init() From 57aedc500961f8f90b92f0c5b0d9f017447455c6 Mon Sep 17 00:00:00 2001 From: Yao-Yuan Mao Date: Tue, 26 Oct 2021 13:59:14 -0400 Subject: [PATCH 2/2] update CI test --- .github/workflows/pythonpackage.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index d1f27f3..9d0728a 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -1,6 +1,10 @@ name: Python package -on: [push] +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] jobs: build: @@ -9,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [2.7, 3.6, 3.7, 3.8] + python-version: ["2.7", "3.6", "3.7", "3.8", "3.9"] steps: - uses: actions/checkout@v2