From 974f9c5ff79ac1ff2fa990cf77562b1ff146e31b Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 2 Feb 2024 10:24:16 +0800 Subject: [PATCH] versioneer fix --- README.md | 31 +++++++++++++++++++++++++++++-- cu_cat/__init__.py | 5 +++-- cu_cat/_version.py | 4 ++-- docs/source/conf.py | 2 +- docs/source/cu_cat.rst | 2 +- docs/source/versioneer.rst | 2 +- setup.py | 7 +++---- 7 files changed, 40 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 24b1f7a1e..52fc9248a 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,35 @@ However, this graph does not mean to imply the trend goes on forever, as current GPU = colab T4 + 15gb mem and colab CPU + 12gb memory - -## Startup Code: +## Startup Code demonstrating speedup: + + ! pip install cu-cat dirty-cat + from time import time + from cu_cat._table_vectorizer import TableVectorizer as cu_TableVectorizer + from dirty_cat._table_vectorizer import TableVectorizer as dirty_TableVectorizer + from sklearn.datasets import fetch_20newsgroups + n_samples = 2000 # speed boost improves as n_samples increases, to the limit of gpu mem + + news, _ = fetch_20newsgroups( + shuffle=True, + random_state=1, + remove=("headers", "footers", "quotes"), + return_X_y=True, + ) + + news = news[:n_samples] + news=pd.DataFrame(news) + table_vec = cu_TableVectorizer() + t = time() + aa = table_vec.fit_transform((news)) + ct = time() - t + # if deps.dirty_cat: + t = time() + bb = dirty_TableVectorizer().fit_transform(news) + dt = time() - t + print(f"cu_cat: {ct:.2f}s, dirty_cat: {dt:.2f}s, speedup: {dt/ct:.2f}x") + >>> cu_cat: 58.76s, dirty_cat: 84.54s, speedup: 1.44x +## Enhanced Code using Graphistry: # !pip install graphistry[ai] ## future releases will have this by default !pip install git+https://github.com/graphistry/pygraphistry.git@dev/depman_gpufeat diff --git a/cu_cat/__init__.py b/cu_cat/__init__.py index cc48ad80c..0fe7a0c14 100644 --- a/cu_cat/__init__.py +++ b/cu_cat/__init__.py @@ -20,9 +20,10 @@ from ._gap_encoder import GapEncoder # type: ignore from ._table_vectorizer import SuperVectorizer, TableVectorizer -with open(_Path(__file__).parent / "VERSION.txt") as _fh: - __version__ = _fh.read().strip() +from ._version import get_versions +__version__ = get_versions()["version"] +del get_versions __all__ = [ "DatetimeEncoder", diff --git a/cu_cat/_version.py b/cu_cat/_version.py index b17a7b1f3..33f0c6a08 100644 --- a/cu_cat/_version.py +++ b/cu_cat/_version.py @@ -43,8 +43,8 @@ def get_config(): cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" - cfg.parentdir_prefix = "graphistry-" - cfg.versionfile_source = "graphistry/_version.py" + cfg.parentdir_prefix = "cu_cat-" + cfg.versionfile_source = "cu_cat/_version.py" cfg.verbose = False return cfg diff --git a/docs/source/conf.py b/docs/source/conf.py index 8a2c87380..7874ad331 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- project = "CU_CAT" -copyright = "2023, Graphistry, Inc." +copyright = "2024, Graphistry, Inc." author = "Graphistry, Inc." # The full version, including alpha/beta/rc tags diff --git a/docs/source/cu_cat.rst b/docs/source/cu_cat.rst index 2a3f2ffd9..2bba5d6d4 100644 --- a/docs/source/cu_cat.rst +++ b/docs/source/cu_cat.rst @@ -25,7 +25,7 @@ Table_Vectorizer Versioneer ================== -.. automodule:: graphistry._version +.. automodule:: cu_cat._version :members: :undoc-members: :show-inheritance: diff --git a/docs/source/versioneer.rst b/docs/source/versioneer.rst index 1f5d4bae4..51dd74798 100644 --- a/docs/source/versioneer.rst +++ b/docs/source/versioneer.rst @@ -3,4 +3,4 @@ .. toctree:: :maxdepth: 2 - graphistry.plugins_types + cu_cat.plugins_types diff --git a/setup.py b/setup.py index 38b21ee1a..d2ea36112 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages import versioneer -# # #github + def unique_flatten_dict(d): return list(set(sum( d.values(), [] ))) @@ -16,9 +16,8 @@ def unique_flatten_dict(d): 'flake8>=5.0', 'psutil', 'build', - 'versioneer', - 'dirty-cat', -# 'cuml', ## cannot test on github actions + 'dirty-cat', # only for pytest speed comparison +# 'cuml', # cannot test on github actions # 'cudf', # 'cupy' ]