From e9aea826c7263b373e7c64cb3f6361a562f0e53a Mon Sep 17 00:00:00 2001 From: Albert DeFusco Date: Mon, 8 Feb 2021 15:01:50 -0500 Subject: [PATCH 1/2] catalog support fix datetime parsing --- intake_metabase/__init__.py | 3 ++ intake_metabase/source.py | 89 ++++++++++++++++++++++++++++++++++--- setup.py | 5 ++- 3 files changed, 90 insertions(+), 7 deletions(-) diff --git a/intake_metabase/__init__.py b/intake_metabase/__init__.py index e44e9d1..a0377e1 100644 --- a/intake_metabase/__init__.py +++ b/intake_metabase/__init__.py @@ -1,3 +1,6 @@ from ._version import get_versions __version__ = get_versions()['version'] del get_versions + +import intake +from .source import MetabaseCatalog, MetabaseTableSource diff --git a/intake_metabase/source.py b/intake_metabase/source.py index fb4365f..5aad4e2 100644 --- a/intake_metabase/source.py +++ b/intake_metabase/source.py @@ -3,12 +3,55 @@ from urllib.parse import urljoin import requests +from intake.catalog import Catalog +from intake.catalog.local import LocalCatalogEntry from intake.source.base import DataSource, Schema +from . 
import __version__ -class MetabaseDatasetSource(DataSource): + +class MetabaseCatalog(Catalog): + name = 'metabase_catalog' + version = __version__ + # partition_access = False + + def __init__(self, domain, username, password, metadata=None): + self.domain = domain + self.username = username + self.password = password + + self._metabase = MetabaseAPI(self.domain, self.username, self.password) + + super().__init__(name='metabase', metadata=metadata) + + def _load(self): + databases = self._metabase.get_databases() + + self._entries = {} + for db in databases: + for table in db['tables']: + e = LocalCatalogEntry( + name=table['name'], + description=table['description'], + driver=MetabaseTableSource, + catalog=self, + args={ + 'domain': self.domain, + 'username': self.username, + 'password': self.password, + 'database': db['id'], + 'table': table['id'] + } + ) + e._plugin = [MetabaseTableSource] + # self._entries[db['name']][table['name']] = e + self._entries[table['name']] = e + + +class MetabaseTableSource(DataSource): + name = 'metabase_table' container = 'dataframe' - version = '0.1.0' + version = __version__ partition_access = True def __init__(self, domain, username, password, database, table, *kwargs, metadata=None): @@ -21,11 +64,11 @@ def __init__(self, domain, username, password, database, table, *kwargs, metadat self._metabase = MetabaseAPI(self.domain, self.username, self.password) - super(MetabaseDatasetSource, self).__init__(metadata=metadata) + super(MetabaseTableSource, self).__init__(metadata=metadata) def _get_schema(self): if self._df is None: - self._df = self._metabase.get_data(self.database, self.table) + self._df = self._metabase.get_table(self.database, self.table) return Schema(datashape=None, dtype=self._df, @@ -73,12 +116,46 @@ def _create_or_refresh_token(self): self._token = res.json()['id'] self._token_expiration = datetime.now() + timedelta(days=10) - def get_data(self, database, table): + def get_databases(self): + 
self._create_or_refresh_token() + + headers = { + 'X-Metabase-Session': self._token + } + params = {'include': 'tables'} + + res = requests.get( + urljoin(self.domain, '/api/database'), + headers=headers, params=params + ) + + return res.json() + + def get_metadata(self, table): + self._create_or_refresh_token() + + headers = { + 'X-Metabase-Session': self._token + } + + res = requests.get( + urljoin(self.domain, f'/api/table/{table}/query_metadata'), + headers=headers + ) + + return res.json() + + def get_table(self, database, table): from io import StringIO + import pandas as pd self._create_or_refresh_token() + table_metadata = self.get_metadata(table) + date_fields = [f['display_name'] for f in table_metadata['fields'] + if 'date' in f['base_type'].lower()] + body = { "database": database, "query": {"source-table": table}, @@ -103,4 +180,4 @@ def get_data(self, database, table): res.raise_for_status() csv = res.text - return pd.read_csv(StringIO(csv), parse_dates=True, infer_datetime_format=True) + return pd.read_csv(StringIO(csv), parse_dates=date_fields, infer_datetime_format=True) diff --git a/setup.py b/setup.py index 902a40c..449534f 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,10 @@ url='https://github.com/ContinuumIO/intake-metabase', packages=['intake_metabase'], entry_points={ - 'intake.drivers': ['metabase_table = intake_metabase.source:MetabaseDatasetSource'] + 'intake.drivers': [ + 'metabase_catalog = intake_metabase.source:MetabaseCatalog', + 'metabase_table = intake_metabase.source:MetabaseTableSource', + ] }, install_requires=requirements, keywords='intake-metabase', From 82468359262a6dfd7c2fc1c6cbf5716d2e6229c7 Mon Sep 17 00:00:00 2001 From: Albert DeFusco Date: Mon, 8 Feb 2021 15:05:54 -0500 Subject: [PATCH 2/2] update readme for catalog --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index 981a37c..d051eac 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,35 @@ conda 
install -c defusco intake-metabase ## Quickstart +To access a catalog of tables in Metabase you will need the following information +* `domain`: The URL where Metabase is running +* `username`: Your username, typically an email address +* `password`: Your password (Google Auth is not yet supported) + +To load the catalog and list the tables + +```python +import intake +catalog = intake.open_metabase_catalog(domain, username, password) +list(catalog) +``` + +This will produce output like + +``` +[table1, table2, table3] +``` + +To load a table as a Pandas DataFrame + +``` +df = catalog.<table>.read() +``` + +Replace `<table>`
with the name of the table from the list. + +## Load a single table To load a table as a Pandas DataFrames you will need to know the following information * `domain`: The URL where Metabase is running