Skip to content

Commit

Permalink
Merge pull request #1 from ContinuumIO/catalog
Browse files Browse the repository at this point in the history
Catalogs
  • Loading branch information
AlbertDeFusco authored Feb 8, 2021
2 parents 16d7ab5 + 8246835 commit 0187629
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 7 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,35 @@ conda install -c defusco intake-metabase


## Quickstart
To access a catalog of tables in Metabase, you will need the following information:

* `domain`: The URL where Metabase is running
* `username`: Your username, typically an email address
* `password`: Your password (Google Auth is not yet supported)

To load the catalog and list the tables:

```python
import intake
catalog = intake.open_metabase_catalog(domain, username, password)
list(catalog)
```

This will produce output like:

```
[table1, table2, table3]
```

To load a table as a Pandas DataFrame:

```
df = catalog.<table>.read()
```

Replace `<table>` with the name of the table from the list.

## Load a single table
To load a table as a Pandas DataFrame, you will need to know the following information:

* `domain`: The URL where Metabase is running
Expand Down
3 changes: 3 additions & 0 deletions intake_metabase/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions

import intake
from .source import MetabaseCatalog, MetabaseTableSource
89 changes: 83 additions & 6 deletions intake_metabase/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,55 @@
from urllib.parse import urljoin

import requests
from intake.catalog import Catalog
from intake.catalog.local import LocalCatalogEntry
from intake.source.base import DataSource, Schema

from . import __version__

class MetabaseDatasetSource(DataSource):

class MetabaseCatalog(Catalog):
    """Intake catalog listing the tables reported by a Metabase instance.

    Every table of every database returned by the Metabase API becomes one
    catalog entry driven by ``MetabaseTableSource``.

    Parameters
    ----------
    domain : str
        URL where Metabase is running.
    username : str
        Metabase username.
    password : str
        Metabase password.
    metadata : dict, optional
        Forwarded unchanged to the base ``Catalog`` constructor.
    """

    name = 'metabase_catalog'
    version = __version__

    def __init__(self, domain, username, password, metadata=None):
        self.domain = domain
        self.username = username
        self.password = password

        # The API client is created before the base constructor is called;
        # presumably the base constructor may trigger ``_load`` -- confirm
        # against intake's ``Catalog`` before reordering.
        self._metabase = MetabaseAPI(self.domain, self.username, self.password)

        super().__init__(name='metabase', metadata=metadata)

    def _table_entry(self, db, table):
        """Return the catalog entry for *table*, which belongs to *db*.

        Both arguments are the records returned by the Metabase API; the
        entry carries the credentials and ids ``MetabaseTableSource`` needs.
        """
        entry = LocalCatalogEntry(
            name=table['name'],
            description=table['description'],
            driver=MetabaseTableSource,
            catalog=self,
            args={
                'domain': self.domain,
                'username': self.username,
                'password': self.password,
                'database': db['id'],
                'table': table['id']
            }
        )
        # Pin the driver class directly on the entry.
        entry._plugin = [MetabaseTableSource]
        return entry

    def _load(self):
        """Rebuild ``self._entries`` from the databases Metabase reports."""
        databases = self._metabase.get_databases()

        # Entries are keyed by bare table name (not grouped per database), so
        # a later table with the same name replaces an earlier one.
        self._entries = {}
        for db in databases:
            for table in db['tables']:
                self._entries[table['name']] = self._table_entry(db, table)


class MetabaseTableSource(DataSource):
name = 'metabase_table'
container = 'dataframe'
version = '0.1.0'
version = __version__
partition_access = True

def __init__(self, domain, username, password, database, table, *kwargs, metadata=None):
Expand All @@ -21,11 +64,11 @@ def __init__(self, domain, username, password, database, table, *kwargs, metadat

self._metabase = MetabaseAPI(self.domain, self.username, self.password)

super(MetabaseDatasetSource, self).__init__(metadata=metadata)
super(MetabaseTableSource, self).__init__(metadata=metadata)

def _get_schema(self):
if self._df is None:
self._df = self._metabase.get_data(self.database, self.table)
self._df = self._metabase.get_table(self.database, self.table)

return Schema(datashape=None,
dtype=self._df,
Expand Down Expand Up @@ -73,12 +116,46 @@ def _create_or_refresh_token(self):
self._token = res.json()['id']
self._token_expiration = datetime.now() + timedelta(days=10)

def get_data(self, database, table):
def get_databases(self):
    """Return the databases known to Metabase, each including its tables.

    Issues ``GET /api/database?include=tables`` authenticated with the
    current session token (created or refreshed first).

    Returns
    -------
    list
        The decoded JSON list of database records.

    Raises
    ------
    requests.HTTPError
        If Metabase answers with a 4xx/5xx status.
    """
    self._create_or_refresh_token()

    res = requests.get(
        urljoin(self.domain, '/api/database'),
        headers={'X-Metabase-Session': self._token},
        params={'include': 'tables'},
    )
    # Fail loudly on HTTP errors, as get_table does, instead of handing an
    # error payload to the caller as if it were the database list.
    res.raise_for_status()

    payload = res.json()
    # Newer Metabase releases wrap the list as {"data": [...], "total": n};
    # older ones return the bare list. Always hand back the list itself.
    if isinstance(payload, dict) and 'data' in payload:
        return payload['data']
    return payload

def get_metadata(self, table):
    """Return the query metadata Metabase holds for one table.

    Issues ``GET /api/table/<table>/query_metadata`` authenticated with the
    current session token (created or refreshed first).

    Parameters
    ----------
    table
        Metabase table id, interpolated into the request path.

    Returns
    -------
    The decoded JSON response body.

    Raises
    ------
    requests.HTTPError
        If Metabase answers with a 4xx/5xx status.
    """
    self._create_or_refresh_token()

    res = requests.get(
        urljoin(self.domain, f'/api/table/{table}/query_metadata'),
        headers={'X-Metabase-Session': self._token},
    )
    # Fail loudly on HTTP errors, as get_table does; otherwise an error body
    # would be returned and only break later when its fields are read.
    res.raise_for_status()

    return res.json()

def get_table(self, database, table):
from io import StringIO

import pandas as pd

self._create_or_refresh_token()

table_metadata = self.get_metadata(table)
date_fields = [f['display_name'] for f in table_metadata['fields']
if 'date' in f['base_type'].lower()]

body = {
"database": database,
"query": {"source-table": table},
Expand All @@ -103,4 +180,4 @@ def get_data(self, database, table):
res.raise_for_status()
csv = res.text

return pd.read_csv(StringIO(csv), parse_dates=True, infer_datetime_format=True)
return pd.read_csv(StringIO(csv), parse_dates=date_fields, infer_datetime_format=True)
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
url='https://github.com/ContinuumIO/intake-metabase',
packages=['intake_metabase'],
entry_points={
'intake.drivers': ['metabase_table = intake_metabase.source:MetabaseDatasetSource']
'intake.drivers': [
'metabase_catalog = intake_metabase.source:MetabaseCatalog',
'metabase_table = intake_metabase.source:MetabaseTableSource',
]
},
install_requires=requirements,
keywords='intake-metabase',
Expand Down

0 comments on commit 0187629

Please sign in to comment.