Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor/cleanup additional deadcode #2770

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9,727 changes: 0 additions & 9,727 deletions output.txt

This file was deleted.

4 changes: 2 additions & 2 deletions plugins/ibis/plugin_test/test_sanity.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from superduper.base.leaf import Leaf
from superduper.base.base import Base


class New(Leaf):
class New(Base):
a: str


Expand Down
4 changes: 2 additions & 2 deletions plugins/ibis/superduper_ibis/field_types.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import typing as t

from ibis.expr.datatypes import DataType, dtype as _dtype
from superduper.base.leaf import Leaf
from superduper.base.base import Base


class FieldType(Leaf):
class FieldType(Base):
"""Field type to represent the type of a field in a table.

This is a wrapper around ibis.expr.datatypes.DataType to make it
Expand Down
9 changes: 2 additions & 7 deletions plugins/openai/superduper_openai/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import typing as t

from functools import lru_cache as cache
import numpy
import requests
import tqdm
Expand All @@ -18,8 +19,7 @@
from superduper.backends.query_dataset import QueryDataset
from superduper.base import exceptions
from superduper.base.datalayer import Datalayer
from superduper.components.model import APIBaseModel, Inputs
from superduper.misc.compat import cache
from superduper.components.model import APIBaseModel
from superduper.misc.retry import Retry, safe_retry

retry = Retry(
Expand Down Expand Up @@ -118,11 +118,6 @@ class OpenAIEmbedding(_OpenAI):
signature: str = 'singleton'
batch_size: int = 100

@property
def inputs(self):
"""The inputs of the model."""
return Inputs(['input'])

@retry
def predict(self, X: str):
"""Generates embeddings from text.
Expand Down
3 changes: 0 additions & 3 deletions superduper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@


from .base.document import Document
from .base.leaf import imported, imported_value
from .components.application import Application
from .components.component import Component
from .components.dataset import Dataset
Expand Down Expand Up @@ -69,7 +68,5 @@
'Component',
'pickle_serializer',
'dill_serializer',
'imported',
'imported_value',
'Streamlit',
)
13 changes: 13 additions & 0 deletions superduper/backends/base/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ def db(self, value):
self.compute.db = value
self.cdc.db = value

def load_custom_plugins(self):
    """Load user plugins.

    If ``self.db.show('Table')`` lists a ``Plugin`` table, every identifier
    returned by ``self.db.show('Plugin')`` is loaded through
    ``self.db.load('Plugin', ...)``. Nothing is returned; the loaded objects
    are discarded here.

    NOTE(review): presumably loading a plugin is what makes its code
    available to the process -- confirm against the ``Plugin`` component.
    """
    from superduper import logging

    # Guard clause: nothing to do when no ``Plugin`` table is registered.
    if 'Plugin' not in self.db.show('Table'):
        return

    # Plain string: the message has no placeholders, so no f-string is needed.
    logging.info("Found custom plugins - loading...")
    for plugin in self.db.show('Plugin'):
        logging.info(f"Loading plugin: {plugin}")
        # The return value is intentionally not bound; the original rebound
        # the loop variable to it and never used the result.
        self.db.load('Plugin', plugin)

def initialize(self, with_compute: bool = False):
"""Initialize the cluster.

Expand All @@ -88,6 +98,9 @@ def initialize(self, with_compute: bool = False):

start = time.time()
assert self.db

self.load_custom_plugins()

if with_compute:
self.compute.initialize()

Expand Down
94 changes: 56 additions & 38 deletions superduper/backends/base/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import typing as t
import uuid

from superduper import logging
from superduper.base.exceptions import DatabackendError
from superduper.base.leaf import Leaf
from superduper.base.base import Base
from superduper.components.cdc import CDC
from superduper.components.schema import Schema
from superduper.components.table import Table
from superduper.misc.importing import import_object


class NonExistentMetadataError(Exception):
Expand All @@ -25,7 +27,7 @@ class UniqueConstraintError(Exception):


# TODO merge with Event/Job
class Job(Leaf):
class Job(Base):
"""Job table.

#noqa
Expand All @@ -43,7 +45,7 @@ class Job(Leaf):
dependencies: t.List[str] = dc.field(default_factory=list)


class ParentChildAssociations(Leaf):
class ParentChildAssociations(Base):
"""Parent-child associations table.

:param parent_component: parent component type
Expand All @@ -58,7 +60,7 @@ class ParentChildAssociations(Leaf):
child_uuid: str


class ArtifactRelations(Leaf):
class ArtifactRelations(Base):
"""Artifact relations table.

:param component_id: UUID of component version
Expand Down Expand Up @@ -89,32 +91,28 @@ def __init__(self, db):

self.preset_components = {
('Table', 'Table'): Table(
'Table',
cls=Table,
identifier='Table',
primary_id='uuid',
uuid='abc',
component=True,
path='superduper.components.table.Table',
).encode(),
('Table', 'ParentChildAssociations'): Table(
'ParentChildAssociations',
cls=ParentChildAssociations,
identifier='ParentChildAssociations',
primary_id='uuid',
uuid='def',
component=True,
path='superduper.backends.base.metadata.ParentChildAssociations',
).encode(),
('Table', 'ArtifactRelations'): Table(
'ArtifactRelations',
cls=ArtifactRelations,
identifier='ArtifactRelations',
primary_id='uuid',
uuid='ghi',
component=True,
path='superduper.backends.base.metadata.ArtifactRelations',
).encode(),
('Table', 'Job'): Table(
'Job',
cls=Job,
identifier='Job',
primary_id='uuid',
uuid='jkl',
component=True,
Expand Down Expand Up @@ -142,8 +140,8 @@ def get_schema(self, table: str):
r = self.db['Table'].get(identifier=table)
try:
r = r.unpack()
if r['cls'] is not None:
return r['cls'].class_schema
if r['path'] is not None:
return import_object(r['path']).class_schema
return Schema.build(r['fields'])
except AttributeError as e:
if 'unpack' in str(e) and 'NoneType' in str(e):
Expand All @@ -154,7 +152,7 @@ def get_schema(self, table: str):
raise NonExistentMetadataError(f'{table} does not exist in metadata')
raise e

def create(self, cls: t.Type[Leaf]):
def create(self, cls: t.Type[Base]):
"""
Create a table in the metadata store.

Expand All @@ -165,9 +163,9 @@ def create(self, cls: t.Type[Leaf]):
except DatabackendError as e:
if 'not found' in str(e):
self.db.databackend.create_table_and_schema('Table', Table.class_schema)
t = Table('Table', cls=Table, primary_id='uuid', component=True)
t = Table('Table', path='superduper.components.table.Table', primary_id='uuid', component=True)
r = self.db['Table'].insert(
[t.dict(schema=True, path=False)], auto_schema=False
[t.dict(schema=True, path=False)],
)
else:
raise e
Expand All @@ -178,8 +176,9 @@ def create(self, cls: t.Type[Leaf]):
)

self.db.databackend.create_table_and_schema(cls.__name__, cls.class_schema)
t = Table(cls.__name__, cls=cls, primary_id='uuid', component=True)
self.db['Table'].insert([t.dict(schema=True, path=False)], auto_schema=False)
t = Table(identifier=cls.__name__, path=f'{cls.__module__}.{cls.__name__}', primary_id='uuid', component=True)
self.db['Table'].insert([t.dict(path=False)])
return t

def delete_parent_child_relationships(self, parent_uuid: str):
"""
Expand Down Expand Up @@ -333,6 +332,18 @@ def create_job(self, info: t.Dict):
"""
self.create_entry(info, 'Job', raw=False)

def show_jobs(self, component: str, identifier: str):
    """
    Return the distinct ``job_id`` values of the jobs of one component.

    Rows of the ``Job`` table are narrowed to those whose ``component`` and
    ``identifier`` columns equal the given arguments.

    :param component: type of component
    :param identifier: identifier of component
    """
    # Bind the filter first so the lookups happen in the original order.
    narrow = self.db['Job'].filter
    on_component = self.db['Job']['component'] == component
    on_identifier = self.db['Job']['identifier'] == identifier
    matching_jobs = narrow(on_component, on_identifier)
    return matching_jobs.distinct('job_id')

def show_components(self, component: str | None = None):
"""
Show all components in the metadata store.
Expand All @@ -345,30 +356,36 @@ def show_components(self, component: str | None = None):
for component in t.filter(t['component'] == True).distinct( # noqa: E712
'identifier'
):
if component in metaclasses:
if component in metaclasses.keys():
continue
out.extend(
[
{'component': component, 'identifier': x}
for x in self.db[component].distinct('identifier')
]
)

try:
out.extend(
[
{'component': component, 'identifier': x}
for x in self.db[component].distinct('identifier')
]
)
except ModuleNotFoundError as e:
logging.error(f'Component type not found: {component}; ', e)
out.extend(
[
{'component': 'Table', 'identifier': x}
for x in self.db['Table'].distinct('identifier')
]
)
return out
return self.db[component].distinct('identifier')

def get_classes(self):
"""Get all classes in the metadata store."""
data = self['Metadata'].execute()
return [r['cls'] for r in data]

def show_cdc_tables(self):
"""List the tables used for CDC."""
cdc_classes = []
for r in self.db['Table'].execute():
if r['cls'] is None:
if r['path'] is None:
continue
cls = import_object(r['path'])
r = r.unpack()
if issubclass(r['cls'], CDC):
if issubclass(cls, CDC):
cdc_classes.append(r)

cdc_tables = []
Expand All @@ -384,9 +401,10 @@ def show_cdcs(self, table):
"""
cdc_classes = []
for r in self.db['Table'].execute():
if r['cls'] is None:
if r['path'] is None:
continue
if issubclass(r['cls'], CDC):
cls = import_object(r['path'])
if issubclass(cls, CDC):
cdc_classes.append(r)

cdcs = []
Expand Down Expand Up @@ -474,9 +492,8 @@ def get_component_by_uuid(self, component: str, uuid: str):
if uuid in self.preset_uuids:
return self.preset_uuids[uuid]
r = self.db[component].get(uuid=uuid, raw=True)
cls = self.db['Table'].get(identifier=component)['cls']
_path = cls.__module__ + '.' + cls.__name__
r['_path'] = _path
path = self.db['Table'].get(identifier=component)['path']
r['_path'] = path
return r

def get_component(
Expand All @@ -497,6 +514,7 @@ def get_component(
if (component, identifier) in self.preset_components:
return self.preset_components[(component, identifier)]

# TODO find a more efficient way to do this.
if version is None:
version = self.get_latest_version(
component=component,
Expand Down
Loading
Loading