Skip to content

Commit

Permalink
Finish DatasetResource
Browse files: browse the repository at this point in the history
  • Loading branch information
koenvo committed Sep 17, 2024
1 parent 17affec commit a48b1fe
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 15 deletions.
9 changes: 5 additions & 4 deletions ingestify/application/dataset_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Dataset,
DatasetCollection,
DatasetRepository,
DatasetResource,
DraftFile,
File,
LoadedFile,
Expand Down Expand Up @@ -205,14 +206,14 @@ def add_revision(
def update_dataset(
self,
dataset: Dataset,
dataset_identifier: Identifier,
dataset_resource: DatasetResource,
files: Dict[str, DraftFile],
):
"""The add_revision will also save the dataset."""
metadata_changed = False
# if dataset.update_from_identifier(dataset_identifier):
# self.dataset_repository.save(bucket=self.bucket, dataset=dataset)
# metadata_changed = True
if dataset.update_from_resource(dataset_resource):
self.dataset_repository.save(bucket=self.bucket, dataset=dataset)
metadata_changed = True

self.add_revision(dataset, files)

Expand Down
7 changes: 4 additions & 3 deletions ingestify/application/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
logger = logging.getLogger(__name__)


DEFAULT_CHUNK_SIZE = 100
DEFAULT_CHUNK_SIZE = 1000


def to_batches(input_):
Expand All @@ -52,7 +52,8 @@ def load_file(
file_resource.file_id
)

if file_resource.json_content:
if file_resource.json_content is not None:
# Empty dictionary is allowed
return DraftFile.from_input(
file_=json.dumps(file_resource.json_content, indent=4),
data_serialization_format="json",
Expand Down Expand Up @@ -93,7 +94,7 @@ def __init__(
def run(self):
self.store.update_dataset(
dataset=self.dataset,
dataset_identifier=Identifier(self.dataset_resource.dataset_resource_id),
dataset_resource=self.dataset_resource,
files={
file_id: load_file(file_resource, dataset=self.dataset)
for file_id, file_resource in self.dataset_resource.files.items()
Expand Down
14 changes: 7 additions & 7 deletions ingestify/domain/models/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,18 @@ def add_revision(self, revision: Revision):
self.revisions.append(revision)
self.updated_at = utcnow()

def update_from_identifier(self, dataset_identifier: Identifier) -> bool:
def update_from_resource(self, dataset_resource) -> bool:
changed = False
if self.name != dataset_identifier.name:
self.name = dataset_identifier.name
if self.name != dataset_resource.name:
self.name = dataset_resource.name
changed = True

if self.metadata != dataset_identifier.metadata:
self.metadata = dataset_identifier.metadata
if self.metadata != dataset_resource.metadata:
self.metadata = dataset_resource.metadata
changed = True

if self.state != dataset_identifier.state:
self.state = dataset_identifier.state
if self.state != dataset_resource.state:
self.state = dataset_resource.state
changed = True

if changed:
Expand Down
11 changes: 10 additions & 1 deletion ingestify/domain/models/resources/dataset_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from datetime import datetime
from typing import Optional, Callable, TYPE_CHECKING

from ingestify.exceptions import DuplicateFile

if TYPE_CHECKING:
from ingestify.domain import DraftFile, File
from ingestify.domain.models.dataset.dataset import DatasetState
Expand All @@ -28,6 +30,12 @@ class FileResource:
Callable[["FileResource", Optional["File"]], Optional["DraftFile"]]
] = None

def __post_init__(self):
    """Reject a resource that has no way to obtain its content.

    Raises:
        TypeError: if `json_content` is None and both `url` and
            `file_loader` are falsy.
    """
    # `json_content` is compared against None rather than tested for
    # truthiness, so an empty value still counts as provided content.
    has_source = self.json_content is not None or self.url or self.file_loader
    if has_source:
        return
    raise TypeError(
        "You need to specify `json_content`, `url` or a custom `file_loader`"
    )


class DatasetResource:
def __init__(
Expand Down Expand Up @@ -55,7 +63,8 @@ def add_file(
self,
last_modified: datetime,
data_feed_key: str,
data_spec_version: str,
# Some sources might not have a DataSpecVersion. Set a default
data_spec_version: str = "v1",
json_content: Optional[dict] = None,
url: Optional[str] = None,
http_options: Optional[dict] = None,
Expand Down

0 comments on commit a48b1fe

Please sign in to comment.