Skip to content

Commit

Permalink
Properly batch items to be removed (#188)
Browse files Browse the repository at this point in the history
  • Loading branch information
mruwnik committed Sep 14, 2023
1 parent 16e4c84 commit 21d7887
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
1 change: 0 additions & 1 deletion align_data/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
Enum,
ForeignKey,
String,
- Boolean,
Text,
func,
event,
Expand Down
8 changes: 5 additions & 3 deletions align_data/embeddings/pinecone/update_pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def save_batch(self, session: Session, batch: List[Any]):
session.rollback()

def batch_entries(self, article_stream: Generator[Article, None, None]) -> Iterator[List[Article]]:
- while batch := tuple(islice(article_stream, self.batch_size)):
+ items = iter(article_stream)
+ while batch := tuple(islice(items, self.batch_size)):
yield list(batch)


Expand Down Expand Up @@ -102,7 +103,8 @@ def process_batch(self, batch: List[Tuple[Article, PineconeEntry | None]]):
def batch_entries(
self, article_stream: Generator[Article, None, None]
) -> Iterator[List[Tuple[Article, PineconeEntry | None]]]:
- while batch := tuple(islice(article_stream, self.batch_size)):
+ items = iter(article_stream)
+ while batch := tuple(islice(items, self.batch_size)):
yield [(article, self._make_pinecone_entry(article)) for article in batch]

def _make_pinecone_entry(self, article: Article) -> PineconeEntry | None:
Expand Down Expand Up @@ -163,7 +165,7 @@ def _articles_by_id(self, session, ids: List[str], _force_update: bool):

def process_batch(self, batch: List[Article]):
self.pinecone_db.delete_entries([a.id for a in batch])
- logger.info('removing batch %s', len(batch))
+ logger.info('removing batch of %s items', len(batch))
for article in batch:
article.pinecone_status = PineconeStatus.absent
return batch
Expand Down

0 comments on commit 21d7887

Please sign in to comment.