Skip to content

Commit

Permalink
Address mypy issues
Browse files: browse the repository at this point in the history
Signed-off-by: Aakanksha Duggal <[email protected]>
  • Loading branch information
aakankshaduggal committed Nov 13, 2024
1 parent f8f6959 commit 7e67967
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
26 changes: 15 additions & 11 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import logging
import os
import time
import yaml

# Third Party
# instructlab - All of these need to go away (other than sdg) - issue #6
Expand Down Expand Up @@ -220,19 +221,22 @@ def _sdg_init(ctx, pipeline):
data_dirs = [os.path.join(xdg_data_home(), "instructlab", "sdg")]
data_dirs.extend(os.path.join(dir, "instructlab", "sdg") for dir in xdg_data_dirs())

sdg_models_path = docling_models_path = None
sdg_models_path = docling_models_path = None

Check warning on line 224 in src/instructlab/sdg/generate_data.py

View workflow job for this annotation

GitHub Actions / pylint

W0612: Unused variable 'docling_models_path' (unused-variable)
for d in data_dirs:
if os.path.exists(os.path.join(d, "models")):
sdg_models_path = os.path.join(d, "models")
break

if sdg_models_path is not None:
try:
with open(os.path.join(sdg_models_path, "config.yaml"), "r", encoding="utf-8") as file:
config = yaml.safe_load(file)
docling_models_path = config['models'][0]['path']
except (FileNotFoundError, NotADirectoryError, PermissionsError) as e:
log.warning(f"unable to read docling models path from config.yaml")
sdg_models_path = os.path.join(d, "models")
break

if sdg_models_path is not None:
try:
with open(
os.path.join(sdg_models_path, "config.yaml"), "r", encoding="utf-8"
) as file:
config = yaml.safe_load(file)
docling_models_path = config["models"][0]["path"]
except (FileNotFoundError, NotADirectoryError, PermissionsError) as e:

Check failure on line 237 in src/instructlab/sdg/generate_data.py

View workflow job for this annotation

GitHub Actions / pylint

E0602: Undefined variable 'PermissionsError' (undefined-variable)

Check warning on line 237 in src/instructlab/sdg/generate_data.py

View workflow job for this annotation

GitHub Actions / pylint

W0612: Unused variable 'e' (unused-variable)
logger.warning(f"unable to read docling models path from config.yaml")

Check warning on line 238 in src/instructlab/sdg/generate_data.py

View workflow job for this annotation

GitHub Actions / pylint

W1309: Using an f-string that does not have any interpolated variables (f-string-without-interpolation)

for d in data_dirs:
pipeline_path = os.path.join(d, "pipelines", pipeline)
if os.path.exists(pipeline_path):
Expand Down
5 changes: 1 addition & 4 deletions src/instructlab/sdg/utils/chunkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import DefaultDict, Iterable, List, Tuple
import json
import logging
import os
import re

# Third Party
Expand Down Expand Up @@ -217,9 +216,7 @@ def chunk_documents(self) -> List:
return []

if self.docling_model_path is None:
logger.info(
f"Docling models not found on disk, downloading models..."
)
logger.info("Docling models not found on disk, downloading models...")
self.docling_model_path = StandardPdfPipeline.download_models_hf()
else:
logger.info("Found the docling models")
Expand Down

0 comments on commit 7e67967

Please sign in to comment.