Skip to content

Commit

Permalink
Remove calls to logging.basicConfig on import
Browse files Browse the repository at this point in the history
Fixes #196

instructlab.sdg calls logging.basicConfig on import.

Libraries should not configure logging themselves. It's the job of the
application or main script to configure logging and set up log
handlers according to their needs. logging.basicConfig is a one-shot
API: the second and any subsequent calls are no-ops unless they are
made with force=True.

Remove setup_logger (and its logger_config module) and just use logging.getLogger in the code.

Co-authored-by: Christian Heimes <[email protected]>
Signed-off-by: Mark McLoughlin <[email protected]>
  • Loading branch information
markmc and tiran committed Jul 26, 2024
1 parent 76a8624 commit 81a69bb
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 41 deletions.
6 changes: 2 additions & 4 deletions src/instructlab/sdg/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
# Standard
from abc import ABC
from typing import Any, Dict, Union
import logging
import os.path

# Third Party
import yaml

# Local
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


# This is part of the public API.
Expand Down
4 changes: 2 additions & 2 deletions src/instructlab/sdg/datamixing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Standard
from typing import Optional
import json
import logging
import os.path
import random
import uuid
Expand All @@ -10,11 +11,10 @@
import yaml

# First Party
from instructlab.sdg.logger_config import setup_logger
from instructlab.sdg.utils import GenerateException, pandas

ALLOWED_COLS = ["id", "messages", "metadata"]
logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


def _adjust_train_sample_size(ds: Dataset, num_samples: int):
Expand Down
6 changes: 2 additions & 4 deletions src/instructlab/sdg/eval_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Standard
from importlib import resources
from typing import Any
import logging
import re

# Third Party
Expand All @@ -10,10 +11,7 @@
# First Party
from instructlab.sdg.pipeline import EVAL_PIPELINES_PKG, Pipeline

# Local
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


def _extract_options(text: str) -> list[Any]:
Expand Down
4 changes: 2 additions & 2 deletions src/instructlab/sdg/filterblock.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# SPDX-License-Identifier: Apache-2.0
# Standard
import logging
import operator

# Third Party
from datasets import Dataset

# Local
from .block import Block
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


# This is part of the public API.
Expand Down
4 changes: 1 addition & 3 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ def _convert_to_messages(sample):
return sample


def _gen_train_data(
machine_instruction_data, output_file_train, output_file_messages
):
def _gen_train_data(machine_instruction_data, output_file_train, output_file_messages):
"""
Generate training data in the legacy system/user/assistant format
used in train_*.jsonl as well as the legacy messages format used
Expand Down
6 changes: 4 additions & 2 deletions src/instructlab/sdg/importblock.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# SPDX-License-Identifier: Apache-2.0
# Standard
import logging

# Third Party
from datasets import Dataset

# Local
from .block import Block
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


# This is part of the public API.
Expand Down
4 changes: 2 additions & 2 deletions src/instructlab/sdg/llmblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Standard
from collections import ChainMap
from typing import Any, Dict
import logging
import re

# Third Party
Expand All @@ -10,9 +11,8 @@

# Local
from .block import Block
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)

MODEL_FAMILY_MIXTRAL = "mixtral"
MODEL_FAMILY_MERLINITE = "merlinite"
Expand Down
18 changes: 0 additions & 18 deletions src/instructlab/sdg/logger_config.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/instructlab/sdg/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from dataclasses import dataclass
from importlib import resources
from typing import Iterable, Optional
import logging
import math
import os.path

Expand All @@ -18,9 +19,8 @@
# Local
from . import filterblock, importblock, llmblock, utilblocks
from .block import Block
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


# This is part of the public API.
Expand Down
6 changes: 4 additions & 2 deletions src/instructlab/sdg/utilblocks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# Standard
import logging

# Third Party
from datasets import Dataset

Expand All @@ -7,9 +10,8 @@

# Local
from .block import Block
from .logger_config import setup_logger

logger = setup_logger(__name__)
logger = logging.getLogger(__name__)


# This is part of the public API.
Expand Down

0 comments on commit 81a69bb

Please sign in to comment.