
Fix flake8 lint issues
Signed-off-by: Ignas Baranauskas <[email protected]>
Ygnas committed Aug 22, 2024
1 parent 9fe8957 commit 39e57ff
Showing 9 changed files with 58 additions and 49 deletions.
3 changes: 2 additions & 1 deletion pkg/metricscollector/v1beta1/common/pns.py
@@ -109,7 +109,8 @@ def WaitPIDs(pids, main_pid, pool_interval, timout, wait_all, completed_marked_d
contents = file_obj.read()
if contents.strip() != const.TRAINING_COMPLETED:
raise Exception(
"Unable to find marker: {} in file: {} with contents: {} for pid: {}".format(
"Unable to find marker: {} in file: {} with contents: {} "
"for pid: {}".format(
const.TRAINING_COMPLETED,
mark_file,
contents,
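The wrapped exception message above relies on Python's implicit concatenation of adjacent string literals, so the runtime string is identical to the original single long line. A minimal sketch, with made-up values for the placeholders:

# Adjacent string literals inside parentheses are joined at compile time,
# so wrapping a long message for flake8 (E501) does not change behavior.
marker = "completed"           # hypothetical marker value
mark_file = "/tmp/train.mark"  # hypothetical marker file
contents = "running"           # hypothetical file contents
pid = 42                       # hypothetical pid

wrapped = (
    "Unable to find marker: {} in file: {} with contents: {} "
    "for pid: {}".format(marker, mark_file, contents, pid)
)
single = "Unable to find marker: {} in file: {} with contents: {} for pid: {}".format(
    marker, mark_file, contents, pid
)
assert wrapped == single  # the lint fix is purely cosmetic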
@@ -13,7 +13,8 @@
# limitations under the License.

# TFEventFileParser parses tfevent files and returns an ObservationLog of the metrics specified.
# When the event file is under a directory(e.g. test dir), please specify "{{dirname}}/{{metrics name}}"
# When the event file is under a directory(e.g. test dir), please specify
# "{{dirname}}/{{metrics name}}"
# For example, in the Tensorflow MNIST Classification With Summaries:
# https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/tf-mnist-with-summaries/mnist.py.
# The "accuracy" and "loss" metric is saved under "train" and "test" directories.
@@ -28,10 +29,7 @@
import api_pb2
import rfc3339
import tensorflow as tf
from tensorboard.backend.event_processing.event_accumulator import (
EventAccumulator,
TensorEvent,
)
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from tensorboard.backend.event_processing.tag_types import TENSORS

from pkg.metricscollector.v1beta1.common import const
@@ -53,7 +51,6 @@ def parse_summary(self, tfefile):
event_accumulator.Reload()
for tag in event_accumulator.Tags()[TENSORS]:
for m in self.metric_names:

tfefile_parent_dir = (
os.path.dirname(m)
if len(m.split("/")) >= 2
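For context on the parser changes above, a hedged sketch of reading a tfevent file with the EventAccumulator import that this hunk keeps; the log directory and metric names are assumptions, not values from this commit:

import tensorflow as tf
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from tensorboard.backend.event_processing.tag_types import TENSORS

logdir = "/tmp/tf-mnist-with-summaries/train"    # hypothetical event-file directory
metric_names = ["train/accuracy", "train/loss"]  # hypothetical "{dirname}/{metric}" names

event_accumulator = EventAccumulator(logdir)
event_accumulator.Reload()  # load all events written so far

for tag in event_accumulator.Tags()[TENSORS]:
    for m in metric_names:
        # Strip the directory prefix, as the header comment describes.
        metric = m.split("/")[-1] if len(m.split("/")) >= 2 else m
        if tag != metric:
            continue
        for event in event_accumulator.Tensors(tag):
            # Each TensorEvent carries a step and a tensor proto with the value.
            value = float(tf.make_ndarray(event.tensor_proto))
            print(m, event.step, value)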
14 changes: 8 additions & 6 deletions pkg/suggestion/v1beta1/hyperopt/base_service.py
@@ -58,7 +58,8 @@ def __init__(
self.is_first_run = True

def create_hyperopt_domain(self):
# Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
# Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2":
# hyperopt.hp.uniform('x2', -10, 10)}
hyperopt_search_space = {}
for param in self.search_space.params:
if param.type == INTEGER:
@@ -108,7 +109,8 @@ def getSuggestions(self, trials, current_request_number):
new_id = self.fmin.trials.new_trial_ids(1)
hyperopt_trial_new_ids.append(new_id[0])
hyperopt_trial_miscs_idxs = {}
# Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}
# Example: {'l1_normalization': [0.1], 'learning_rate': [0.1],
# 'hidden2': [1], 'optimizer': [1]}
hyperopt_trial_miscs_vals = {}

# Insert Trial assignment to the misc
@@ -147,7 +149,8 @@ def getSuggestions(self, trials, current_request_number):
# TODO: Do we need to analyse additional_metrics?
objective_for_hyperopt = float(trial.target_metric.value)
if self.search_space.goal == MAX_GOAL:
# Now hyperopt only supports fmin and we need to reverse objective value for maximization
# Now hyperopt only supports fmin and we need to reverse
# objective value for maximization
objective_for_hyperopt = -1 * objective_for_hyperopt
hyperopt_trial_result = {
"loss": objective_for_hyperopt,
@@ -156,7 +159,6 @@ def getSuggestions(self, trials, current_request_number):
hyperopt_trial_results.append(hyperopt_trial_result)

if len(trials) > 0:

# Create new Trial doc
hyperopt_trials = hyperopt.Trials().new_trial_docs(
tids=hyperopt_trial_new_ids,
@@ -226,7 +228,7 @@ def getSuggestions(self, trials, current_request_number):
trials=self.fmin.trials,
seed=random_state,
n_startup_jobs=current_request_number,
**self.algorithm_conf
**self.algorithm_conf,
)
self.is_first_run = False
else:
@@ -239,7 +241,7 @@ def getSuggestions(self, trials, current_request_number):
trials=self.fmin.trials,
seed=random_state,
n_startup_jobs=current_request_number,
**self.algorithm_conf
**self.algorithm_conf,
)[0]
)

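To illustrate the search-space and maximization comments above, a hedged, self-contained hyperopt sketch; the objective and parameter ranges are invented, and Katib's actual wiring goes through create_hyperopt_domain and the stored fmin/Trials state:

import hyperopt

# Search space in the shape the comment describes:
# {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
space = {
    "x": hyperopt.hp.uniform("x", -10, 10),
    "x2": hyperopt.hp.uniform("x2", -10, 10),
}

def objective(params):
    # Hypothetical metric we want to MAXIMIZE.
    score = -(params["x"] ** 2 + params["x2"] ** 2)
    # hyperopt only minimizes (fmin), so return the negated value,
    # mirroring the "reverse objective value for maximization" comment.
    return {"loss": -score, "status": hyperopt.STATUS_OK}

trials = hyperopt.Trials()
best = hyperopt.fmin(
    fn=objective,
    space=space,
    algo=hyperopt.tpe.suggest,
    max_evals=25,
    trials=trials,
)
print(best)  # assignments for "x" and "x2" closest to the maximum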
5 changes: 2 additions & 3 deletions pkg/suggestion/v1beta1/internal/search_space.py
@@ -57,9 +57,8 @@ def convert_to_combinations(search_space):
elif parameter.type == DOUBLE:
if parameter.step == "" or parameter.step is None:
raise Exception(
"Param {} step is nil; For discrete search space, all parameters must include step".format(
parameter.name
)
"Param {} step is nil; For discrete search space, all parameters "
"must include step".format(parameter.name)
)
double_list = np.arange(
float(parameter.min),
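The step check above exists because a DOUBLE range can only be expanded into grid combinations when a step is provided; a small sketch with invented bounds, roughly what the truncated np.arange call does:

import numpy as np

p_min, p_max, p_step = 0.1, 0.5, 0.1  # hypothetical DOUBLE parameter

# Without a step there is no finite list of values between min and max,
# which is why the validation above raises for a missing step.
double_list = np.arange(p_min, p_max + p_step, p_step)
print([round(v, 2) for v in double_list])  # [0.1, 0.2, 0.3, 0.4, 0.5]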
13 changes: 8 additions & 5 deletions pkg/suggestion/v1beta1/internal/trial.py
@@ -80,11 +80,14 @@ def __str__(self):
", ".join([str(e) for e in self.assignments])
)
else:
return "Trial(assignment: {}, metric_name: {}, metric: {}, additional_metrics: {})".format(
", ".join([str(e) for e in self.assignments]),
self.metric_name,
self.target_metric,
", ".join(str(e) for e in self.additional_metrics),
return (
"Trial(assignment: {}, metric_name: {}, metric: {}, "
"additional_metrics: {})".format(
", ".join(str(e) for e in self.assignments),
self.metric_name,
self.target_metric,
", ".join(str(e) for e in self.additional_metrics),
)
)


15 changes: 6 additions & 9 deletions pkg/suggestion/v1beta1/nas/common/validation.py
@@ -16,10 +16,8 @@


def validate_operations(operations: list[api_pb2.Operation]) -> (bool, str):

# Validate each operation
for operation in operations:

# Check OperationType
if not operation.operation_type:
return False, "Missing operationType in Operation:\n{}".format(operation)
@@ -31,7 +29,6 @@ def validate_operations(operations: list[api_pb2.Operation]) -> (bool, str):
# Validate each ParameterConfig in Operation
parameters_list = list(operation.parameter_specs.parameters)
for parameter in parameters_list:

# Check Name
if not parameter.name:
return False, "Missing Name in ParameterConfig:\n{}".format(parameter)
@@ -78,16 +75,16 @@ def validate_operations(operations: list[api_pb2.Operation]) -> (bool, str):
):
return (
False,
"Step parameter should be > 0 in ParameterConfig.feasibleSpace:\n{}".format(
parameter
),
"Step parameter should be > 0 in ParameterConfig.feasibleSpace:\n"
"{}".format(parameter),
)
except Exception as e:
return (
False,
"failed to validate ParameterConfig.feasibleSpace \n{parameter}):\n{exception}".format(
parameter=parameter, exception=e
),
(
"failed to validate ParameterConfig.feasibleSpace \n"
"{parameter}):\n{exception}"
).format(parameter=parameter, exception=e),
)

return True, ""
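The function above follows a plain (ok, message) early-return style; a minimal, hypothetical illustration of the same pattern (the Operation type here is a stand-in, not Katib's api_pb2.Operation):

from dataclasses import dataclass, field


@dataclass
class Operation:  # stand-in for api_pb2.Operation, for illustration only
    operation_type: str = ""
    parameter_names: list = field(default_factory=list)


def validate(operations) -> (bool, str):
    for operation in operations:
        # Check OperationType first, then each parameter, returning early
        # with a descriptive message on the first problem found.
        if not operation.operation_type:
            return False, "Missing operationType in Operation:\n{}".format(operation)
        for name in operation.parameter_names:
            if not name:
                return False, "Missing Name in ParameterConfig"
    return True, ""


ok, msg = validate([Operation("convolution", ["filter_size"])])
print(ok, msg)  # True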
34 changes: 22 additions & 12 deletions pkg/suggestion/v1beta1/nas/enas/service.py
@@ -237,7 +237,10 @@ def ValidateAlgorithmSettings(self, request, context):
):
return self.set_validate_context_error(
context,
"Algorithm Setting {}: {} with {} type must be in range ({}, {}]".format(
(
"Algorithm Setting {}: {} with {} type must be in range "
"({}, {})"
).format(
setting.name,
converted_value,
setting_type.__name__,
@@ -309,7 +312,8 @@ def GetSuggestions(self, request, context):

if self.is_first_run:
self.logger.info(
">>> First time running suggestion for {}. Random architecture will be given.".format(
">>> First time running suggestion for {}. "
"Random architecture will be given.".format(
experiment.experiment_name
)
)
@@ -319,7 +323,8 @@ def GetSuggestions(self, request, context):
for _ in range(experiment.num_trials):
candidates.append(sess.run(controller_ops["sample_arc"]))

# TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
# TODO: will use PVC to store the checkpoint to protect
# against unexpected suggestion pod restart
saver.save(sess, experiment.ctrl_cache_file)

self.is_first_run = False
@@ -331,17 +336,22 @@ def GetSuggestions(self, request, context):
result = self.GetEvaluationResult(request.trials)

# TODO: (andreyvelich) I deleted this part, should it be handle by controller?
# Sometimes training container may fail and GetEvaluationResult() will return None
# Sometimes training container may fail and GetEvaluationResult()
# will return None
# In this case, the Suggestion will:
# 1. Firstly try to respawn the previous trials after waiting for RESPAWN_SLEEP seconds
# 2. If respawning the trials for RESPAWN_LIMIT times still cannot collect valid results,
# then fail the task because it may indicate that the training container has errors.
# 1. Firstly try to respawn the previous trials after waiting for
# RESPAWN_SLEEP seconds
# 2. If respawning the trials for RESPAWN_LIMIT times still cannot
# collect valid results,
# then fail the task because it may indicate that the training
# container has errors.
if result is None:
self.logger.warning(
">>> Suggestion has spawned trials, but they all failed."
)
self.logger.warning(
">>> Please check whether the training container is correctly implemented"
">>> Please check whether the training container "
"is correctly implemented"
)
self.logger.info(
">>> Experiment {} failed".format(
@@ -351,7 +361,8 @@ def GetSuggestions(self, request, context):
return []

# This LSTM network is designed to maximize the metrics
# However, if the user wants to minimize the metrics, we can take the negative of the result
# However, if the user wants to minimize the metrics,
# we can take the negative of the result

if experiment.opt_direction == api_pb2.MINIMIZE:
result = -result
@@ -426,9 +437,8 @@ def GetSuggestions(self, request, context):
nn_config_str = str(nn_config_json).replace('"', "'")

self.logger.info(
"\n>>> New Neural Network Architecture Candidate #{} (internal representation):".format(
i
)
"\n>>> New Neural Network Architecture Candidate #{} "
"(internal representation):".format(i)
)
self.logger.info(organized_arc_json)
self.logger.info("\n>>> Corresponding Seach Space Description:")
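To make the respawn TODO above concrete, a hedged sketch of the retry-then-fail behaviour it describes; RESPAWN_SLEEP, RESPAWN_LIMIT, and the two callables are hypothetical stand-ins, and the current code no longer implements this itself:

import time

RESPAWN_SLEEP = 20   # hypothetical seconds to wait before respawning trials
RESPAWN_LIMIT = 10   # hypothetical number of respawn attempts before failing


def collect_results_with_respawn(get_evaluation_result, respawn_trials):
    # Retry the previous trials a bounded number of times; if no valid
    # result ever comes back, assume the training container has errors.
    for _ in range(RESPAWN_LIMIT):
        result = get_evaluation_result()
        if result is not None:
            return result
        time.sleep(RESPAWN_SLEEP)
        respawn_trials()
    raise RuntimeError(
        "No valid evaluation result after {} respawns; "
        "the training container may have errors.".format(RESPAWN_LIMIT)
    )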
3 changes: 2 additions & 1 deletion pkg/suggestion/v1beta1/optuna/service.py
@@ -244,7 +244,8 @@ def _validate_grid_setting(cls, experiment):
if max_trial_count > num_combinations:
return (
False,
"Max Trial Count: {max_trial} > all possible search combinations: {combinations}".format(
"Max Trial Count: {max_trial} > all possible search combinations: "
"{combinations}".format(
max_trial=max_trial_count, combinations=num_combinations
),
)
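The guard above compares the requested number of trials with the size of the full grid; a small sketch of how that count can be derived (the parameter values are invented):

import math

# Hypothetical discrete search space for a grid sweep.
grid = {
    "learning_rate": [0.01, 0.05, 0.1],
    "optimizer": ["sgd", "adam"],
}

max_trial_count = 10
num_combinations = math.prod(len(values) for values in grid.values())  # 3 * 2 = 6

if max_trial_count > num_combinations:
    print(
        "Max Trial Count: {max_trial} > all possible search combinations: "
        "{combinations}".format(max_trial=max_trial_count, combinations=num_combinations)
    )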
11 changes: 5 additions & 6 deletions pkg/suggestion/v1beta1/pbt/service.py
@@ -27,7 +27,7 @@
HyperParameter,
HyperParameterSearchSpace,
)
from pkg.suggestion.v1beta1.internal.trial import Assignment, Trial
from pkg.suggestion.v1beta1.internal.trial import Assignment

logger = logging.getLogger(__name__)

@@ -70,7 +70,8 @@ def ValidateAlgorithmSettings(self, request, context):
):
return self._set_validate_context_error(
context,
"Param(resample_probability) should be null to perturb at 0.8 or 1.2, or be between 0 and 1, inclusive, to resample",
"Param(resample_probability) should be null to perturb at 0.8 or 1.2, "
"or be between 0 and 1, inclusive, to resample",
)

return api_pb2.ValidateAlgorithmSettingsReply()
@@ -98,7 +99,7 @@ def GetSuggestions(self, request, context):
float(settings["truncation_threshold"]),
(
None
if not "resample_probability" in settings
if "resample_probability" not in settings
else float(settings["resample_probability"])
),
search_space,
@@ -184,7 +185,7 @@ def get(self):
labels = {
"pbt.suggestion.katib.kubeflow.org/generation": self.generation,
}
if not self.parent is None:
if self.parent is not None:
labels["pbt.suggestion.katib.kubeflow.org/parent"] = self.parent
return assignments, labels, self.uid

@@ -284,9 +285,7 @@ def get(self):
return obj.get()

def update(self, trial):
trial_labels = trial.spec.labels
uid = trial.name
generation = trial_labels["pbt.suggestion.katib.kubeflow.org/generation"]

# Do not update active/pending trials
if trial.status.condition in (
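For context on the resample_probability message above, a hedged sketch of the standard PBT explore step it refers to: when resample_probability is None the value is perturbed by a 0.8 or 1.2 factor, otherwise it is resampled from its range with that probability. The parameter range below is made up, and this is an illustration, not Katib's implementation:

import random


def explore(value, low, high, resample_probability=None):
    if resample_probability is None:
        # Perturb: multiply by 0.8 or 1.2, then clip back into the range.
        value = value * random.choice([0.8, 1.2])
    elif random.random() < resample_probability:
        # Resample uniformly from the parameter's range.
        value = random.uniform(low, high)
    return min(max(value, low), high)


print(explore(0.01, 1e-4, 1e-1))                            # perturbed learning rate
print(explore(0.01, 1e-4, 1e-1, resample_probability=0.5))  # possibly resampled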
