Skip to content
This repository has been archived by the owner on Jan 6, 2025. It is now read-only.

Commit

Permalink
Update to asreview 0.7 (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
qubixes authored Mar 12, 2020
1 parent 27de10a commit 2efd014
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 22 deletions.
2 changes: 1 addition & 1 deletion asreviewcontrib/hyperopt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@
from asreviewcontrib.hyperopt.show_trials import ShowTrialsEntryPoint
from asreviewcontrib.hyperopt.create_config import CreateConfigEntryPoint

__version__ = "0.1.3"
__version__ = "0.1.4"
__extension_name__ = "asreview-hyperopt"
11 changes: 10 additions & 1 deletion asreviewcontrib/hyperopt/active.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ def _parse_arguments():
action='store_true',
help="Use the mpi implementation.",
)
parser.add_argument(
"--server_job",
dest='server_job',
action='store_true',
help='Run job on the server. It will incur less overhead of used CPUs,'
' but more latency of workers waiting for the server to finish its own'
' job. Only makes sense in combination with the flag --mpi.'
)
return parser


Expand All @@ -106,6 +114,7 @@ def main(argv=sys.argv[1:]):
n_iter = args["n_iter"]
use_mpi = args["use_mpi"]
n_run = args["n_run"]
server_job = args["server_job"]

data_names = get_data_names(datasets)
if use_mpi:
Expand All @@ -116,7 +125,7 @@ def main(argv=sys.argv[1:]):
job_runner = ActiveJobRunner(
data_names, model_name=model_name, query_name=query_name,
balance_name=balance_name, feature_name=feature_name,
executor=executor, n_run=n_run)
executor=executor, n_run=n_run, server_job=server_job)

if use_mpi:
mpi_hyper_optimize(job_runner, n_iter)
Expand Down
23 changes: 12 additions & 11 deletions asreviewcontrib/hyperopt/active_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ActiveJobRunner():
def __init__(self, data_names, model_name, query_name, balance_name,
feature_name, executor=serial_executor,
n_run=8, n_papers=1502, n_instances=50, n_included=1,
n_excluded=1):
n_excluded=1, server_job=False):

self.trials_dir, self.trials_fp = get_trial_fp(
data_names, model_name=model_name, balance_name=balance_name,
Expand All @@ -60,6 +60,7 @@ def __init__(self, data_names, model_name, query_name, balance_name,
self.n_included = n_included
self.n_excluded = n_excluded

self.server_job = server_job
self.data_dir = "data"
self._cache = {data_name: {"priors": {}}
for data_name in data_names}
Expand All @@ -68,7 +69,8 @@ def create_loss_function(self):
def objective_func(param):
jobs = create_jobs(param, self.data_names, self.n_run)

self.executor(jobs, self, stop_workers=False)
self.executor(jobs, self, stop_workers=False,
server_job=self.server_job)
losses = []
for data_name in self.data_names:
data_dir = os.path.join(self.trials_dir, 'current', data_name)
Expand All @@ -79,22 +81,21 @@ def objective_func(param):

def execute(self, param, data_name, i_run):
split_param = get_split_param(param)
log_file = get_log_file_name(self.trials_dir, data_name, i_run)
state_file = get_state_file_name(self.trials_dir, data_name, i_run)
try:
os.remove(log_file)
os.remove(state_file)
except FileNotFoundError:
pass

prior_included, prior_excluded = self.get_cached_priors(
data_name, i_run)
start_idx = self.get_cached_priors(data_name, i_run)

reviewer = get_reviewer(
data_fp_from_name(self.data_dir, data_name),
mode='simulate', model=self.model_name,
query_strategy=self.query_name, balance_strategy=self.balance_name,
feature_extraction=self.feature_name, n_instances=self.n_instances,
n_papers=self.n_papers, log_file=log_file,
prior_included=prior_included, prior_excluded=prior_excluded,
n_papers=self.n_papers, state_file=state_file,
prior_idx=start_idx,
**split_param)

reviewer.review()
Expand All @@ -117,9 +118,9 @@ def get_cached_priors(self, data_name, i_run):
zeros = np.where(as_data.labels == 0)[0]
included = np.random.choice(ones, self.n_included, replace=False)
excluded = np.random.choice(zeros, self.n_excluded, replace=False)
self._cache[data_name]["priors"][i_run] = (included, excluded)
self._cache[data_name]["priors"][i_run] = np.append(included, excluded)

return included, excluded
return self._cache[data_name]["priors"][i_run]

def get_hyper_space(self):
model_hs, model_hc = get_model(self.model_name).hyper_space()
Expand Down Expand Up @@ -210,6 +211,6 @@ def create_jobs(param, data_names, n_run):
return jobs


def get_log_file_name(trials_dir, data_name, i_run):
def get_state_file_name(trials_dir, data_name, i_run):
return os.path.join(trials_dir, "current", data_name,
f"results_{i_run}.h5")
12 changes: 11 additions & 1 deletion asreviewcontrib/hyperopt/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ def _parse_arguments():
default=8,
help="Number of runs per dataset."
)
parser.add_argument(
"--server_job",
dest='server_job',
action='store_true',
help='Run job on the server. It will incur less overhead of used CPUs,'
' but more latency of workers waiting for the server to finish its own'
' job. Only makes sense in combination with the flag --mpi.'
)

return parser


Expand All @@ -85,6 +94,7 @@ def main(argv=sys.argv[1:]):
n_iter = args["n_iter"]
use_mpi = args["use_mpi"]
n_run = args["n_run"]
server_job = args["server_job"]

data_names = get_data_names(datasets)
if use_mpi:
Expand All @@ -93,7 +103,7 @@ def main(argv=sys.argv[1:]):
executor = serial_executor

job_runner = ClusterJobRunner(data_names, feature_name, executor=executor,
n_cluster_run=n_run)
n_cluster_run=n_run, server_job=server_job)

if use_mpi:
mpi_hyper_optimize(job_runner, n_iter)
Expand Down
6 changes: 4 additions & 2 deletions asreviewcontrib/hyperopt/cluster_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

class ClusterJobRunner():
def __init__(self, data_names, feature_name, executor=serial_executor,
n_cluster_run=30, n_feature_run=1):
n_cluster_run=30, n_feature_run=1, server_job=False):

self.trials_dir, self.trials_fp = get_trial_fp(
data_names, feature_name=feature_name, hyper_type="cluster")
Expand All @@ -50,14 +50,16 @@ def __init__(self, data_names, feature_name, executor=serial_executor,
self.n_cluster_run = n_cluster_run
self.n_feature_run = n_feature_run
self.data_dir = "data"
self.server_job = server_job
self._cache = {data_name: {}
for data_name in data_names}

def create_loss_function(self):
def objective_func(param):
jobs = create_jobs(param, self.data_names, self.n_feature_run)

self.executor(jobs, self, stop_workers=False)
self.executor(jobs, self, stop_workers=False,
server_job=self.server_job)
losses = []
for data_name in self.data_names:
label_fp = get_label_fp(self.trials_dir, data_name)
Expand Down
2 changes: 1 addition & 1 deletion asreviewcontrib/hyperopt/mpi_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def mpi_worker(job_runner):
return None, None


def mpi_executor(all_jobs, job_runner=None, server_job=True,
def mpi_executor(all_jobs, job_runner=None, server_job=False,
stop_workers=True):
comm = MPI.COMM_WORLD
n_proc = comm.Get_size()
Expand Down
12 changes: 11 additions & 1 deletion asreviewcontrib/hyperopt/passive.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ def _parse_arguments():
action='store_true',
help="Use the mpi implementation.",
)
parser.add_argument(
"--server_job",
dest='server_job',
action='store_true',
help='Run job on the server. It will incur less overhead of used CPUs,'
' but more latency of workers waiting for the server to finish its own'
' job. Only makes sense in combination with the flag --mpi.'
)
return parser


Expand All @@ -99,6 +107,7 @@ def main(argv=sys.argv[1:]):
n_iter = args["n_iter"]
use_mpi = args["use_mpi"]
n_run = args["n_run"]
server_job = args["server_job"]

data_names = get_data_names(datasets)
if use_mpi:
Expand All @@ -107,7 +116,8 @@ def main(argv=sys.argv[1:]):
executor = serial_executor

job_runner = PassiveJobRunner(data_names, model_name, balance_name,
feature_name, executor=executor, n_run=n_run)
feature_name, executor=executor, n_run=n_run,
server_job=server_job)

if use_mpi:
mpi_hyper_optimize(job_runner, n_iter)
Expand Down
6 changes: 4 additions & 2 deletions asreviewcontrib/hyperopt/passive_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

class PassiveJobRunner():
def __init__(self, data_names, model_name, balance_name, feature_name,
executor=serial_executor, n_run=10):
executor=serial_executor, n_run=10, server_job=False):

self.trials_dir, self.trials_fp = get_trial_fp(
data_names, model_name=model_name, balance_name=balance_name,
Expand All @@ -53,6 +53,7 @@ def __init__(self, data_names, model_name, balance_name, feature_name,
self.feature_class = get_feature_class(feature_name)
self.balance_class = get_balance_class(balance_name)

self.server_job = server_job
self.data_names = data_names
self.executor = executor
self.n_run = n_run
Expand All @@ -64,7 +65,8 @@ def create_loss_function(self):
def objective_func(param):
jobs = create_jobs(param, self.data_names, self.n_run)

self.executor(jobs, self, stop_workers=False)
self.executor(jobs, self, stop_workers=False,
server_job=self.server_job)
losses = []
for data_name in self.data_names:
label_fp = get_label_fp(self.trials_dir, data_name)
Expand Down
2 changes: 1 addition & 1 deletion asreviewcontrib/hyperopt/serial_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.


def serial_executor(jobs, job_runner, stop_workers=False):
def serial_executor(jobs, job_runner, stop_workers=False, server_job=True):
for job in jobs:
job_runner.execute(**job)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
packages=find_namespace_packages(include=['asreviewcontrib.*']),
namespace_package=["asreview"],
install_requires=[
"asreview>=0.5.0", "numpy", "tqdm", "hyperopt", "sklearn"
"asreview>=0.7.0", "numpy", "tqdm", "hyperopt", "sklearn"
],

extras_require={
Expand Down

0 comments on commit 2efd014

Please sign in to comment.