Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parallelization for single evaluation tree #24

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions pysidt/sidt.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from sklearn import linear_model
import scipy.sparse as sp
import scipy
from joblib import Parallel, delayed

logging.basicConfig(level=logging.INFO)

Expand Down Expand Up @@ -84,6 +85,7 @@ def __init__(
r_site=None,
r_morph=None,
uncertainty_prepruning=False,
n_jobs=1,
):
if nodes is None:
nodes = {}
Expand All @@ -107,6 +109,7 @@ def __init__(
self.r_morph = r_morph
self.skip_nodes = []
self.uncertainty_prepruning = uncertainty_prepruning
self.n_jobs = n_jobs

if len(nodes) > 0:
node = nodes[list(nodes.keys())[0]]
Expand All @@ -128,10 +131,12 @@ def load(self, nodes):
else:
self.root = None

def select_node(self):
def select_nodes(self):
"""
Picks a node to expand
Picks nodes to expand
"""
nodes = []

for name, node in self.nodes.items():
if len(node.items) <= 1 or node.name in self.skip_nodes:
continue
Expand All @@ -141,9 +146,9 @@ def select_node(self):

logging.info("Selected node {}".format(node.name))
logging.info("Node has {} items".format(len(node.items)))
return node
else:
return None
nodes.append(node)

return nodes

def generate_extensions(self, node, recursing=False):
"""
Expand Down Expand Up @@ -198,12 +203,14 @@ def extend_tree_from_node(self, parent):
exts = self.generate_extensions(parent)
extlist = [ext[0] for ext in exts]
if not extlist:
self.skip_nodes.append(parent.name)
return
return None
ext = self.choose_extension(parent, extlist)
new, comp = split_mols(parent.items, ext)
ind = extlist.index(ext)
grp, grpc, name, typ, indc = exts[ind]
return grp, grpc, name, new, comp

def add_extension(self, parent, grp, grpc, name, new, comp):
logging.info("Choose extension {}".format(name))

node = Node(
Expand Down Expand Up @@ -238,8 +245,9 @@ def extend_tree_from_node(self, parent):
parent.children.append(nodec)
parent.items = []
else:
for mol in new:
parent.items.remove(mol)

new_smis = {datum.mol.smiles for datum in new}
parent.items = [datum for datum in parent.items if datum.mol.smiles not in new_smis]

def descend_training_from_top(self, only_specific_match=True):
"""
Expand Down Expand Up @@ -290,11 +298,20 @@ def generate_tree(self, data=None, check_data=True):
self.clear_data()
self.root.items = data[:]

node = self.select_node()
nodes = self.select_nodes()

while nodes:
outs = Parallel(n_jobs=self.n_jobs)(
delayed(self.extend_tree_from_node)(node) for node in nodes
)

for out, node in zip(outs, nodes):
if out is None:
self.skip_nodes.append(node.name)
continue
self.add_extension(node, *out)

while node is not None:
self.extend_tree_from_node(node)
node = self.select_node()
nodes = self.select_nodes()

def fit_tree(self, data=None, confidence_level=0.95):
"""
Expand Down