Skip to content

Commit

Permalink
add comments for athina
Browse files Browse the repository at this point in the history
  • Loading branch information
cactuskid committed Sep 25, 2024
1 parent 759ddcf commit 349bdc9
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 4 deletions.
8 changes: 6 additions & 2 deletions src/HogProf/lshbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ def __init__(self,h5_oma=None,fileglob = None, taxa=None,masterTree=None, saving

if self.h5OMA:
self.HAM_PIPELINE = functools.partial( pyhamutils.get_ham_treemap_from_row, tree=self.tree_string , swap_ids=self.swap2taxcode , reformat_names = self.reformat_names ,
orthoXML_as_string = True , use_phyloxml = self.use_phyloxml , orthomapper = self.idmapper )
orthoXML_as_string = True , use_phyloxml = self.use_phyloxml , orthomapper = self.idmapper , levels = None )
else:
self.HAM_PIPELINE = functools.partial( pyhamutils.get_ham_treemap_from_row, tree=self.tree_string , swap_ids=self.swap2taxcode ,
orthoXML_as_string = False , reformat_names = self.reformat_names , use_phyloxml = self.use_phyloxml , orthomapper = self.idmapper )
orthoXML_as_string = False , reformat_names = self.reformat_names , use_phyloxml = self.use_phyloxml , orthomapper = self.idmapper , levels = None )

self.HASH_PIPELINE = functools.partial( hashutils.row2hash , taxaIndex=self.taxaIndex, treeweights=self.treeweights, wmg=wmg , lossonly = lossonly, duplonly = duplonly)
if self.h5OMA:
Expand Down Expand Up @@ -254,6 +254,10 @@ def worker(self, i, q, retq, matq, l):
df = q.get()
if df is not None :
df['tree'] = df[['Fam', 'ortho']].apply(self.HAM_PIPELINE, axis=1)
# TODO: have HAM_PIPELINE return a dictionary of results keyed by sub-HOG, e.g. { fam_sub1: { 'tree':tp , 'Fam':fam } , fam_sub2: { 'tree':tp , 'Fam':fam } , ... }
# then expand it into a dataframe, e.g. returned_df = pd.DataFrame.from_dict(df['tree'].to_dict(), orient='index')
# and merge it back with a pandas right join, e.g. df.merge( returned_df , on = 'Fam' , how = 'right' )

df[['hash','rows']] = df[['Fam', 'tree']].apply(self.HASH_PIPELINE, axis=1)
if self.fileglob:
retq.put(df[['Fam', 'hash', 'ortho']])
Expand Down
1 change: 1 addition & 0 deletions src/HogProf/utils/hashutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def row2hash(row , taxaIndex , treeweights , wmg , lossonly = False , duplonly =
#convert a dataframe row to a weighted minhash
fam, treemap = row.tolist()
hog_matrix,weighted_hash = hash_tree(treemap , taxaIndex , treeweights , wmg , lossonly = lossonly , duplonly = duplonly)

return pd.Series([weighted_hash,hog_matrix], index=['hash','rows'])

def fam2hash_hdf5(fam, hdf5, dataset = None, nsamples = 128 ):
Expand Down
6 changes: 4 additions & 2 deletions src/HogProf/utils/pyhamutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def orthoxml2numerical(orthoxml , mapper):
orthoxml = ET.tostring(root, encoding='unicode', method='xml')
return orthoxml

def get_ham_treemap_from_row(row, tree , level = None , swap_ids = True , orthoXML_as_string = True , use_phyloxml = False , use_internal_name = True ,reformat_names= False, orthomapper = None ):
def get_ham_treemap_from_row(row, tree , levels = None , swap_ids = True , orthoXML_as_string = True , use_phyloxml = False , use_internal_name = True ,reformat_names= False, orthomapper = None ):
fam, orthoxml = row
format = 'newick_string'
if use_phyloxml:
Expand All @@ -99,8 +99,10 @@ def get_ham_treemap_from_row(row, tree , level = None , swap_ids = True , orthoX
else:
quoted = True
try:
# TODO: return multiple treemaps corresponding to slices at different levels (currently only the first top-level HOG is profiled and a single treemap is returned)
ham_obj = pyham.Ham(tree, orthoxml, type_hog_file="orthoxml" , tree_format = format , use_internal_name=use_internal_name, orthoXML_as_string=orthoXML_as_string )
tp = ham_obj.create_tree_profile(hog=ham_obj.get_list_top_level_hogs()[0])
tp = ham_obj.create_tree_profile(hog=ham_obj.get_list_top_level_hogs()[0])
# TODO: check for losses / events and the number of leaves before returning
return tp.treemap
except Exception as e:
# Capture the exception and format the traceback
Expand Down

0 comments on commit 349bdc9

Please sign in to comment.