Skip to content

Commit

Permalink
backport the new report file requirement
Browse files: browse the repository at this point in the history
  • Loading branch information
meren committed Apr 19, 2016
1 parent da0a4e1 commit cc2c848
Showing 1 changed file with 25 additions and 7 deletions.
32 changes: 25 additions & 7 deletions bin/anvi-rename-bins
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import anvio
import anvio.dbops as dbops
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths
import anvio.ccollections as ccollections

from anvio.completeness import Completeness
Expand Down Expand Up @@ -36,12 +37,7 @@ def main(args):
list_collections = A('list_collections')
collection_name = A('collection_name')
prefix = A('prefix')

if not prefix:
raise ConfigError, "Anvi'o is having hard time believing that you called this function without\
a prefix to rename bins in collection '%s'." % collection_name

utils.is_this_name_OK_for_database('prefix', prefix)
report_file_path = A('report_file')

dbops.is_profile_db_and_contigs_db_compatible(profile_db, contigs_db)

Expand All @@ -52,6 +48,19 @@ def main(args):
collections.list_collections()
sys.exit()

if not prefix:
raise ConfigError, "Anvi'o is having hard time believing that you called this function without\
a prefix to rename bins in collection '%s'." % collection_name

utils.is_this_name_OK_for_database('prefix', prefix)

if not report_file_path:
raise ConfigError, "You must provide an output file name to report file changes. It may or may not\
be useful to you, but let's don't take unnecessary risks, eh? (you can use the\
`--report-file` parameter)"

filesnpaths.is_output_file_writable(report_file_path)

if not collection_name:
raise ConfigError, "You must provide a collection name."

Expand All @@ -76,19 +85,25 @@ def main(args):
bins_sorted_by_completion.append((bin_name, d['percent_complete'] - d['percent_redundancy']),)

bins_sorted_by_completion.sort(key=operator.itemgetter(1), reverse=True)
report = open(report_file_path, 'w')
report.write('old_bin_name\tnew_bin_name\n')
counter = 1
for bin_name, _ in bins_sorted_by_completion:
new_bin_name = '%s_bin_%05d' % (prefix, counter)
collection_dict[new_bin_name] = copy.deepcopy(collection_dict[bin_name])
collection_dict.pop(bin_name)

report.write('%s\t%s\n' % (bin_name, new_bin_name))

counter += 1
report.close()

# update tables with new bin names.
collections_table = dbops.TablesForCollections(profile_db, anvio.__profile__version__)
collections_table.append(collection_name, collection_dict)

run.info('Rename', 'Renaming is done for %d bins in collection "%s".' % (len(collection_dict), collection_name))
run.info('Report', '%s' % (report_file_path))


if __name__ == '__main__':
Expand All @@ -99,13 +114,16 @@ if __name__ == '__main__':
parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
parser.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
parser.add_argument(*anvio.A('collection-name'), **anvio.K('collection-name'))
parser.add_argument('--prefix', default = None, required = True,\
parser.add_argument('--prefix', default = None,
help = "Prefix for the bin names. Must be a single word, composed\
of digits and numbers. The use of the underscore character is OK,\
but that's about it (fine, the use of the dash character is OK, too\
but no more!). If the prefix is 'PREFIX', each bin will be renamed\
as 'PREFIX_Bin_00001, PREFIX_bin_00002', and so on, in the order of\
percent completion minus percent redundancy.")
parser.add_argument('--report-file', metavar = 'REPORT_FILE_PATH', default = None, \
help = "This file will report each name change event, so you can trace back\
the original names of renamed bins later.")
parser.add_argument(*anvio.A('list-collections'), **anvio.K('list-collections'))

args = parser.parse_args()
Expand Down

0 comments on commit cc2c848

Please sign in to comment.