Skip to content

Commit

Permalink
get rid of unused chloerefs
Browse files Browse the repository at this point in the history
  • Loading branch information
arabidopsis committed Dec 15, 2023
1 parent 2d9e8b4 commit ce13c19
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 70 deletions.
2 changes: 1 addition & 1 deletion bin/regression.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ G='\e[1;32m'
R='\e[1;31m'

echo "start annotations..."
time -p julia --project=. --threads=8 "$@" chloe.jl -l info annotate -o testo --numgsrefs 16 --numchloerefs 0 testfa/*.fa
time -p julia --project=. --threads=8 "$@" chloe.jl -l info annotate -o testo --numgsrefs 16 testfa/*.fa
for f in $(ls testo)
do
echo "diffing $f"
Expand Down
2 changes: 1 addition & 1 deletion bin/rregression.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ done
echo -e "index: ${index[@]}: $TOTAL/${C}$n${O}"
echo "start annotations with: ${todo[@]}"

time -p julia --threads=8 --project=. "$@" chloe.jl -l warn annotate -o testo --numgsrefs 16 --numchloerefs 0 "${todo[@]}"
time -p julia --threads=8 --project=. "$@" chloe.jl -l warn annotate -o testo --numgsrefs 16 "${todo[@]}"

for idx in ${index[@]}
do
Expand Down
2 changes: 1 addition & 1 deletion bin/tregression.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ C='\e[1;36m' # bold cyan
A='\e[1;30m' # grey

echo -e "testing ${C}${#}${O} files"
time -p julia --threads=8 --project=. chloe.jl -l warn annotate -o testo --numgsrefs 16 --numchloerefs 0 "$@"
time -p julia --threads=8 --project=. chloe.jl -l warn annotate -o testo --numgsrefs 16 "$@"

for f in "$@"
do
Expand Down
7 changes: 1 addition & 6 deletions src/annotate_genomes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,7 @@ function annotate_one_worker(db::AbstractReferenceDb,
append!(refpicks, searchhashes(hash, refhashes)[1:numrefs])
end

# find closest chloe references
refhashes = get_chloeminhashes(db, config)
if !isnothing(refhashes)
numrefs = min(config.numchloerefs, length(refhashes))
append!(refpicks, searchhashes(hash, refhashes)[1:numrefs])
end

numrefs = length(refpicks)
t2 = time_ns()

Expand Down
15 changes: 5 additions & 10 deletions src/dist/chloe_cmd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ function quiet_metafmt(level, _module, group, id, file, line)
return color, prefix, ""
end

function chloe(; gsrefsdir="default", chloerefsdir="default", numgsrefs=DEFAULT_NUMGSREFS, numchloerefs=DEFAULT_NUMGSREFS, fasta_files=String[],
function chloe(; gsrefsdir="default", numgsrefs=DEFAULT_NUMGSREFS, fasta_files=String[],
template="default", sensitivity=DEFAULT_SENSITIVITY,
output::Union{Nothing,String}=nothing, gff::Bool=false, nofilter::Bool=false)
db = Annotator.ReferenceDb(; gsrefsdir=gsrefsdir, chloerefsdir=chloerefsdir, template=template)
config = Annotator.ChloeConfig(; numgsrefs=numgsrefs, numchloerefs=numchloerefs, sensitivity=sensitivity, to_gff3=gff, nofilter=nofilter)
db = Annotator.ReferenceDb(; gsrefsdir=gsrefsdir, template=template)
config = Annotator.ChloeConfig(; numgsrefs=numgsrefs, sensitivity=sensitivity, to_gff3=gff, nofilter=nofilter)
Annotator.annotate(db, fasta_files, config, output)
end

Expand Down Expand Up @@ -86,19 +86,14 @@ function getargs()
"--reference", "-r"
arg_type = String
default = "default"
dest_name = "chloerefsdir"
dest_name = "gsrefsdir"
metavar = "DIRECTORY"
help = "reference directory [default: $(DEFAULT_CHLOEREFS)]"
help = "reference directory [default: $(DEFAULT_GSREFS)]"
"--numgsrefs"
arg_type = Int
default = DEFAULT_NUMGSREFS
dest_name = "numgsrefs"
help = "number of references to compare to [default: $(DEFAULT_NUMGSREFS)]"
"--numchloerefs"
arg_type = Int
default = DEFAULT_NUMCHLOEREFS
dest_name = "numchloerefs"
help = "number of references to compare to [default: $(DEFAULT_NUMCHLOEREFS)]"
"--template", "-t"
arg_type = String
default = "default"
Expand Down
26 changes: 9 additions & 17 deletions src/dist/chloe_distributed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ function create_responder(apispecs::Vector{Function}, addr::String, ctx::ZMQ.Con
end

function arm_procs_full(procs, backend::MayBeString=nothing, level::String="info";
gsrefsdir="default", chloerefsdir="default", template="default")
gsrefsdir="default", template="default")

@everywhere procs begin
include(joinpath($REPO_DIR, "annotate_genomes.jl"))
include(joinpath($REPO_DIR, "dist/ZMQLogger.jl"))
include(joinpath($REPO_DIR, "dist/tasks.jl"))

set_global_logger($level, $backend; topic="annotator")
global REFERENCE = Annotator.ReferenceDb(; gsrefsdir=$gsrefsdir, chloerefsdir=$chloerefsdir, template=$template)
global REFERENCE = Annotator.ReferenceDb(; gsrefsdir=$gsrefsdir, template=$template)
end
# [ @spawnat p begin
# include(joinpath(REPO_DIR, "annotate_genomes.jl"))
Expand All @@ -96,13 +96,13 @@ function arm_procs_full(procs, backend::MayBeString=nothing, level::String="info

end
function arm_procs(procs, backend::MayBeString=nothing, level::String="info";
gsrefsdir="default", chloerefsdir="default", template="default")
gsrefsdir="default", template="default")

# use when toplevel has already done
# @everywhere using Chloe
@everywhere procs begin
set_global_logger($level, $backend; topic="annotator")
global REFERENCE = Annotator.ReferenceDb(; gsrefsdir=$gsrefsdir, chloerefsdir=$chloerefsdir, template=$template)
global REFERENCE = Annotator.ReferenceDb(; gsrefsdir=$gsrefsdir, template=$template)
end
# [ @spawnat p begin
# set_global_logger(level, backend; topic="annotator")
Expand All @@ -111,7 +111,7 @@ function arm_procs(procs, backend::MayBeString=nothing, level::String="info";
end

function chloe_distributed(full::Bool=true; gsrefsdir="default", address=ZMQ_WORKER,
chloerefsdir="default", template="default", level="warn", workers=3,
template="default", level="warn", workers=3,
backend::MayBeString=nothing, broker::MayBeString=nothing)

if !isnothing(backend)
Expand All @@ -124,14 +124,11 @@ function chloe_distributed(full::Bool=true; gsrefsdir="default", address=ZMQ_WOR
if gsrefsdir == "default"
gsrefsdir = normpath(joinpath(REPO_DIR, "..", "..", DEFAULT_GSREFS))
end
if chloerefsdir == "default"
chloerefsdir = normpath(joinpath(REPO_DIR, "..", "..", DEFAULT_CHLOEREFS))
end
if template == "default"
template = normpath(joinpath(REPO_DIR, "..", "..", DEFAULT_TEMPLATE))
end
# don't wait for workers to find the wrong directory
verify_refs(gsrefsdir, chloerefsdir, template)
verify_refs(gsrefsdir, template)

# user may have added run with
# julia command -p2 etc.
Expand All @@ -148,13 +145,13 @@ function chloe_distributed(full::Bool=true; gsrefsdir="default", address=ZMQ_WOR

# arm_procs(procs, reference, backend, level)
if full
arm_procs_full(procs, backend, level; gsrefsdir=gsrefsdir, chloerefsdir=chloerefsdir, template=template)
arm_procs_full(procs, backend, level; gsrefsdir=gsrefsdir, template=template)
else
arm_procs(procs, backend, level; gsrefsdir=gsrefsdir, chloerefsdir=chloerefsdir, template=template)
arm_procs(procs, backend, level; gsrefsdir=gsrefsdir, template=template)
end

function arm(new_procs)
arm_procs_full(new_procs, backend, level; gsrefsdir=gsrefsdir, chloerefsdir=chloerefsdir, template=template)
arm_procs_full(new_procs, backend, level; gsrefsdir=gsrefsdir, template=template)
end

# it seems impossible to add new workers after the fact
Expand Down Expand Up @@ -402,11 +399,6 @@ function get_distributed_args()
dest_name = "gsrefsdir"
metavar = "DIRECTORY"
help = "reference directory [default: $(DEFAULT_GSREFS)]"
"--chloerefsdir", "-c"
arg_type = String
default = "default"
dest_name = "chloerefsdir"
help = "reference minhashes [default: $(DEFAULT_CHLOEREFS)]"
"--template", "-t"
arg_type = String
default = "default"
Expand Down
2 changes: 0 additions & 2 deletions src/globals.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
const REPO_DIR = dirname(@__FILE__)
const DEFAULT_GSREFS = "chloe_references/gsrefs"
const DEFAULT_NUMGSREFS = 16
const DEFAULT_CHLOEREFS = "chloe_references/chloerefs"
const DEFAULT_NUMCHLOEREFS = 0
const DEFAULT_TEMPLATE = "chloe_references/templates.tsv"
const DEFAULT_SENSITIVITY = 0.5

Expand Down
41 changes: 9 additions & 32 deletions src/reference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,26 @@ abstract type AbstractReferenceDb end
mutable struct ReferenceDb <: AbstractReferenceDb
lock::ReentrantLock
gsrefsdir::String
chloerefsdir::String
template_file::String
templates::Union{Nothing,Dict{String,FeatureTemplate}}
gsrefhashes::Union{Nothing,Dict{String,Vector{Int64}}}
chloerefhashes::Union{Nothing,Dict{String,Vector{Int64}}}
end

struct ChloeConfig
numgsrefs::Int
numchloerefs::Int
sensitivity::Real
to_gff3::Bool
nofilter::Bool
end

function ReferenceDb(; gsrefsdir="default", chloerefsdir="default", template="default")::ReferenceDb
function ReferenceDb(; gsrefsdir="default", template="default")::ReferenceDb
if gsrefsdir == "default"
gsrefsdir = normpath(joinpath(REPO_DIR, "..", "..", DEFAULT_GSREFS))
end
if chloerefsdir == "default"
chloerefsdir = normpath(joinpath(REPO_DIR, "..", "..", DEFAULT_CHLOEREFS))
end
if template == "default"
template = normpath(joinpath(REPO_DIR, "..", "..", DEFAULT_TEMPLATE))
end
return ReferenceDb(ReentrantLock(), gsrefsdir, chloerefsdir, template, nothing, nothing, nothing)
return ReferenceDb(ReentrantLock(), gsrefsdir, template, nothing, nothing)
end

function get_templates(db::ReferenceDb)
Expand All @@ -51,23 +45,11 @@ function get_gsminhashes(db::ReferenceDb, config::ChloeConfig)
end
end

function get_chloeminhashes(db::ReferenceDb, config::ChloeConfig)
config.numchloerefs < 1 && return nothing
lock(db.lock) do
if isnothing(db.chloerefhashes)
db.chloerefhashes = readminhashes(normpath(joinpath(db.chloerefsdir, "reference_minhashes.hash")))
end
return db.chloerefhashes
end
end

function get_single_reference!(db::ReferenceDb, refID::AbstractString, reference_feature_counts::Dict{String,Int})::SingleReference
path = findfastafile(db.gsrefsdir, refID)
if isnothing(path)
path = findfastafile(db.chloerefsdir, refID)
end
if isnothing(path) || !isfile(path)
msg = "unable to find $(refID) fasta file in $(db.gsrefsdir) or in $(db.chloerefsdir)!"
msg = "unable to find $(refID) fasta file in $(db.gsrefsdir)!"
@error msg
throw(ArgumentError(msg))
end
Expand All @@ -77,7 +59,7 @@ function get_single_reference!(db::ReferenceDb, refID::AbstractString, reference
read!(reader, ref)
sffpath = path[1:findlast('.', path)] * "sff" #assumes fasta files and sff files differ only by the file name extension
if !isfile(sffpath)
msg = "unable to find $(refID) sff file in $(db.gsrefsdir) or in $(db.chloerefsdir)!"
msg = "unable to find $(refID) sff file in $(db.gsrefsdir)!"
@error msg
throw(ArgumentError(msg))
end
Expand All @@ -87,11 +69,11 @@ function get_single_reference!(db::ReferenceDb, refID::AbstractString, reference
end
end

const KWARGS = ["numgsrefs", "numchloerefs", "sensitivity", "to_gff3", "nofilter"]
const KWARGS = ["numgsrefs", "sensitivity", "to_gff3", "nofilter"]

function ChloeConfig(; numgsrefs=DEFAULT_NUMGSREFS, numchloerefs=DEFAULT_NUMCHLOEREFS, sensitivity=DEFAULT_SENSITIVITY,
function ChloeConfig(; numgsrefs=DEFAULT_NUMGSREFS, sensitivity=DEFAULT_SENSITIVITY,
to_gff3::Bool=false, nofilter::Bool=false)
return ChloeConfig(numgsrefs, numchloerefs, sensitivity, to_gff3, nofilter)
return ChloeConfig(numgsrefs, sensitivity, to_gff3, nofilter)
end

# needs to be V <: Any since this is coming from a JSON blob
Expand All @@ -108,22 +90,17 @@ function ChloeConfig(dict::Dict{String,V} where {V<:Any})
return ChloeConfig(; Dict(Symbol(k) => cvt(k, v) for (k, v) in dict if k in KWARGS)...)
end
function Base.show(io::IO, c::ChloeConfig)
print(io, "ChloeConfig[numgsrefs=$(c.numgsrefs), numchloerefs=$(c.numchloerefs) sensitivity=$(c.sensitivity), nofilter=$(c.nofilter)]")
print(io, "ChloeConfig[numgsrefs=$(c.numgsrefs), sensitivity=$(c.sensitivity), nofilter=$(c.nofilter)]")
end

function verify_refs(gsrefsdir, chloerefsdir, template)
function verify_refs(gsrefsdir, template)
# used by master process to check reference directory
# *before* starting worker processes...
if !isdir(gsrefsdir)
msg = "Reference directory $(gsrefsdir) is not a directory!"
@error msg
throw(ArgumentError(msg))
end
# if !isdir(chloerefsdir)
# msg = "Reference directory $(chloerefsdir) is not a directory!"
# @error msg
# throw(ArgumentError(msg))
# end
end

function read_single_reference!(refdir::String, refID::AbstractString, reference_feature_counts::Dict{String,Int})::SingleReference
Expand Down

0 comments on commit ce13c19

Please sign in to comment.