Skip to content

Commit

Permalink
Use standard library calls.
Browse files Browse the repository at this point in the history
  • Loading branch information
orenbenkiki committed Apr 11, 2024
1 parent 1e46f2c commit 6974030
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 150 deletions.
47 changes: 46 additions & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.2"
manifest_format = "2.0"
project_hash = "a43125d8a2b1a6ed36831f1ca19befab2b02d57d"
project_hash = "fcaafb7f3c2c1e84d828c7b23f10b06952b4eee1"

[[deps.ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
Expand All @@ -26,6 +26,12 @@ git-tree-sha1 = "18d7f3e82c1a80dd38c16453b8fd3f0a7db92f23"
uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
version = "0.9.7"

[[deps.Clustering]]
deps = ["Distances", "LinearAlgebra", "NearestNeighbors", "Printf", "Random", "SparseArrays", "Statistics", "StatsBase"]
git-tree-sha1 = "9ebb045901e9bbf58767a9f34ff89831ed711aae"
uuid = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
version = "0.15.7"

[[deps.CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"]
git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73"
Expand Down Expand Up @@ -123,6 +129,20 @@ git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae"
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
version = "1.9.1"

[[deps.Distances]]
deps = ["LinearAlgebra", "Statistics", "StatsAPI"]
git-tree-sha1 = "66c4c81f259586e8f002eacebc177e1fb06363b0"
uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
version = "0.10.11"

[deps.Distances.extensions]
DistancesChainRulesCoreExt = "ChainRulesCore"
DistancesSparseArraysExt = "SparseArrays"

[deps.Distances.weakdeps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[[deps.Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand Down Expand Up @@ -350,6 +370,12 @@ git-tree-sha1 = "0ae91efac93c3859f5c812a24c9468bb9e50b028"
uuid = "86f7a689-2022-50b4-a561-43c23ac3c673"
version = "0.10.1"

[[deps.NearestNeighbors]]
deps = ["Distances", "StaticArrays"]
git-tree-sha1 = "ded64ff6d4fdd1cb68dfcbb818c69e144a5b2e4c"
uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
version = "0.4.16"

[[deps.NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
version = "1.2.0"
Expand Down Expand Up @@ -461,6 +487,25 @@ deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
version = "1.10.0"

[[deps.StaticArrays]]
deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"]
git-tree-sha1 = "bf074c045d3d5ffd956fa0a461da38a44685d6b2"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.9.3"

[deps.StaticArrays.extensions]
StaticArraysChainRulesCoreExt = "ChainRulesCore"
StaticArraysStatisticsExt = "Statistics"

[deps.StaticArrays.weakdeps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[[deps.StaticArraysCore]]
git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d"
uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
version = "1.4.2"

[[deps.Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.1.0"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Daf = "1375bf9c-a47d-45a1-aad5-626dd8629d98"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Expand Down
2 changes: 1 addition & 1 deletion docs/v0.1.0/.documenter-siteinfo.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-04-10T14:50:53","documenter_version":"1.3.0"}}
{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-04-11T20:07:03","documenter_version":"1.3.0"}}
166 changes: 18 additions & 148 deletions src/spheres.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export compute_spheres!

using Base.Iterators
using Base.Threads
using Clustering
using Daf
using Daf.GenericLogging
using Daf.GenericTypes
Expand Down Expand Up @@ -67,50 +68,35 @@ CONTRACT
daf["/ gene / metacell : fraction % Log base 2 eps $(gene_fraction_regularization)"].array
genes_metacells_total_UMIs = get_matrix(daf, "gene", "metacell", "total_UMIs").array

check_efficient_action("compute_spheres", Columns, "genes_metacells_log_fraction", genes_metacells_log_fraction)
check_efficient_action("compute_spheres", Columns, "genes_metacells_total_UMIs", genes_metacells_total_UMIs)

sorted_distance_pairs = compute_sorted_distance_pairs(
daf,
max_sphere_fold_factor,
min_significant_gene_UMIs,
genes_metacells_log_fraction,
genes_metacells_total_UMIs,
)

spheres_of_metacells = compute_sphere_of_metacells(
sorted_distance_pairs,
max_sphere_fold_factor,
min_significant_gene_UMIs,
genes_metacells_log_fraction,
genes_metacells_total_UMIs,
)
@debug "compute..."

distances = compute_distances(min_significant_gene_UMIs, genes_metacells_log_fraction, genes_metacells_total_UMIs)
clusters = hclust(distances; linkage = :complete, uplo = :U) # NOJET
spheres_of_metacells = Vector{UInt32}(cutree(clusters; h = max_sphere_fold_factor))
metacells_of_spheres = collect_group_members(spheres_of_metacells)
sphere_names = group_names(daf, "metacell", metacells_of_spheres; prefix = "S")

@debug "group..."
@debug "write..."

sphere_names = group_names(daf, "metacell", metacells_of_spheres; prefix = "S")
add_axis!(daf, "sphere", sphere_names)
set_vector!(daf, "metacell", "sphere", sphere_names[spheres_of_metacells])

return nothing
end

@logged function compute_sorted_distance_pairs( # untested
daf::DafReader,
max_sphere_fold_factor::AbstractFloat,
@logged function compute_distances( # untested
min_significant_gene_UMIs::Unsigned,
genes_metacells_log_fraction::AbstractMatrix{<:AbstractFloat},
genes_metacells_total_UMIs::AbstractMatrix{<:Unsigned},
)::Vector{Tuple{Float32, UInt32, UInt32}}
n_metacells = axis_length(daf, "metacell")
n_pairs = div(n_metacells * (n_metacells - 1), 2)
)::Matrix{Float32}
check_efficient_action("compute_spheres", Columns, "genes_metacells_log_fraction", genes_metacells_log_fraction)
check_efficient_action("compute_spheres", Columns, "genes_metacells_total_UMIs", genes_metacells_total_UMIs)
@assert size(genes_metacells_log_fraction) == size(genes_metacells_total_UMIs)

distance_pairs = Vector{Tuple{Float32, UInt32, UInt32}}(undef, n_pairs)
next_pair_index = Atomic{Int64}(1)
n_metacells = size(genes_metacells_log_fraction, 2)
distances = Matrix{Float32}(undef, n_metacells, n_metacells)
distances[1, 1] = 0.0

@debug "collect close distance pairs..."
@threads for base_metacell in reverse(2:n_metacells)
@views genes_log_fraction_of_base_metacell = genes_metacells_log_fraction[:, base_metacell]
@views genes_log_fraction_of_other_metacells = genes_metacells_log_fraction[:, 1:(base_metacell - 1)]
Expand All @@ -121,126 +107,10 @@ end
abs.(genes_log_fraction_of_other_metacells .- genes_log_fraction_of_base_metacell) .*
(genes_total_UMIs_of_other_metacells .+ genes_total_UMIs_of_base_metacell .>= min_significant_gene_UMIs)
distances_of_other_metacells = maximum(fold_factors_of_other_metacells; dims = 1) # NOJET
is_close_of_other_metacells = distances_of_other_metacells .<= max_sphere_fold_factor
n_close = sum(is_close_of_other_metacells)
pair_index = atomic_add!(next_pair_index, n_close)
for (other_metacell, is_close) in enumerate(is_close_of_other_metacells)
if is_close
distance = distances_of_other_metacells[other_metacell]
distance_pairs[pair_index] = (distance, base_metacell, other_metacell)
pair_index += 1
end
end
end

n_close = next_pair_index[] - 1
resize!(distance_pairs, n_close)
@debug "collected $(n_close) close distance pairs, sort..."
sort!(distance_pairs)
return distance_pairs
end

@logged function compute_sphere_of_metacells( # untested
sorted_distance_pairs::Vector{Tuple{Float32, UInt32, UInt32}},
max_sphere_fold_factor::AbstractFloat,
min_significant_gene_UMIs::Unsigned,
genes_metacells_log_fraction::AbstractMatrix{<:AbstractFloat},
genes_metacells_total_UMIs::AbstractMatrix{<:Unsigned},
)::Vector{UInt32}
n_metacells = size(genes_metacells_log_fraction, 2)
spheres_of_metacells = collect(UInt32, 1:n_metacells)
distinct_spheres_set = Set{Tuple{UInt32, UInt32}}()
metacells_of_spheres = Vector{Vector{UInt32}}()
for metacell in 1:n_metacells
push!(metacells_of_spheres, UInt32[metacell])
end

for (_, first_metacell, second_metacell) in sorted_distance_pairs
first_sphere = spheres_of_metacells[first_metacell]
second_sphere = spheres_of_metacells[second_metacell]
if first_sphere != second_sphere
low_sphere = min(first_sphere, second_sphere)
high_sphere = max(first_sphere, second_sphere)
spheres_key = (low_sphere, high_sphere)
if spheres_key in distinct_spheres_set
continue
end
if !can_merge(
low_sphere,
high_sphere,
metacells_of_spheres,
max_sphere_fold_factor,
min_significant_gene_UMIs,
genes_metacells_log_fraction,
genes_metacells_total_UMIs,
)
push!(distinct_spheres_set, spheres_key)
end
merge_spheres(low_sphere, high_sphere, spheres_of_metacells, metacells_of_spheres)
end
distances[base_metacell, base_metacell] = 0.0
distances[1:(base_metacell - 1), base_metacell] = distances_of_other_metacells
end

compact_groups!(spheres_of_metacells)
return spheres_of_metacells
end

function can_merge( # untested
low_sphere::Integer,
high_sphere::Integer,
metacells_of_spheres::Vector{Vector{UInt32}},
max_sphere_fold_factor::AbstractFloat,
min_significant_gene_UMIs::Unsigned,
genes_metacells_log_fraction::AbstractMatrix{<:AbstractFloat},
genes_metacells_total_UMIs::AbstractMatrix{<:Unsigned},
)::Bool
low_metacells = metacells_of_spheres[low_sphere]
high_metacells = metacells_of_spheres[high_sphere]
for (low_metacell, high_metacell) in product(low_metacells, high_metacells)
if compute_distance(
low_metacell,
high_metacell,
min_significant_gene_UMIs,
genes_metacells_log_fraction,
genes_metacells_total_UMIs,
) > max_sphere_fold_factor
return false
end
end

return true
end

function compute_distance( # untested
first_metacell::Integer,
second_metacell::Integer,
min_significant_gene_UMIs::Unsigned,
genes_metacells_log_fraction::AbstractMatrix{<:AbstractFloat},
genes_metacells_total_UMIs::AbstractMatrix{<:Unsigned},
)::Float32
@views genes_log_fraction_of_first_metacell = genes_metacells_log_fraction[:, first_metacell] # NOJET
@views genes_log_fraction_of_second_metacell = genes_metacells_log_fraction[:, second_metacell]
@views genes_total_UMIs_of_first_metacell = genes_metacells_total_UMIs[:, first_metacell] # NOJET
@views genes_total_UMIs_of_second_metacell = genes_metacells_total_UMIs[:, second_metacell]

return maximum(
abs.(
genes_log_fraction_of_first_metacell .- genes_log_fraction_of_second_metacell #
)[genes_total_UMIs_of_first_metacell .+ genes_total_UMIs_of_second_metacell .>= min_significant_gene_UMIs],
)
end

function merge_spheres( # untested
low_sphere::Integer,
high_sphere::Integer,
spheres_of_metacells::Vector{UInt32},
metacells_of_spheres::Vector{Vector{UInt32}},
)::Nothing
low_metacells = metacells_of_spheres[low_sphere]
high_metacells = metacells_of_spheres[high_sphere]
spheres_of_metacells[high_metacells] .= low_sphere
append!(low_metacells, high_metacells)
empty!(high_metacells)
return nothing
return distances
end

end # module

0 comments on commit 6974030

Please sign in to comment.