Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add guesser #23

Merged
merged 2 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ See `sort!` for an explanation of the keyword arguments `by`, `lt` and `rev`.
### `searchsortedfirstcorrelated(v::AbstractVector, x, guess)`

```julia
searchsortedfirstcorrelated(v::AbstractVector{T}, x, guess::T)
searchsortedfirstcorrelated(v::AbstractVector, x, guess)
```

An accelerated `findfirst` on sorted vectors using a bracketed search. Requires a `guess`
to start the search from.
to start the search from, which is either an integer or an instance of `Guesser`.

An analogous function `searchsortedlastcorrelated` exists.


Some benchmarks:
Expand Down
3 changes: 3 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ Pkg.add("FindFirstFunctions")
```@docs
FindFirstFunctions.findfirstequal
FindFirstFunctions.bracketstrictlymontonic
FindFirstFunctions.looks_linear
FindFirstFunctions.Guesser
FindFirstFunctions.searchsortedfirstcorrelated
FindFirstFunctions.searchsortedlastcorrelated
FindFirstFunctions.findfirstsortedequal
```

Expand Down
90 changes: 84 additions & 6 deletions src/FindFirstFunctions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -192,22 +192,100 @@ function bracketstrictlymontonic(
end

"""
searchsortedfirstcorrelated(v::AbstractVector{T}, x, guess)
looks_linear(v; threshold = 1e-2)

An accelerated `findfirst` on sorted vectors using a bracketed search. Requires a `guess::T`
Determine whether the abscissae `v` are approximately evenly spaced. Each element is
compared with the straight line joining the first and last elements of `v`; the
root-mean-square of these deviations, normalized by the range of `v`, is then compared
against `threshold`. If it is below `threshold`, the vector looks linear (return `true`).
Internal function - interface may change.
"""
function looks_linear(v; threshold = 1e-2)
    # Zero, one or two points always lie on a straight line.
    n_points = length(v)
    if n_points <= 2
        return true
    end
    start, stop = first(v), last(v)
    span = stop - start
    # Spacing the points would have if they were perfectly evenly distributed.
    step_ideal = span / (n_points - 1)
    # Sum of squared deviations from the straight line through the end points.
    sq_residuals = sum(enumerate(v)) do (i, x_i)
        (x_i - start - (i - 1) * step_ideal)^2
    end
    # Mean squared deviation, normalized by the squared span of `v`.
    normalized_variance = sq_residuals / (n_points * span^2)
    return normalized_variance < threshold^2
end

"""
Guesser(v::AbstractVector; looks_linear_threshold = 1e-2)

Wrapper of the searched vector `v` which makes an informed guess
for `searchsorted*correlated` by either
- Exploiting that `v` is sufficiently evenly spaced
- Using the previous outcome of `searchsorted*correlated`
"""
struct Guesser{T<:AbstractVector}
    # Sorted vector the guesses refer to.
    v::T
    # Index returned by the most recent `searchsorted*correlated` call made with this
    # guesser; used as the guess when `linear_lookup` is false.
    idx_prev::Base.RefValue{Int}
    # Whether guesses are obtained by linear interpolation over the indices of `v`.
    linear_lookup::Bool
end

# Wrap `v`, starting with index 1 as the "previous" result and enabling the
# interpolation-based lookup when `looks_linear` accepts `v` at the given threshold.
function Guesser(v::AbstractVector; looks_linear_threshold = 1e-2)
    Guesser(v, Ref(1), looks_linear(v; threshold = looks_linear_threshold))
end

# Return an index guess for the position of `x` in `g.v`.
#
# With `linear_lookup` the guess is interpolated linearly between the first and last
# element of `v`; otherwise the previously stored index is returned. The guess is only a
# hint: for `x` outside the span of `v` it may lie outside the index range of `v`.
function (g::Guesser)(x)
    (; v, idx_prev, linear_lookup) = g
    linear_lookup || return idx_prev[]

    i_0, i_f = firstindex(v), lastindex(v)
    offset = x - first(v)
    # Exact hit on the first element. This also keeps a zero-span `v` (e.g. a single
    # element) from evaluating 0/0 below, which used to end in an `InexactError`.
    iszero(offset) && return i_0

    f = offset / (last(v) - first(v))
    # NaN cannot be rounded to an index; hand back a valid index instead of throwing.
    isnan(f) && return i_f
    isinf(f) && return (f > 0 ? i_f : i_0)

    i_approx = f * (i_f - i_0) + i_0
    I = typeof(i_0)
    # `round(I, ...)` throws once the value no longer fits into `I`. For such an `x`, far
    # outside the span of `v`, return the nearest out-of-range index instead.
    if i_approx >= typemax(I)
        return i_f + one(I)
    elseif i_approx <= typemin(I)
        return i_0 - one(I)
    end
    return round(I, i_approx)
end

"""
searchsortedfirstcorrelated(v::AbstractVector, x, guess)

An accelerated `findfirst` on sorted vectors using a bracketed search. Requires a `guess::Union{<:Integer, Guesser}`
to start the search from.
"""
function searchsortedfirstcorrelated(v::AbstractVector, x, guess)
function searchsortedfirstcorrelated(v::AbstractVector, x, guess::T) where {T<:Integer}
    # Bracket `x` starting from `guess`, then search only inside that bracket.
    order = Base.Order.Forward
    bracket_lo, bracket_hi = bracketstrictlymontonic(v, x, guess, order)
    return searchsortedfirst(v, x, bracket_lo, bracket_hi, order)
end

function searchsortedlastcorrelated(v::AbstractVector, x, guess)
"""
searchsortedlastcorrelated(v::AbstractVector, x, guess)

An accelerated `findlast` on sorted vectors using a bracketed search. Requires a `guess::Union{<:Integer, Guesser}`
to start the search from.
"""
function searchsortedlastcorrelated(v::AbstractVector, x, guess::T) where {T<:Integer}
    # Bracket `x` starting from `guess`, then search only inside that bracket.
    order = Base.Order.Forward
    bracket_lo, bracket_hi = bracketstrictlymontonic(v, x, guess, order)
    return searchsortedlast(v, x, bracket_lo, bracket_hi, order)
end

searchsortedfirstcorrelated(r::AbstractRange, x, _) = searchsortedfirst(r, x)
searchsortedlastcorrelated(r::AbstractRange, x, _) = searchsortedlast(r, x)
# For ranges the integer guess is ignored and the call is forwarded to the plain
# `searchsortedfirst` / `searchsortedlast`.
function searchsortedfirstcorrelated(r::AbstractRange, x, ::Integer)
    return searchsortedfirst(r, x)
end
function searchsortedlastcorrelated(r::AbstractRange, x, ::Integer)
    return searchsortedlast(r, x)
end

function searchsortedfirstcorrelated(
    v::AbstractVector,
    x,
    guess::Guesser{T},
) where {T<:AbstractVector}
    # A guesser is only meaningful for the very vector it wraps.
    @assert v === guess.v
    start = guess(x)
    idx = searchsortedfirstcorrelated(v, x, start)
    # Remember the result so the next lookup can start from it.
    guess.idx_prev[] = idx
    return idx
end

function searchsortedlastcorrelated(v::T, x, guess::Guesser{T}) where {T<:AbstractVector}
    # A guesser is only meaningful for the very vector it wraps.
    @assert v === guess.v
    start = guess(x)
    idx = searchsortedlastcorrelated(v, x, start)
    # Remember the result so the next lookup can start from it.
    guess.idx_prev[] = idx
    return idx
end

end # module FindFirstFunctions
19 changes: 16 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,28 @@ using SafeTestsets, Test
if length(x) > 0
@test FindFirstFunctions.findfirstequal(x[begin], @view(x[begin:end])) === 1
@test FindFirstFunctions.findfirstequal(x[begin], @view(x[begin+1:end])) ===
nothing
nothing
@test FindFirstFunctions.findfirstequal(x[end], @view(x[begin:end-1])) ===
nothing
nothing
end
y = rand(Int)
ff = findfirst(==(y), x)
@test FindFirstFunctions.findfirstequal(y, x) === ff
ff === nothing && @test FindFirstFunctions.findfirstsortedequal(y, x) === nothing
ff === nothing &&
@test FindFirstFunctions.findfirstsortedequal(y, x) === nothing
end

end

# Exercise both guessing strategies of `Guesser` through the correlated searches.
@safetestset "Guesser" begin
using FindFirstFunctions:
Guesser, searchsortedfirstcorrelated, searchsortedlastcorrelated
# Four evenly spaced points from 0 to 10.
v = collect(LinRange(0, 10, 4))
# Keyword constructor: the strategy is chosen from the spacing of `v`.
guesser_linear = Guesser(v)
# Positional constructor with `linear_lookup = false`: guesses come from `idx_prev`.
guesser_prev = Guesser(v, Ref(1), false)
@test guesser_linear.linear_lookup
# 4.0 lies between v[2] and v[3].
@test searchsortedfirstcorrelated(v, 4.0, guesser_linear) == 3
@test searchsortedlastcorrelated(v, 4.0, guesser_prev) == 2
# The search stores its result in the guesser for the next lookup.
@test guesser_prev.idx_prev[] == 2
end
end
Loading