Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LinuxPerf extension for branch + instruction counts #375

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,14 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6'
- '1.10'
- '1'
- 'nightly'
arch:
- x64
os:
- ubuntu-latest
include:
- version: '1.7'
arch: x64
os: ubuntu-20.04
- version: '1.8'
arch: x64
os: ubuntu-22.04
- version: '1.9'
arch: x64
os: ubuntu-22.04
Expand Down
11 changes: 9 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[weakdeps]
LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"

[extensions]
LinuxPerfExt = "LinuxPerf"

[compat]
Aqua = "0.8"
Compat = ">= 4.11.0"
Expand All @@ -22,7 +28,8 @@ Profile = "<0.0.1, 1"
Statistics = "<0.0.1, 1"
Test = "<0.0.1, 1"
UUIDs = "<0.0.1, 1"
julia = "1.6"
julia = "1.9"
LinuxPerf = ">= 0.4"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unbound version specifiers like this aren't accepted in the General registry

Suggested change
LinuxPerf = ">= 0.4"
LinuxPerf = "0.4"


[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Expand All @@ -31,4 +38,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Aqua", "JuliaFormatter", "Statistics", "Test"]
test = ["Aqua", "JuliaFormatter", "Statistics", "Test", "LinuxPerf"]
49 changes: 49 additions & 0 deletions ext/LinuxPerfExt/LinuxPerfExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
module LinuxPerfExt

import BenchmarkTools: PerfInterface
import LinuxPerf: LinuxPerf, PerfBench, EventGroup, EventType
import LinuxPerf: enable!, disable!, enable_all!, disable_all!, close, read!

# Construct the hardware event group we sample: retired instructions plus
# retired branches. Built fresh for every benchmark sample via `setup`.
_event_group() = EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])

"""
    interface() -> PerfInterface

Probe whether Linux `perf` hardware counters are usable on this system and,
if so, return a fully wired `PerfInterface`. If opening the event group throws
(e.g. `perf_event_open`/ioctl failure, with LinuxPerf warning the user), or if
fewer than both events could be opened, return the no-op `PerfInterface()`.
"""
function interface()
    probe = try
        _event_group()
    catch
        # perf is not functional here; fall back to the no-op interface.
        return PerfInterface()
    end
    close(probe)
    # Both hardware events must have opened for the measurement to be usable.
    length(probe.fds) == 2 || return PerfInterface()

    # perf appears to work on this system: return the real hook set.
    return PerfInterface(;
        setup=() -> PerfBench(0, EventGroup[_event_group()]),
        start=(bench) -> enable_all!(),
        stop=(bench) -> disable_all!(),
        teardown=(bench) -> close(bench),
        read=(bench) -> begin
            group = only(bench.groups)
            (N, time_enabled, time_running, insts, branches) = read!(
                group.leader_io, Vector{UInt64}(undef, 5)
            )
            if 2 * time_running <= time_enabled
                # Counters were scheduled less than 50% of the time (most
                # likely PMU contention with other perf events): no data.
                (NaN, NaN)
            else
                # Scale raw counts to compensate for the fraction of time the
                # counters were actually running.
                scale = time_enabled / time_running
                (Float64(insts) * scale, Float64(branches) * scale)
            end
        end,
    )
end

end
2 changes: 2 additions & 0 deletions src/BenchmarkTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ export loadparams!
include("trials.jl")

export gctime,
instructions,
branches,
memory,
allocs,
params,
Expand Down
57 changes: 48 additions & 9 deletions src/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,24 @@ macro benchmarkable(args...)
end
end

"""
    PerfInterface(; setup, start, stop, read, teardown)

Pluggable set of hooks for collecting hardware performance counters around a
benchmark sample. The hooks are invoked in the order
`setup → start → stop → read → teardown` (see `generate_benchmark_definition`):

- `setup() -> bench`: create the measurement state handed to the other hooks.
- `start(bench)` / `stop(bench)`: enable/disable counting around the timed loop.
- `read(bench) -> (instructions, branches)`: report counter values; the default
  returns `(NaN, NaN)`, meaning "no measurement available".
- `teardown(bench)`: release any resources acquired by `setup`.

The `Returns(nothing)` defaults make the default-constructed interface a safe
no-op on systems without a working perf backend; the LinuxPerf package
extension (`ext/LinuxPerfExt`) supplies a real implementation.
"""
struct PerfInterface
    setup::Function     # () -> bench state (e.g. a LinuxPerf.PerfBench)
    start::Function     # (bench) -> nothing; begin counting
    stop::Function      # (bench) -> nothing; end counting
    read::Function      # (bench) -> (instructions, branches), NaNs if unavailable
    teardown::Function  # (bench) -> nothing; release counter resources

    function PerfInterface(;
        setup=Returns(nothing),
        start=Returns(nothing),
        stop=Returns(nothing),
        read=Returns((NaN, NaN)),
        teardown=Returns(nothing),
    )
        return new(setup, start, stop, read, teardown)
    end
end

# `eval` an expression that forcibly defines the specified benchmark at
# top-level in order to allow transfer of locally-scoped variables into
# benchmark scope.
Expand Down Expand Up @@ -553,6 +571,8 @@ function generate_benchmark_definition(
end
)
end
ext = Base.get_extension(BenchmarkTools, :LinuxPerfExt)
LinuxPerf = isnothing(ext) ? PerfInterface() : ext.interface()
return Core.eval(
eval_module,
quote
Expand All @@ -563,17 +583,34 @@ function generate_benchmark_definition(
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
)
$(setup)
__perf_bench = $(LinuxPerf.setup)()
__gcdiff = nothing
__return_val = nothing
__sample_time::Int64 = 0
__sample_instructions::Float64 = 0
__sample_branches::Float64 = 0
__evals = __params.evals
__gc_start = Base.gc_num()
__start_time = time_ns()
__return_val = $(invocation)
for __iter in 2:__evals
$(invocation)
try
__gc_start = Base.gc_num()
$(LinuxPerf.start)(__perf_bench)
__start_time = time_ns()
__return_val = $(invocation)
for __iter in 2:__evals
$(invocation)
end
__sample_time = time_ns() - __start_time
$(LinuxPerf.stop)(__perf_bench)
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
__sample_instructions, __sample_branches = $(LinuxPerf.read)(
__perf_bench
)
finally
$(LinuxPerf.teardown)(__perf_bench)
$(teardown)
end
__sample_time = time_ns() - __start_time
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
$(teardown)
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
__instructions = max(__sample_instructions / __evals, 0.0) # may be NaN
__branches = max(__sample_branches / __evals, 0.0) # may be NaN
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
__allocs = Int(
Expand All @@ -585,7 +622,9 @@ function generate_benchmark_definition(
__evals,
),
)
return __time, __gctime, __memory, __allocs, __return_val
return __time,
__instructions, __branches, __gctime, __memory, __allocs,
__return_val
end
$BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
end,
Expand Down
2 changes: 2 additions & 0 deletions src/groups.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ Base.min(groups::BenchmarkGroup...) = mapvals(min, groups...)
Base.max(groups::BenchmarkGroup...) = mapvals(max, groups...)

# Group-wise accessors: lift each scalar accessor over every entry of the
# group via `mapvals`.
Base.time(group::BenchmarkGroup) = mapvals(time, group)
# Estimated hardware counters gathered through the LinuxPerf extension
# (NaN entries when perf measurement was unavailable — see ext/LinuxPerfExt).
instructions(group::BenchmarkGroup) = mapvals(instructions, group)
branches(group::BenchmarkGroup) = mapvals(branches, group)
gctime(group::BenchmarkGroup) = mapvals(gctime, group)
memory(group::BenchmarkGroup) = mapvals(memory, group)
allocs(group::BenchmarkGroup) = mapvals(allocs, group)
Expand Down
29 changes: 27 additions & 2 deletions src/parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ mutable struct Parameters
gctrial::Bool
gcsample::Bool
time_tolerance::Float64
instruction_tolerance::Float64
branch_tolerance::Float64
memory_tolerance::Float64
end

const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01)
# Positional arguments must match the field order of `Parameters`; the trailing
# four values are the tolerances, in declaration order: time_tolerance = 0.05,
# instruction_tolerance = 0.05, branch_tolerance = 0.05, memory_tolerance = 0.01.
const DEFAULT_PARAMETERS = Parameters(
    5.0, 10000, 1, false, 0, true, false, 0.05, 0.05, 0.05, 0.01
)

function Parameters(;
seconds=DEFAULT_PARAMETERS.seconds,
Expand All @@ -28,6 +32,8 @@ function Parameters(;
gctrial=DEFAULT_PARAMETERS.gctrial,
gcsample=DEFAULT_PARAMETERS.gcsample,
time_tolerance=DEFAULT_PARAMETERS.time_tolerance,
instruction_tolerance=DEFAULT_PARAMETERS.instruction_tolerance,
branch_tolerance=DEFAULT_PARAMETERS.branch_tolerance,
memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance,
)
return Parameters(
Expand All @@ -39,6 +45,8 @@ function Parameters(;
gctrial,
gcsample,
time_tolerance,
instruction_tolerance,
branch_tolerance,
memory_tolerance,
)
end
Expand All @@ -52,6 +60,8 @@ function Parameters(
gctrial=nothing,
gcsample=nothing,
time_tolerance=nothing,
instruction_tolerance=nothing,
branch_tolerance=nothing,
memory_tolerance=nothing,
)
params = Parameters()
Expand All @@ -63,6 +73,13 @@ function Parameters(
params.gcsample = gcsample != nothing ? gcsample : default.gcsample
params.time_tolerance =
time_tolerance != nothing ? time_tolerance : default.time_tolerance
params.instruction_tolerance = if instruction_tolerance != nothing
instruction_tolerance
else
default.instruction_tolerance
end
params.branch_tolerance =
branch_tolerance != nothing ? branch_tolerance : default.branch_tolerance
params.memory_tolerance =
memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance
return params::BenchmarkTools.Parameters
Expand All @@ -76,6 +93,8 @@ function Base.:(==)(a::Parameters, b::Parameters)
a.gctrial == b.gctrial &&
a.gcsample == b.gcsample &&
a.time_tolerance == b.time_tolerance &&
a.instruction_tolerance == b.instruction_tolerance &&
a.branch_tolerance == b.branch_tolerance &&
a.memory_tolerance == b.memory_tolerance
end

Expand All @@ -89,6 +108,8 @@ function Base.copy(p::Parameters)
p.gctrial,
p.gcsample,
p.time_tolerance,
p.instruction_tolerance,
p.branch_tolerance,
p.memory_tolerance,
)
end
Expand All @@ -109,7 +130,11 @@ end

@noinline function overhead_sample(evals)
start_time = time_ns()
for _ in 1:evals
try
for _ in 1:evals
nullfunc()
end
finally
nullfunc()
end
sample_time = time_ns() - start_time
Expand Down
34 changes: 32 additions & 2 deletions src/serialization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,38 @@ function recover(x::Vector)
else
xsi = if fn == "evals_set" && !haskey(fields, fn)
false
elseif fn in ("seconds", "overhead", "time_tolerance", "memory_tolerance") &&
fields[fn] === nothing
elseif fn in ("instructions", "branches")
# JSON spec doesn't support NaN, so handle it specially here
if !haskey(fields, fn)
if ft === Vector{Float64}
Float64[NaN for _ in length(fields["time"])]
elseif ft === Float64
NaN
else
@assert false
end
else
if ft === Vector{Float64}
Float64[
elem === nothing ? NaN : convert(Float64, elem) for
elem in fields[fn]
]
else
fields[fn] === nothing ? NaN : convert(ft, fields[fn])
end
end
elseif fn == "instruction_tolerance" && !haskey(fields, fn)
DEFAULT_PARAMETERS.instruction_tolerance
elseif fn == "branch_tolerance" && !haskey(fields, fn)
DEFAULT_PARAMETERS.branch_tolerance
elseif fn in (
"seconds",
"overhead",
"time_tolerance",
"instruction_tolerance",
"branch_tolerance",
"memory_tolerance",
) && fields[fn] === nothing
# JSON spec doesn't support Inf
# These fields should all be >= 0, so we can ignore -Inf case
typemax(ft)
Expand Down
Loading
Loading