diff --git a/Project.toml b/Project.toml index 8b3c177..e61bf6a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PProf" uuid = "e4faabce-9ead-11e9-39d9-4379958e3056" authors = ["Valentin Churavy ", "Nathan Daly "] -version = "3.0.0" +version = "3.1.0" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/PProf.jl b/src/PProf.jl index df66087..a97f73f 100644 --- a/src/PProf.jl +++ b/src/PProf.jl @@ -19,7 +19,8 @@ clear include(joinpath("..", "lib", "perftools", "perftools.jl")) -import .perftools.profiles: ValueType, Sample, Function, Location, Line +import .perftools.profiles: ValueType, Sample, Function, + Location, Line, Label const PProfile = perftools.profiles.Profile const proc = Ref{Union{Base.Process, Nothing}}(nothing) @@ -47,6 +48,7 @@ using Base.StackTraces: StackFrame web = true, webhost = "localhost", webport = 57599, out = "profile.pb.gz", from_c = true, full_signatures = true, drop_frames = "", keep_frames = "", ui_relative_percentages = true, sampling_delay = nothing, + tagroot = "taskid,threadid" ) pprof(FlameGraphs.flamegraph(); kwargs...) @@ -77,6 +79,14 @@ You can also use `PProf.refresh(file="...")` to open a new file in the server. - `from_c::Bool`: If `false`, exclude frames that come from from_c. Defaults to `true`. - `full_signatures::Bool`: If `true`, methods are printed as signatures with full argument types. If `false`, as only names. E.g. `eval(::Module, ::Any)` vs `eval`. +- `tagroot`: Set which metadata tags you want to turn into root frames for the profile. This + is used to view the metadata tags in the Flamegraph view. This should be a + comma-separated string, choosing from the following metadata options: + - `taskid` + - `threadid` + - `thread_sleeping` + - `cycle_clock` + Defaults to `"taskid,threadid"`, grouping by taskid then threadid. - `drop_frames`: frames with function_name fully matching regexp string will be dropped from the samples, along with their successors. 
- `keep_frames`: frames with function_name fully matching regexp string will be kept, even if it matches drop_functions. @@ -95,15 +105,18 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, drop_frames::Union{Nothing, AbstractString} = nothing, keep_frames::Union{Nothing, AbstractString} = nothing, ui_relative_percentages::Bool = true, + tagroot::Union{Nothing, AbstractString} = "taskid,threadid", ) + has_meta = false if data === nothing data = if isdefined(Profile, :has_meta) - copy(Profile.fetch(include_meta = false)) + has_meta = true + copy(Profile.fetch(include_meta = true)) else copy(Profile.fetch()) end - elseif isdefined(Profile, :has_meta) && Profile.has_meta(data) - data = Profile.strip_meta(data) + elseif isdefined(Profile, :has_meta) + has_meta = Profile.has_meta(data) end lookup = lidict if lookup === nothing @@ -122,6 +135,8 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, enter!(string) = _enter!(string_table, string) enter!(::Nothing) = _enter!(string_table, "nothing") ValueType!(_type, unit) = ValueType(enter!(_type), enter!(unit)) + Label!(key, value, unit) = Label(key = enter!(key), num = value, num_unit = enter!(unit)) + Label!(key, value) = Label(key = enter!(key), str = enter!(string(value))) # Setup: enter!("") # NOTE: pprof requires first entry to be "" @@ -136,7 +151,6 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, sample_type = [ ValueType!("events", "count"), # Mandatory - ValueType!("stack_depth", "count") ] period_type = ValueType!("cpu", "nanoseconds") @@ -144,27 +158,64 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, keep_frames = isnothing(keep_frames) ? 0 : enter!(keep_frames) # start decoding backtraces location_id = Vector{eltype(data)}() - lastwaszero = true - for ip in data - # ip == 0x0 is the sentinel value for finishing a backtrace, therefore finising a sample - if ip == 0 + # All samples get the same value for CPU profiles. 
+ value = [ + 1, # events + ] + + lastwaszero = true # (Legacy: used when has_meta = false) + + # The Profile data buffer is a big array, with each sample appended one after the other. + # Each sample now looks like this: + # | ip | ip | ip | meta1 | meta2 | meta3 | meta4| 0x0 | 0x0 | + # We iterate backwards, starting from the end, so that we don't encounter the metadata + # and mistake it for more ip addresses. For each sample, we skip the zeros, consume the + # metadata, then continue scanning the ip addresses, and when we hit another end of a + # block, we finish the sample we just consumed. + idx = length(data) + meta = nothing + while idx > 0 + # We handle the very first sample after the loop. + if has_meta && Profile.is_block_end(data, idx) + if meta !== nothing + # Finish last block + push!(samples, Sample(;location_id = reverse!(location_id), value = value, label = meta)) + location_id = Vector{eltype(data)}() + end + + # Consume all of the metadata entries in the buffer, and then position the IP + # at the idx for the actual ip. + thread_sleeping = data[idx - Profile.META_OFFSET_SLEEPSTATE] - 1 # "Sleeping" is recorded as 1 or 2, to avoid 0s, which indicate end-of-block. + cpu_cycle_clock = data[idx - Profile.META_OFFSET_CPUCYCLECLOCK] + taskid = data[idx - Profile.META_OFFSET_TASKID] + threadid = data[idx - Profile.META_OFFSET_THREADID] + + meta = Label[ + Label!("thread_sleeping", thread_sleeping != 0), + Label!("cycle_clock", cpu_cycle_clock, "nanoseconds"), + Label!("taskid", taskid), + Label!("threadid", threadid), + ] + idx -= (Profile.nmeta + 2) # skip all the metas, plus the 2 nulls that end a block. + continue + elseif !has_meta && data[idx] == 0 # Avoid creating empty samples + # ip == 0x0 is the sentinel value for finishing a backtrace (when meta is disabled), therefore finishing a sample + # On some platforms, we sometimes get two 0s in a row for some reason... 
if lastwaszero @assert length(location_id) == 0 - continue + else + # Finish last block + push!(samples, Sample(;location_id = reverse!(location_id), value = value)) + location_id = Vector{eltype(data)}() + lastwaszero = true end - - # End of sample - value = [ - 1, # events - length(location_id), # stack_depth - ] - push!(samples, Sample(;location_id, value)) - location_id = Vector{eltype(data)}() - lastwaszero = true + idx -= 1 continue end + ip = data[idx] + idx -= 1 lastwaszero = false # A backtrace consists of a set of IP (Instruction Pointers), each IP points @@ -245,6 +296,15 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, push!(location_id, ip) end end + if length(data) > 0 + # Finish the very last sample + if has_meta + push!(samples, Sample(;location_id = reverse!(location_id), value = value, label = meta)) + else + push!(samples, Sample(;location_id = reverse!(location_id), value = value)) + end + location_id = Vector{eltype(data)}() + end # If from_c=false funcs and locs should NOT contain C functions prof = PProfile( @@ -269,8 +329,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing, end if web - refresh(webhost = webhost, webport = webport, file = out, - ui_relative_percentages = ui_relative_percentages) + refresh(; webhost, webport, file = out, ui_relative_percentages, tagroot) end out @@ -311,6 +370,7 @@ function refresh(; webhost::AbstractString = "localhost", webport::Integer = 57599, file::AbstractString = "profile.pb.gz", ui_relative_percentages::Bool = true, + tagroot::Union{AbstractString,Nothing} = "taskid,threadid", ) if proc[] === nothing @@ -324,7 +384,11 @@ function refresh(; webhost::AbstractString = "localhost", relative_percentages_flag = ui_relative_percentages ? 
"-relative_percentages" : "" proc[] = pprof_jll.pprof() do pprof_path - open(pipeline(`$pprof_path -http=$webhost:$webport $relative_percentages_flag $file`)) + if tagroot !== nothing && !isempty(tagroot) + open(pipeline(`$pprof_path -tagroot $tagroot -http=$webhost:$webport $relative_percentages_flag $file`)) + else + open(pipeline(`$pprof_path -http=$webhost:$webport $relative_percentages_flag $file`)) + end end end diff --git a/test/PProf.jl b/test/PProf.jl index 2868e67..7daee67 100644 --- a/test/PProf.jl +++ b/test/PProf.jl @@ -65,6 +65,42 @@ function load_prof_proto(file) open(io->decode(ProtoDecoder(GzipDecompressorStream(io)), PProf.perftools.profiles.Profile), file, "r") end +@testset "Corner Cases" begin + @testset "non-meta profile" begin + + @testset "0 sample profile" begin + prof = load_prof_proto(pprof(UInt64[], out=tempname(), web=false)) + @test length(prof.sample) == 0 + end + @testset "1 sample profile" begin + prof = load_prof_proto(pprof(UInt64[0xdeadbeef,0], out=tempname(), web=false)) + @test length(prof.sample) == 1 + end + + @testset "2 sample, 1 location profile" begin + prof = load_prof_proto(pprof(UInt64[0xdeadbeef,0, 0xdeadbeef, 0], out=tempname(), web=false)) + @test length(prof.sample) == 2 + @test length(prof.location) == 1 + end + end + @testset "with-meta profile" begin + @testset "1 sample profile" begin + data = UInt64[0xdeadbeef, 1, 1, 1, 1, 0, 0] + prof = load_prof_proto(pprof(data, out=tempname(), web=false)) + @test length(prof.sample) == 1 + end + + @testset "2 sample 1 location profile" begin + data = UInt64[0xdeadbeef, 1, 1, 1, 1, 0, 0, 0xdeadbeef, 1, 1, 1, 1, 0, 0] + prof = load_prof_proto(pprof(data, out=tempname(), web=false)) + @test length(prof.sample) == 2 + @test length(prof.location) == 1 + end + end +end + + +const HAS_META = isdefined(Profile, :has_meta) @testset "with_c" begin Profile.clear() @@ -74,12 +110,34 @@ end end sleep(2) end - for i in 1:2 + @testset for i in 1:4 if i == 1 - data = Profile.fetch() + if 
!HAS_META + continue + end + data = Profile.fetch(include_meta = true) + args = (data,) + elseif i == 2 + if !HAS_META + continue + end + data,lidict = Profile.retrieve(include_meta = true) + args = (data, lidict) + elseif i == 3 + # Ensure we are backwards compatible with older, non-meta profiles + if HAS_META + data = Profile.fetch(include_meta = false) + else + data = Profile.fetch() + end args = (data,) else - data,lidict = Profile.retrieve() + # Ensure we are backwards compatible with older, non-meta profiles + if HAS_META + data,lidict = Profile.retrieve(include_meta = false) + else + data,lidict = Profile.retrieve() + end args = (data, lidict) end @@ -135,6 +193,7 @@ end @testset "subprocess refresh" begin + PProf.kill() @pprof foo(10000, 5, []) current_proc = PProf.proc[]