Skip to content

Commit

Permalink
add memory exps
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebd99 committed Oct 18, 2023
1 parent b35dcf6 commit bb62125
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 11 deletions.
14 changes: 9 additions & 5 deletions Experiments/Scripts/cycle_prob_exps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,19 @@ datasets = [aids, yeast, hprd, dblp, youtube, wordnet]
experiment_params = Vector{ExperimentParams}()
build_params = Vector{ExperimentParams}()
for dataset in datasets
push!(build_params, ExperimentParams(dataset=dataset))
push!(build_params, ExperimentParams(dataset=dataset,
num_colors=16,
label_refining_rounds=2))
for only_shortest_path_cycle in [false, true]
push!(experiment_params, ExperimentParams(dataset=dataset,
only_shortest_path_cycle=only_shortest_path_cycle))
num_colors=16,
label_refining_rounds=2,
only_shortest_path_cycle=only_shortest_path_cycle))
end
end

#build_experiments(build_params)
build_experiments(build_params)

#run_estimation_experiments(experiment_params)
run_estimation_experiments(experiment_params)

graph_grouped_box_plot(experiment_params; grouping=cycle_stats, filename="cycle_stats_exps")
graph_grouped_box_plot(experiment_params; grouping=cycle_stats, filename="cycle_stats_exps_w_refining")
20 changes: 20 additions & 0 deletions Experiments/Scripts/memory_exps.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

using Profile
include("../Experiments.jl")

# Memory experiment: build one color summary per (dataset, color budget)
# pair, then draw a grouped bar chart of the memory footprint grouped by
# the number of colors.
datasets = [aids, yeast, hprd, dblp, youtube, wordnet]
num_colors = [4, 8, 16, 32, 64, 128]
# `experiment_params` is declared but never read in this script.
experiment_params = ExperimentParams[]
build_params = ExperimentParams[]
# The first iterator is the outer loop, so every color budget is visited
# for one dataset before moving on to the next dataset.
for dataset in datasets, n in num_colors
    push!(build_params, ExperimentParams(dataset=dataset, num_colors=n))
end
build_experiments(build_params)

graph_grouped_bar_plot(build_params;
                       grouping=number_of_colors,
                       y_type=memory_footprint,
                       y_lims=[1, 10000],
                       filename="memory_size_vs_colors")
10 changes: 7 additions & 3 deletions Experiments/build_color_summaries.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
function build_experiments(experiment_params_list::Vector{ExperimentParams})
build_times = [("Dataset", "Partitioner", "NumColors", "BuildTime")]
for experiment_params in experiment_params_list
build_times = [("Dataset", "Partitioner", "NumColors", "BuildTime", "MemoryFootprint")]
dataset = experiment_params.dataset
summary_params = experiment_params.summary_params
data = load_dataset(dataset)
summary_name = params_to_summary_filename(experiment_params)
summary_file_location = "Experiments/SerializedSummaries/" * summary_name
println("Building Color Summary: ", summary_name)
results = @timed generate_color_summary(data, summary_params; verbose=1)
summary_size = Base.summarysize(results.value)
serialize(summary_file_location, results.value)
push!(build_times, (string(dataset), string(summary_params.partitioner),
string(summary_params.num_colors), string(results.time)))
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.num_colors),
string(results.time),
string(summary_size)))
results_filename = params_to_results_filename(experiment_params)
result_file_location = "Experiments/Results/Build_" * results_filename
writedlm(result_file_location, build_times, ",")
Expand Down
76 changes: 73 additions & 3 deletions Experiments/graph_results.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@enum GROUP dataset technique cycle_size summary_paths inference_paths query_type sampling_type cycle_stats
@enum GROUP dataset technique cycle_size summary_paths inference_paths query_type sampling_type cycle_stats number_of_colors
#todo: query type

@enum VALUE estimate_error runtime
@enum VALUE estimate_error runtime memory_footprint

function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams};
x_type::GROUP=dataset, y_type::VALUE=estimate_error,
Expand Down Expand Up @@ -43,14 +43,82 @@ function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams}
# See this: https://discourse.julialang.org/t/deactivate-plot-display-to-avoid-need-for-x-server/19359/15
ENV["GKSwstype"]="100"
gbplot = groupedboxplot(x_values, y_values, group = groups, yscale =:log10,
ylims=[10^-13, 10^11], yticks=[10^-10, 10^-5, 1, 10^5, 10^10],
ylims=[10^-13, 10^11], yticks=[10^-10, 10^-5, 10^-2, 1, 10^2, 10^5, 10^10],
legend = :outertopleft, size = (1000, 600))
x_label !== nothing && xlabel!(gbplot, x_label)
y_label !== nothing && ylabel!(gbplot, y_label)
plotname = (isnothing(filename)) ? results_filename * ".png" : filename * ".png"
savefig(gbplot, "Experiments/Results/Figures/" * plotname)
end

"""
    graph_grouped_bar_plot(experiment_params_list; x_type=dataset, y_type=estimate_error,
                           grouping=technique, x_label=nothing, y_label=nothing,
                           y_lims=[0, 10], filename=nothing)

Draw a grouped bar chart from the result files of `experiment_params_list` and save it
as a PNG under `Experiments/Results/Figures/`.

For each experiment the results CSV is read from `Experiments/Results/Build_<name>` when
`y_type == memory_footprint`, and from `Experiments/Results/Estimation_<name>` otherwise,
where `<name>` comes from `params_to_results_filename`. Every row contributes one point.

# Keywords
- `x_type`: what goes on the x axis. `query_type` reads the row's `QueryType` column; any
  other value is resolved through `get_value_from_param`.
- `y_type`: `estimate_error` plots `Estimate / TrueCard`, `memory_footprint` plots
  `MemoryFootprint / 10^6`, and anything else plots `EstimationTime`.
- `grouping`: bar grouping key, resolved the same way as `x_type`.
- `x_label`, `y_label`: axis labels, applied only when not `nothing`.
- `y_lims`: y-axis limits handed to the plot.
- `filename`: output name without extension. When `nothing`, the results filename of the
  first experiment is used.

The list must be non-empty: its first element is indexed even when `filename` is given.
The loaded tables and the collected vectors are printed to standard output.
"""
function graph_grouped_bar_plot(experiment_params_list::Vector{ExperimentParams};
                                x_type::GROUP=dataset,
                                y_type::VALUE=estimate_error,
                                grouping::GROUP=technique,
                                x_label=nothing,
                                y_label=nothing,
                                y_lims=[0, 10],
                                filename=nothing)
    xs = []
    ys = Float64[]
    group_keys = []

    # Memory footprints are stored in the build results; every other metric is
    # read from the estimation results.
    path_prefix = "Experiments/Results/Estimation_"
    if y_type == memory_footprint
        path_prefix = "Experiments/Results/Build_"
    end

    for params in experiment_params_list
        table_path = path_prefix * params_to_results_filename(params)
        table = CSV.read(table_path, DataFrame; normalizenames=true)
        println(table)

        # One data point per row of the results table.
        for row in 1:nrow(table)
            x = if x_type == query_type
                table[row, :QueryType]
            else
                get_value_from_param(params, x_type)
            end
            group_key = if grouping == query_type
                table[row, :QueryType]
            else
                get_value_from_param(params, grouping)
            end
            y = if y_type == estimate_error
                table[row, :Estimate] / table[row, :TrueCard]
            elseif y_type == memory_footprint
                table[row, :MemoryFootprint]/(10^6)
            else
                # y_type == runtime
                table[row, :EstimationTime]
            end
            push!(xs, x)
            push!(ys, y)
            push!(group_keys, group_key)
        end
    end

    # Fallback figure name, taken from the first experiment.
    default_name = params_to_results_filename(experiment_params_list[1])
    println("starting graphs")

    # This seems to be necessary for using Plots.jl outside of the ipynb framework.
    # See this: https://discourse.julialang.org/t/deactivate-plot-display-to-avoid-need-for-x-server/19359/15
    ENV["GKSwstype"]="100"
    println(xs)
    println(ys)
    println(group_keys)
    # The y axis is linear here (no `yscale` is passed).
    bar_plot = StatsPlots.groupedbar(xs,
                                     ys,
                                     group = group_keys,
                                     ylims=y_lims,
                                     legend = :outertopleft,
                                     size = (1000, 600))
    if x_label !== nothing
        xlabel!(bar_plot, x_label)
    end
    if y_label !== nothing
        ylabel!(bar_plot, y_label)
    end
    figure_name = (isnothing(filename) ? default_name : filename) * ".png"
    savefig(bar_plot, "Experiments/Results/Figures/" * figure_name)
end




# default to grouping by dataset
function get_value_from_param(experiment_param::ExperimentParams, value_type::GROUP)
if value_type == dataset
Expand All @@ -65,6 +133,8 @@ function get_value_from_param(experiment_param::ExperimentParams, value_type::GR
return experiment_param.sampling_strategy
elseif value_type == cycle_stats
return experiment_param.only_shortest_path_cycle
elseif value_type == number_of_colors
return experiment_param.summary_params.num_colors
else
# default to grouping by technique
return (experiment_param.summary_params.partitioner, experiment_param.summary_params.label_refining_rounds)
Expand Down

0 comments on commit bb62125

Please sign in to comment.