Skip to content

Commit

Permalink
Random code push
Browse files Browse the repository at this point in the history
  • Loading branch information
Devesh Sarda committed Feb 13, 2024
1 parent 2f3fb98 commit b3b5019
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 8 deletions.
8 changes: 8 additions & 0 deletions simulator/configs/arvix_linear.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"dataset_name" : "ogbn_arxiv",
"features_stats" : {
"page_size" : "16 KB",
"feature_dimension" : 128,
"feature_size" : "float32"
}
}
9 changes: 9 additions & 0 deletions simulator/configs/arvix_random.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"dataset_name" : "ogbn_arxiv",
"features_stats" : {
"feature_layout" : "random",
"page_size" : "16 KB",
"feature_dimension" : 128,
"feature_size" : "float32"
}
}
9 changes: 8 additions & 1 deletion simulator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,14 @@ def main():

# Save the histogram
os.makedirs(os.path.dirname(arguments.save_path), exist_ok=True)
visualize_results(pages_loaded, arguments.save_path, arguments.graph_title, config["dataset_name"])
visualize_arguments = {
"pages_loaded": pages_loaded,
"save_path": arguments.save_path,
"graph_title": arguments.graph_title,
"total_space": features_loader.get_total_file_size(),
"dataset_name": config["dataset_name"],
}
visualize_results(visualize_arguments)


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions simulator/src/features_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def __init__(self, data_loader, features_stat):

def get_node_page(self, node_id):
    """Return the index of the page that stores the features of ``node_id``.

    The node id is first translated to its location through
    ``self.node_location_map``; the page is derived from that location, not
    from the raw node id, so non-identity layouts are respected.

    Args:
        node_id: Key into ``self.node_location_map``.

    Returns:
        The page index as a plain ``int``.

    Raises:
        KeyError or IndexError: Propagated from the lookup in
            ``self.node_location_map`` when ``node_id`` is not present.
    """
    node_location = self.node_location_map[node_id]
    # Floor division keeps the computation in integer arithmetic; int()
    # normalises the result to a plain Python int whatever the operand types.
    return int(node_location // self.nodes_per_page)

def get_total_file_size(self):
    """Return the total size of the feature file as a human-readable string.

    The size in bytes is ``self.page_size * self.total_pages``; the result is
    formatted with ``humanfriendly.format_size``.

    Returns:
        The formatted size string produced by ``humanfriendly.format_size``.
    """
    total_bytes = self.page_size * self.total_pages
    return humanfriendly.format_size(total_bytes)
36 changes: 31 additions & 5 deletions simulator/src/visualizer.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
import matplotlib.pyplot as plt
import os
import numpy as np


def visualize_results(visualize_args, num_bins=60, x_range=(0, 75), write_location=(0.75, 0.6)):
    """Plot the cumulative distribution of pages loaded per node and save it.

    Draws an empirical CDF and a cumulative step histogram of the page
    counts, annotates the plot with the mean, standard deviation and feature
    file size, and writes the figure to disk.

    Args:
        visualize_args: Mapping that provides the keys ``"pages_loaded"``
            (sequence of page counts), ``"save_path"``, ``"graph_title"``,
            ``"total_space"`` (string appended to the annotation) and
            ``"dataset_name"``.
        num_bins: Number of bins for the cumulative histogram.
        x_range: ``(min, max)`` limits applied to the x axis.
        write_location: ``(x, y)`` position of the annotation box, expressed
            as fractions of the current x and y axis ranges.

    Raises:
        KeyError: If ``visualize_args`` lacks one of the keys listed above.
    """
    # Get the number of pages read
    pages_loaded = visualize_args["pages_loaded"]
    np_arr = np.array(pages_loaded)
    page_mean, page_std = round(np.mean(np_arr), 2), round(np.std(np_arr), 2)

    # Create the histogram
    plt.figure()
    plt.ecdf(pages_loaded, label="CDF")
    plt.hist(pages_loaded, bins=num_bins, histtype="step", density=True, cumulative=True, label="Cumulative histogram")
    plt.xlabel("Number of pages loaded for node inference")
    plt.ylabel("Percentage of nodes")
    plt.title(visualize_args["graph_title"] + " for dataset " + visualize_args["dataset_name"])
    plt.xlim(x_range)
    plt.legend()

    # Write some resulting text
    text_to_write = "Mean Pages Loaded: " + str(page_mean) + "\n"
    text_to_write += "Std Dev of Pages Loaded: " + str(page_std) + "\n"
    text_to_write += "Feature File Size: " + visualize_args["total_space"]

    # Convert the fractional write_location into data coordinates using the
    # axis limits that are in effect after plotting.
    xlim = plt.xlim()
    ylim = plt.ylim()
    actual_x = write_location[0] * (xlim[1] - xlim[0]) + xlim[0]
    actual_y = write_location[1] * (ylim[1] - ylim[0]) + ylim[0]
    plt.text(
        actual_x,
        actual_y,
        text_to_write,
        fontsize=10,
        horizontalalignment="center",
        verticalalignment="center",
        bbox=dict(facecolor="red", alpha=0.5),
    )

    # Save the result
    plt.tight_layout()
    plt.savefig(visualize_args["save_path"])

0 comments on commit b3b5019

Please sign in to comment.