Upload CoGNN details.

guessmewho233 · Apr 10, 2022 · 2962d06 · 2962d06
commit 2962d06
Show file tree

Hide file tree

Showing 55 changed files with 5,963 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,61 @@
+# These are some examples of commonly ignored file patterns.
+# You should customize this list as applicable to your project.
+# Learn more about .gitignore:
+#     https://www.atlassian.com/git/tutorials/saving-changes/gitignore
+
+# Node artifact files
+node_modules/
+dist/
+
+# Compiled Java class files
+*.class
+
+# Compiled Python bytecode
+*.py[cod]
+
+# Log files
+*.log
+
+# Package files
+*.jar
+
+# Maven
+target/
+dist/
+
+# JetBrains IDE
+.idea/
+
+# Unit test reports
+TEST*.xml
+
+# Generated by MacOS
+.DS_Store
+
+# Generated by Windows
+Thumbs.db
+
+# Applications
+*.app
+*.exe
+*.war
+
+# Large media files
+*.mp4
+*.tiff
+*.avi
+*.flv
+*.mov
+*.wmv
+
+*.npz
+*.txt
+*.csv
+*.config
+*.graph
+*.zip
+*.qdrep
+*.sqlite
+*.json
+*.graph
+*.log
diff --git a/README.md b/README.md
@@ -0,0 +1,21 @@
+# CoGNN
+
+## Experiment data
+
+Directory: data
+
+## Environment
+
+Directory: environment
+
+Read environment/README.md for more information.
+
+## Code
+
+Directory: software
+
+Read software/README.md for more information.
+
+## DOI by Zenodo
+
+TODO
diff --git a/data/Figure_10_relative_error.xlsx b/data/Figure_10_relative_error.xlsx
diff --git a/data/Figure_11_beyond_pairwise.xlsx b/data/Figure_11_beyond_pairwise.xlsx
diff --git a/data/Figure_12_overhead.xlsx b/data/Figure_12_overhead.xlsx
diff --git a/data/Figure_2_motivation_PMC.xlsx b/data/Figure_2_motivation_PMC.xlsx
diff --git a/data/Figure_3_motivation_execution_time.xlsx b/data/Figure_3_motivation_execution_time.xlsx
diff --git a/data/Figure_4_motivation_occupancy.xlsx b/data/Figure_4_motivation_occupancy.xlsx
diff --git a/data/Figure_5_MPS_performance.xlsx b/data/Figure_5_MPS_performance.xlsx
diff --git a/data/Figure_9_JCT_CDF.xlsx b/data/Figure_9_JCT_CDF.xlsx
diff --git a/environment/README.md b/environment/README.md
@@ -0,0 +1,27 @@
+# Environment
+
+This folder contains the execution environment and the software to be installed from source.
+
+## Files
+
+- execution-environment.txt: Hardware and software specifications.
+
+- pytorch: 
+    - [Github](https://github.com/pytorch/pytorch/tree/v1.8.1)
+    - [Install](https://github.com/pytorch/pytorch/tree/v1.8.1#from-source)
+
+- dgl: 
+    - [Github](https://github.com/dmlc/dgl/tree/v0.7.0)
+    - [DGL Install](https://docs.dgl.ai/en/0.7.x/install/)
+
+- pytorch\_sparse:
+    - [Github](https://github.com/rusty1s/pytorch_sparse/tree/0.6.10)
+    - Install: python setup.py install
+
+- pytorch\_scatter:
+    - [Github](https://github.com/rusty1s/pytorch_scatter/tree/2.0.7)
+    - Install: python setup.py install
+
+- pytorch\_geometric:
+    - [Github](https://github.com/pyg-team/pytorch_geometric/tree/1.7.0)
+    - Install: python setup.py install
diff --git a/software/README.md b/software/README.md
@@ -0,0 +1,17 @@
+# CoGNN
+
+This folder contains the system prototype of `CoGNN` and other comparison methods. We run each method `10` times and report the average results to reduce the effects of randomness.
+
+## Environment
+
+Add the path to this folder to `PYTHONPATH`.
+
+## Usage
+
+- Compile PyTorch for `CoGNN`, `PipeSwitch` or `MPS`. `CoGNN` and `PipeSwitch` could use the same modified PyTorch.
+
+- For `CoGNN` and `PipeSwitch`, start the server first. After a few seconds, start the client to send requests.
+
+- For `MPS`, enable the MPS server and run the execution script.
+
+More details are in the README under each system's folder.
diff --git a/software/client/README.md b/software/client/README.md
@@ -0,0 +1,15 @@
+# Client
+
+This folder contains client code. The client submits training tasks to the `CoGNN` or `PipeSwitch` server.
+
+## Files
+
+- client.py: sends training requests and waits for the replies.
+
+- run\_client.sh: specify model list file and execute client code.
+
+## Usage
+
+```
+./run_client.sh
+```
diff --git a/software/client/client.py b/software/client/client.py
@@ -0,0 +1,91 @@
+import sys
+import time
+import struct
+import statistics
+import argparse
+
+from util.util import TcpClient, timestamp
+
+def send_request(client, task_name, data, layers):  # Send one training request over `client` as three length-prefixed fields.
+    timestamp('client', 'before_request_%s' % task_name)
+    # Each field is a str encoded with str.encode() and preceded by its byte length packed with struct format 'I'.
+    # Serialize task name
+    task_name_b = task_name.encode()
+    task_name_length = len(task_name_b)
+    task_name_length_b = struct.pack('I', task_name_length)
+
+    # Serialize data
+    data_b = data.encode()
+    data_length = len(data_b)
+    data_length_b = struct.pack('I', data_length)
+
+    # Serialize the layers field (passed in as a str, not an int)
+    layers_b = layers.encode()
+    layers_length = len(layers_b)
+    layers_length_b = struct.pack('I', layers_length)
+
+    timestamp('client', 'after_serialization')
+
+    # Send each length prefix followed by its payload: task name, data, layers
+    client.send(task_name_length_b)
+    client.send(task_name_b)
+    client.send(data_length_b)
+    client.send(data_b)
+    client.send(layers_length_b)
+    client.send(layers_b)
+
+    timestamp('client', 'after_request_%s' % task_name)
+
+
+def recv_response(client):  # Read the reply from `client` and log an 'after_reply' timestamp.
+    reply_b = client.recv(4)  # passes 4 to client.recv; presumably a 4-byte reply — TODO confirm against TcpClient.recv
+    reply = reply_b.decode()  # NOTE(review): the decoded reply is never used or returned
+    timestamp('client', 'after_reply')
+
+
+def close_connection(client):  # Send a single packed length of 0 on `client` and log a 'close_connection' timestamp.
+    model_name_length = 0  # presumably the server treats a zero-length name as end of session — TODO confirm
+    model_name_length_b = struct.pack('I', model_name_length)
+    client.send(model_name_length_b)
+    timestamp('client', 'close_connection')
+
+
+def main():  # Submit every task listed in the file named by sys.argv[1], wait for all replies, then close the connections.
+    # Load model list (task & data): keep only lines with exactly three whitespace-separated fields
+    model_list_file_name = sys.argv[1]
+    model_list = []
+    with open(model_list_file_name) as f:
+        for line in f.readlines():
+            if len(line.split()) != 3:
+                continue
+            model_list.append([line.split()[0], line.split()[1], line.split()[2]])
+    print(model_list)
+
+    # Send training request: one new TcpClient('localhost', 12345) per task; all requests go out before any reply is read
+    client_train = []
+    for i in range(len(model_list)):
+        client_train.append(TcpClient('localhost', 12345))
+        send_request(client_train[i], model_list[i][0], model_list[i][1], model_list[i][2])
+
+#    time.sleep(4)
+
+    timestamp('client', 'after_connect')
+    time_1 = time.time()
+
+    # Recv training reply, in the same order the requests were sent
+    for i in range(len(model_list)):
+        recv_response(client_train[i])
+
+    time_2 = time.time()
+    duration = (time_2 - time_1) * 1000  # milliseconds spent waiting for replies; NOTE(review): never printed or used
+
+    # Close connection
+    for i in range(len(model_list)):
+        close_connection(client_train[i])
+
+    time.sleep(1)
+    timestamp('**********', '**********')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/software/client/run_client.sh b/software/client/run_client.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+CUDA_VISIBLE_DEVICES=0 python client.py ../data/model/mix_model.txt
diff --git a/software/cognn/README.md b/software/cognn/README.md
@@ -0,0 +1,27 @@
+# CoGNN
+
+This folder contains `CoGNN` code.
+
+## Files
+
+- main.py: accept connections from the client, create worker and scheduler processes.
+
+- frontend\_tcp.py: accept client requests and send replies.
+
+- frontend\_schedule.py: manage and schedule tasks according to the specified policy.
+
+- worker.py: dispatch the tasks to worker threads and recycle results.
+
+- worker\_common.py: add hooks for parameter transferring, attach the model to CUDA stream and execute it.
+
+- policy.py: functions for scheduling policies.
+
+- run\_server.sh: specify the model list to load and enable `CoGNN` server.
+
+## Usage
+
+```
+./run_server.sh
+```
+
+After enabling the server, wait a few seconds for the models to load before sending the training request.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#!/bin/bash
		CUDA_VISIBLE_DEVICES=0 python client.py ../data/model/mix_model.txt