Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto clang format #322

Merged
merged 4 commits into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion .github/workflows/short-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,33 @@ on:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
format-code:
runs-on: ubuntu-latest

permissions:
# Give the default GITHUB_TOKEN write permission to commit and push the
# added or changed files to the repository.
contents: write

steps:
- uses: actions/checkout@v4
# Other steps that change files in the repository go here
# …
- name: Run clang-format
run: |
sudo apt-get install -y clang-format
./gpu-simulator/format-code.sh
./util/tracer_nvbit/tracer_tool/format-code.sh

- uses: stefanzweifel/git-auto-commit-action@v5
with:
# Optional. Commit message for the created commit.
# Defaults to "Apply automatic changes"
commit_message: Automated clang-format
# Optional. Option used by `git-status` to determine if the repository is
# dirty. See https://git-scm.com/docs/git-status#_options
status_options: '--untracked-files=no'

SASS-Simulation:
runs-on: ubuntu-latest
container:
Expand Down Expand Up @@ -46,4 +73,4 @@ jobs:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v4
- name: Run Simulation
run: echo "skipped Tracer-Simulation. Will perform in merge queue"
run: echo "skipped Tracer-Simulation. Will perform in merge queue"
2 changes: 1 addition & 1 deletion gpu-simulator/ISA_Def/ampere_opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ static const std::unordered_map<std::string, OpcodeChar> Ampere_OpcodeMap = {
{"CCTLT", OpcodeChar(OP_CCTLT, ALU_OP)},

{"LDGDEPBAR", OpcodeChar(OP_LDGDEPBAR, ALU_OP)},
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},
{"LDGSTS", OpcodeChar(OP_LDGSTS, LOAD_OP)},

// Uniform Datapath Instruction
// UDP unit
Expand Down
30 changes: 14 additions & 16 deletions gpu-simulator/trace-driven/trace_driven.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,10 @@ bool trace_warp_inst_t::parse_from_trace_struct(
cache_op = CACHE_ALL;
break;
case OP_LDG:
// LDGSTS is loading the values needed directly from the global memory to shared memory.
// Before this feature, the values need to be loaded to registers first, then store to
// the shared memory.
case OP_LDGSTS: // Add for memcpy_async
// LDGSTS is loading the values needed directly from the global memory to
// shared memory. Before this feature, the values need to be loaded to
// registers first, then store to the shared memory.
case OP_LDGSTS: // Add for memcpy_async
case OP_LDL:
assert(data_size > 0);
memory_op = memory_load;
Expand All @@ -272,8 +272,7 @@ bool trace_warp_inst_t::parse_from_trace_struct(
else
space.set_type(global_space);
// Add for LDGSTS instruction
if (m_opcode == OP_LDGSTS)
m_is_ldgsts = true;
if (m_opcode == OP_LDGSTS) m_is_ldgsts = true;
// check the cache scope, if its strong GPU, then bypass L1
if (trace.check_opcode_contain(opcode_tokens, "STRONG") &&
trace.check_opcode_contain(opcode_tokens, "GPU")) {
Expand Down Expand Up @@ -369,19 +368,18 @@ bool trace_warp_inst_t::parse_from_trace_struct(
// barrier_type bar_type;
// reduction_type red_type;
break;
// LDGDEPBAR is to form a group containing the previous LDGSTS instructions that
// have not been grouped yet.
// In the implementation, a group number will be assigned once the instruction is
// met.
// LDGDEPBAR is to form a group containing the previous LDGSTS instructions
// that have not been grouped yet. In the implementation, a group number
// will be assigned once the instruction is met.
case OP_LDGDEPBAR:
m_is_ldgdepbar = true;
break;
// DEPBAR is served as a warp-wise barrier that is only effective for LDGSTS
// instructions. It is associated with a immediate value. The immediate value
// indicates the last N LDGDEPBAR groups to not wait once the instruction is met.
// For example, if the immediate value is 1, then the last group is able to proceed
// even with DEPBAR present; if the immediate value is 0, then all of the groups
// need to finish before proceed.
// DEPBAR is served as a warp-wise barrier that is only effective for LDGSTS
// instructions. It is associated with a immediate value. The immediate
// value indicates the last N LDGDEPBAR groups to not wait once the
// instruction is met. For example, if the immediate value is 1, then the
// last group is able to proceed even with DEPBAR present; if the immediate
// value is 0, then all of the groups need to finish before proceed.
case OP_DEPBAR:
m_is_depbar = true;
m_depbar_group_no = trace.imm;
Expand Down
41 changes: 22 additions & 19 deletions gpu-simulator/trace-parser/trace_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#include <vector>

#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <unistd.h>

#include "trace_parser.h"

Expand All @@ -32,7 +32,10 @@ void split(const std::string &str, std::vector<std::string> &cont,
}
}

inst_trace_t::inst_trace_t() { memadd_info = NULL; imm = 0;}
inst_trace_t::inst_trace_t() {
memadd_info = NULL;
imm = 0;
}

inst_trace_t::~inst_trace_t() {
if (memadd_info != NULL) delete memadd_info;
Expand Down Expand Up @@ -169,7 +172,7 @@ bool inst_trace_t::parse_from_string(std::string trace, unsigned trace_version,
ss >> temp;
sscanf(temp.c_str(), "R%d", &reg_src[i]);
}

// parse mem info
unsigned address_mode = 0;
unsigned mem_width = 0;
Expand Down Expand Up @@ -287,60 +290,60 @@ kernel_trace_t *trace_parser::parse_kernel_info(

std::string read_trace_cmd;
int _l = kerneltraces_filepath.length();
if(_l > 3 && kerneltraces_filepath.substr(_l-3, 3) == ".xz"){
if (_l > 3 && kerneltraces_filepath.substr(_l - 3, 3) == ".xz") {
// this is xz-compressed trace
read_trace_cmd = "xz -dc " + kerneltraces_filepath;
} else if(_l > 7 && kerneltraces_filepath.substr(_l-7, 7) == ".traceg"){
} else if (_l > 7 && kerneltraces_filepath.substr(_l - 7, 7) == ".traceg") {
// this is plain text trace
read_trace_cmd ="cat " + kerneltraces_filepath;
read_trace_cmd = "cat " + kerneltraces_filepath;
} else {
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
<< kerneltraces_filepath <<"\n";
std::cerr << "Can't read trace. Only .xz and plain text are supported: "
<< kerneltraces_filepath << "\n";
exit(1);
}

// Create an interprocess channel, and fork out a data source process. The
// data source process reads trace from disk, write to the channel, and the
// simulator process read from the channel.
// simulator process read from the channel.
int *pipefd = kernel_info->pipefd;
if(pipe(pipefd) != 0){
if (pipe(pipefd) != 0) {
std::cerr << "Failed to create interprocess channel\n";
perror("pipe");
exit(1);
}

pid_t pid = fork();
if(pid == 0){
if (pid == 0) {
// The child process is the data source. Redirect its
// stdout to the write end of the pipe.
close(pipefd[0]);
dup2(pipefd[1], STDOUT_FILENO);

// When using GDB, sending Ctrl+C to the simulator will send a SIGINT signal
// to the child process as well, subsequently causing it to terminate. To
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
// avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal.
// Reference:
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
// https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes
signal(SIGINT, SIG_IGN);

execle("/bin/sh", "sh", "-c", read_trace_cmd.c_str(), NULL, environ);
perror("execle"); // the child process shouldn't reach here if all is well.
perror("execle"); // the child process shouldn't reach here if all is well.
exit(1);
} else {
// parent (simulator)
close(pipefd[1]);
dup2(pipefd[0], STDIN_FILENO);
}
// Parent continues from here.
kernel_info->ifs = &std::cin;

// Parent continues from here.
kernel_info->ifs = &std::cin;
std::istream *ifs = kernel_info->ifs;

std::cout << "Processing kernel " << kerneltraces_filepath << std::endl;

std::string line;

// Important to clear the istream. Otherwise, the eofbit from the last
// Important to clear the istream. Otherwise, the eofbit from the last
// kernel may be carried over to this kernel
ifs->clear();
clearerr(stdin);
Expand Down Expand Up @@ -417,7 +420,7 @@ void trace_parser::kernel_finalizer(kernel_trace_t *trace_info) {
// The pipe read/write end file descriptors held by the child process would
// have been automatically closed when it terminated. But the parent
// process may read an arbitrary amount of trace files, so it has to close
// all file descriptors.
// all file descriptors.
close(trace_info->pipefd[0]);
close(trace_info->pipefd[1]);
delete trace_info;
Expand Down
4 changes: 2 additions & 2 deletions gpu-simulator/trace-parser/trace_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,12 @@ struct kernel_trace_t {
std::istream *ifs;
// Anonymous pipe through which the trace is transmitted from a trace reader
// process to the simulator process
int pipefd[2]={};
int pipefd[2] = {};
};

class trace_parser {
public:
trace_parser(){}
trace_parser() {}
trace_parser(const char *kernellist_filepath);

std::vector<trace_command> parse_commandlist_file();
Expand Down
15 changes: 8 additions & 7 deletions util/tracer_nvbit/tracer_tool/inject_funcs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
* extern "C" __device__ __noinline__
* To prevent "dead"-code elimination by the compiler.
*/
extern "C" __device__ __noinline__ void instrument_inst(
int pred, int opcode_id, int32_t vpc, bool is_mem, uint64_t addr,
int32_t width, int32_t desReg, int32_t srcReg1, int32_t srcReg2,
int32_t srcReg3, int32_t srcReg4, int32_t srcReg5, int32_t srcNum,
uint64_t immediate,
uint64_t pchannel_dev, uint64_t ptotal_dynamic_instr_counter,
uint64_t preported_dynamic_instr_counter, uint64_t pstop_report, uint32_t line_num) {
extern "C" __device__ __noinline__ void
instrument_inst(int pred, int opcode_id, int32_t vpc, bool is_mem,
uint64_t addr, int32_t width, int32_t desReg, int32_t srcReg1,
int32_t srcReg2, int32_t srcReg3, int32_t srcReg4,
int32_t srcReg5, int32_t srcNum, uint64_t immediate,
uint64_t pchannel_dev, uint64_t ptotal_dynamic_instr_counter,
uint64_t preported_dynamic_instr_counter, uint64_t pstop_report,
uint32_t line_num) {

const int active_mask = __ballot_sync(__activemask(), 1);
const int predicate_mask = __ballot_sync(__activemask(), pred);
Expand Down
Loading